diff --git a/.circleci/config.yml b/.circleci/config.yml
new file mode 100644
index 000000000..32e5a1e18
--- /dev/null
+++ b/.circleci/config.yml
@@ -0,0 +1,138 @@
+version: 2
+
+_defaults: &defaults
+ working_directory: ~/repo
+ environment:
+ TERM: dumb
+ docker:
+ - image: circleci/openjdk:8-jdk
+
+_setenv: &setenv
+ name: set CloudRepo credentials
+ command: |-
+ [ -d $HOME/.sbt ] || mkdir $HOME/.sbt
+ printf "realm=s22s.mycloudrepo.io\nhost=s22s.mycloudrepo.io\nuser=$CLOUDREPO_USER\npassword=$CLOUDREPO_PASSWORD\n" > $HOME/.sbt/.credentials
+
+_delenv: &unsetenv
+ name: delete CloudRepo credentials
+ command: rm -f $HOME/.sbt/.credentials
+
+_restore_cache: &restore_cache
+ keys:
+ - v2-dependencies-{{ checksum "build.sbt" }}
+ - v2-dependencies-
+
+_save_cache: &save_cache
+ key: v2-dependencies-{{ checksum "build.sbt" }}
+ paths:
+ - ~/.ivy2/cache
+ - ~/.sbt
+ - ~/.rf_cache
+
+jobs:
+ staticAnalysis:
+ <<: *defaults
+
+ steps:
+ - checkout
+ - run: *setenv
+ - restore_cache:
+ <<: *restore_cache
+
+ - run: cat /dev/null | sbt dependencyCheck
+ - run: cat /dev/null | sbt --debug dumpLicenseReport
+
+ - run: *unsetenv
+
+ - save_cache:
+ <<: *save_cache
+ - store_artifacts:
+ path: datasource/target/scala-2.11/dependency-check-report.html
+ destination: dependency-check-report-datasource.html
+ - store_artifacts:
+ path: experimental/target/scala-2.11/dependency-check-report.html
+ destination: dependency-check-report-experimental.html
+ - store_artifacts:
+ path: core/target/scala-2.11/dependency-check-report.html
+ destination: dependency-check-report-core.html
+ - store_artifacts:
+ path: pyrasterframes/target/scala-2.11/dependency-check-report.html
+ destination: dependency-check-report-pyrasterframes.html
+
+ test:
+ <<: *defaults
+ resource_class: large
+ steps:
+ - checkout
+ - run: *setenv
+ - restore_cache:
+ <<: *restore_cache
+
+ - run: sudo apt-get install python-pip pandoc && pip install setuptools # required for pyrasterframes testing
+ - run: cat /dev/null | sbt test
+
+ - run: *unsetenv
+ - save_cache:
+ <<: *save_cache
+
+ publish:
+ <<: *defaults
+ resource_class: large
+ steps:
+ - checkout
+ - run: *setenv
+ - restore_cache:
+ <<: *restore_cache
+
+ - run: sudo apt-get install python-pip pandoc && pip install setuptools # required for pyrasterframes testing
+ - run: cat /dev/null | sbt test
+ - run: cat /dev/null | sbt publish
+
+ - run: *unsetenv
+ - save_cache:
+ <<: *save_cache
+
+ it:
+ <<: *defaults
+ resource_class: xlarge
+ steps:
+ - checkout
+ - run: *setenv
+
+ - restore_cache:
+ <<: *restore_cache
+
+ - run:
+ command: cat /dev/null | sbt it:test
+ no_output_timeout: 30m
+ - run: *unsetenv
+
+ - save_cache:
+ <<: *save_cache
+
+workflows:
+ version: 2
+ all:
+ jobs:
+ - test
+ - it:
+ filters:
+ branches:
+ only:
+ - /astraea\/feature\/.*-its/
+ - publish:
+ filters:
+ branches:
+ only:
+ - astraea/develop
+ nightlyReleaseAstraea:
+ triggers:
+ - schedule:
+ cron: "0 8 * * *"
+ filters:
+ branches:
+ only:
+ - astraea/develop
+ jobs:
+ - it
+ - staticAnalysis
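
The `_setenv` step above provisions `~/.sbt/.credentials` for the CloudRepo resolver, and `_delenv` removes it before `save_cache` runs so the secret never lands in the cached `~/.sbt` directory. For the file to take effect, the build has to load it; a minimal sketch of the corresponding `build.sbt` wiring (the repository path here is illustrative, not taken from this repo):

```scala
// Hypothetical build.sbt fragment: pick up the credentials file the CI step writes.
credentials += Credentials(Path.userHome / ".sbt" / ".credentials")

// Illustrative resolver; realm and host must match the values printf'd above.
resolvers += "CloudRepo" at "https://s22s.mycloudrepo.io/repositories/releases"
```
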
diff --git a/.scalafmt.conf b/.scalafmt.conf
index 61f56e01e..4d09e93c7 100644
--- a/.scalafmt.conf
+++ b/.scalafmt.conf
@@ -1,5 +1,7 @@
-maxColumn = 100
+maxColumn = 138
continuationIndent.defnSite = 2
+continuationIndent.callSite = 2
+continuationIndent.extendSite = 2
binPack.parentConstructors = true
binPack.literalArgumentLists = false
binPack.unsafeCallSite = true
diff --git a/.travis.yml b/.travis.yml
index b83a56286..fbe2823fa 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,7 +5,8 @@ language: scala
cache:
directories:
- $HOME/.ivy2/cache
- - $HOME/.sbt/boot/
+ - $HOME/.sbt/boot
+ - $HOME/.rf_cache
scala:
- 2.11.11
@@ -14,17 +15,23 @@ jdk:
- oraclejdk8
addons:
- apt_packages:
- - pandoc
+ apt:
+ packages:
+ - pandoc
+ - python-pip
+
+install:
+ - pip install setuptools
sbt_args: -no-colors
script:
- - sbt -Dfile.encoding=UTF8 clean core/test datasource/test
+ - sbt test
+ - sbt it:test
# - sbt -Dfile.encoding=UTF8 clean coverage test coverageReport
# Tricks to avoid unnecessary cache updates
- find $HOME/.sbt -name "*.lock" | xargs rm
- find $HOME/.ivy2 -name "ivydata-*.properties" | xargs rm
#after_success:
-# - bash <(curl -s https://codecov.io/bash)
+# - bash <(curl -s https://codecov.io/bash)
\ No newline at end of file
diff --git a/README.md b/README.md
index 8644f40a5..dddeb94ae 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,5 @@
™
-[![Build Status](https://travis-ci.org/locationtech/rasterframes.svg?branch=master)](https://travis-ci.org/s22s/raster-frames)
[![Join the chat at https://gitter.im/s22s/raster-frames](https://badges.gitter.im/s22s/raster-frames.svg)](https://gitter.im/s22s/raster-frames?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
_RasterFrames™_ brings the power of Spark DataFrames to geospatial raster data, empowered by the map algebra and tile layer operations of [GeoTrellis](https://geotrellis.io/).
diff --git a/bench/archive/jmh-results-20190206135840.json b/bench/archive/jmh-results-20190206135840.json
new file mode 100644
index 000000000..958dad131
--- /dev/null
+++ b/bench/archive/jmh-results-20190206135840.json
@@ -0,0 +1,1124 @@
+[
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.encode",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "uint8",
+ "tileSize" : "64"
+ },
+ "primaryMetric" : {
+ "score" : 0.750756701914575,
+ "scoreError" : 0.03489921840300597,
+ "scoreConfidence" : [
+ 0.715857483511569,
+ 0.785655920317581
+ ],
+ "scorePercentiles" : {
+ "0.0" : 0.7386746014362018,
+ "50.0" : 0.752125418246387,
+ "90.0" : 0.7593754045781637,
+ "95.0" : 0.7593754045781637,
+ "99.0" : 0.7593754045781637,
+ "99.9" : 0.7593754045781637,
+ "99.99" : 0.7593754045781637,
+ "99.999" : 0.7593754045781637,
+ "99.9999" : 0.7593754045781637,
+ "100.0" : 0.7593754045781637
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 0.7386746014362018,
+ 0.7590235001754141,
+ 0.7593754045781637,
+ 0.7445845851367084,
+ 0.752125418246387
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.encode",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "uint8",
+ "tileSize" : "512"
+ },
+ "primaryMetric" : {
+ "score" : 38.38058292433662,
+ "scoreError" : 0.5766293256970553,
+ "scoreConfidence" : [
+ 37.803953598639566,
+ 38.95721225003368
+ ],
+ "scorePercentiles" : {
+ "0.0" : 38.17950849889709,
+ "50.0" : 38.483487036965805,
+ "90.0" : 38.49375297517132,
+ "95.0" : 38.49375297517132,
+ "99.0" : 38.49375297517132,
+ "99.9" : 38.49375297517132,
+ "99.99" : 38.49375297517132,
+ "99.999" : 38.49375297517132,
+ "99.9999" : 38.49375297517132,
+ "100.0" : 38.49375297517132
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 38.17950849889709,
+ 38.25959587406489,
+ 38.483487036965805,
+ 38.49375297517132,
+ 38.48657023658396
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.encode",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "int32",
+ "tileSize" : "64"
+ },
+ "primaryMetric" : {
+ "score" : 6.859730529387528,
+ "scoreError" : 0.15015114495123957,
+ "scoreConfidence" : [
+ 6.709579384436289,
+ 7.009881674338768
+ ],
+ "scorePercentiles" : {
+ "0.0" : 6.807625212637563,
+ "50.0" : 6.851807420817827,
+ "90.0" : 6.912795585073158,
+ "95.0" : 6.912795585073158,
+ "99.0" : 6.912795585073158,
+ "99.9" : 6.912795585073158,
+ "99.99" : 6.912795585073158,
+ "99.999" : 6.912795585073158,
+ "99.9999" : 6.912795585073158,
+ "100.0" : 6.912795585073158
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 6.851807420817827,
+ 6.847977811681636,
+ 6.912795585073158,
+ 6.807625212637563,
+ 6.878446616727458
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.encode",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "int32",
+ "tileSize" : "512"
+ },
+ "primaryMetric" : {
+ "score" : 480.9422408208602,
+ "scoreError" : 5.771651213055483,
+ "scoreConfidence" : [
+ 475.17058960780474,
+ 486.71389203391567
+ ],
+ "scorePercentiles" : {
+ "0.0" : 478.50027097977335,
+ "50.0" : 481.1490574795575,
+ "90.0" : 482.35979851501855,
+ "95.0" : 482.35979851501855,
+ "99.0" : 482.35979851501855,
+ "99.9" : 482.35979851501855,
+ "99.99" : 482.35979851501855,
+ "99.999" : 482.35979851501855,
+ "99.9999" : 482.35979851501855,
+ "100.0" : 482.35979851501855
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 478.50027097977335,
+ 481.91533816448384,
+ 481.1490574795575,
+ 482.35979851501855,
+ 480.7867389654676
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.encode",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "float32",
+ "tileSize" : "64"
+ },
+ "primaryMetric" : {
+ "score" : 6.818078360307711,
+ "scoreError" : 0.08318892187153049,
+ "scoreConfidence" : [
+ 6.734889438436181,
+ 6.901267282179241
+ ],
+ "scorePercentiles" : {
+ "0.0" : 6.7948532935574955,
+ "50.0" : 6.811025113390723,
+ "90.0" : 6.846424033994792,
+ "95.0" : 6.846424033994792,
+ "99.0" : 6.846424033994792,
+ "99.9" : 6.846424033994792,
+ "99.99" : 6.846424033994792,
+ "99.999" : 6.846424033994792,
+ "99.9999" : 6.846424033994792,
+ "100.0" : 6.846424033994792
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 6.803675604268625,
+ 6.811025113390723,
+ 6.7948532935574955,
+ 6.834413756326912,
+ 6.846424033994792
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.encode",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "float32",
+ "tileSize" : "512"
+ },
+ "primaryMetric" : {
+ "score" : 493.1200339402855,
+ "scoreError" : 5.741953611582008,
+ "scoreConfidence" : [
+ 487.3780803287035,
+ 498.8619875518675
+ ],
+ "scorePercentiles" : {
+ "0.0" : 490.7129639878324,
+ "50.0" : 493.6863954119388,
+ "90.0" : 494.3884258252619,
+ "95.0" : 494.3884258252619,
+ "99.0" : 494.3884258252619,
+ "99.9" : 494.3884258252619,
+ "99.99" : 494.3884258252619,
+ "99.999" : 494.3884258252619,
+ "99.9999" : 494.3884258252619,
+ "100.0" : 494.3884258252619
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 493.6863954119388,
+ 492.6966235971648,
+ 490.7129639878324,
+ 494.3884258252619,
+ 494.1157608792294
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.encode",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "float64",
+ "tileSize" : "64"
+ },
+ "primaryMetric" : {
+ "score" : 14.642728171265563,
+ "scoreError" : 0.3786592031928636,
+ "scoreConfidence" : [
+ 14.2640689680727,
+ 15.021387374458428
+ ],
+ "scorePercentiles" : {
+ "0.0" : 14.547127458647054,
+ "50.0" : 14.617635183386758,
+ "90.0" : 14.792616763059605,
+ "95.0" : 14.792616763059605,
+ "99.0" : 14.792616763059605,
+ "99.9" : 14.792616763059605,
+ "99.99" : 14.792616763059605,
+ "99.999" : 14.792616763059605,
+ "99.9999" : 14.792616763059605,
+ "100.0" : 14.792616763059605
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 14.547127458647054,
+ 14.617635183386758,
+ 14.573274428428881,
+ 14.792616763059605,
+ 14.682987022805511
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.encode",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "float64",
+ "tileSize" : "512"
+ },
+ "primaryMetric" : {
+ "score" : 1080.8359386830703,
+ "scoreError" : 35.03166838916621,
+ "scoreConfidence" : [
+ 1045.804270293904,
+ 1115.8676070722365
+ ],
+ "scorePercentiles" : {
+ "0.0" : 1071.5023248018847,
+ "50.0" : 1078.4063772364734,
+ "90.0" : 1092.8710304751503,
+ "95.0" : 1092.8710304751503,
+ "99.0" : 1092.8710304751503,
+ "99.9" : 1092.8710304751503,
+ "99.99" : 1092.8710304751503,
+ "99.999" : 1092.8710304751503,
+ "99.9999" : 1092.8710304751503,
+ "100.0" : 1092.8710304751503
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 1071.5023248018847,
+ 1073.8747796864934,
+ 1092.8710304751503,
+ 1087.5251812153494,
+ 1078.4063772364734
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.encode",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "rasterRef",
+ "tileSize" : "64"
+ },
+ "primaryMetric" : {
+ "score" : 12.188313148621546,
+ "scoreError" : 7.365297251370428,
+ "scoreConfidence" : [
+ 4.823015897251119,
+ 19.553610399991975
+ ],
+ "scorePercentiles" : {
+ "0.0" : 11.186523380550728,
+ "50.0" : 11.374585349686248,
+ "90.0" : 15.606331373536978,
+ "95.0" : 15.606331373536978,
+ "99.0" : 15.606331373536978,
+ "99.9" : 15.606331373536978,
+ "99.99" : 15.606331373536978,
+ "99.999" : 15.606331373536978,
+ "99.9999" : 15.606331373536978,
+ "100.0" : 15.606331373536978
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 11.186523380550728,
+ 11.357274436209732,
+ 11.374585349686248,
+ 11.416851203124038,
+ 15.606331373536978
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.encode",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "rasterRef",
+ "tileSize" : "512"
+ },
+ "primaryMetric" : {
+ "score" : 11.16416039038066,
+ "scoreError" : 0.6904470565404526,
+ "scoreConfidence" : [
+ 10.473713333840207,
+ 11.854607446921113
+ ],
+ "scorePercentiles" : {
+ "0.0" : 10.958926899271512,
+ "50.0" : 11.144335208904712,
+ "90.0" : 11.379572838567773,
+ "95.0" : 11.379572838567773,
+ "99.0" : 11.379572838567773,
+ "99.9" : 11.379572838567773,
+ "99.99" : 11.379572838567773,
+ "99.999" : 11.379572838567773,
+ "99.9999" : 11.379572838567773,
+ "100.0" : 11.379572838567773
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 11.30976879591712,
+ 11.379572838567773,
+ 11.144335208904712,
+ 11.02819820924218,
+ 10.958926899271512
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.roundTrip",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "uint8",
+ "tileSize" : "64"
+ },
+ "primaryMetric" : {
+ "score" : 4.762673330888573,
+ "scoreError" : 0.3629765650696548,
+ "scoreConfidence" : [
+ 4.399696765818918,
+ 5.125649895958228
+ ],
+ "scorePercentiles" : {
+ "0.0" : 4.654367944695545,
+ "50.0" : 4.771804106710553,
+ "90.0" : 4.888312020456609,
+ "95.0" : 4.888312020456609,
+ "99.0" : 4.888312020456609,
+ "99.9" : 4.888312020456609,
+ "99.99" : 4.888312020456609,
+ "99.999" : 4.888312020456609,
+ "99.9999" : 4.888312020456609,
+ "100.0" : 4.888312020456609
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 4.888312020456609,
+ 4.771804106710553,
+ 4.654367944695545,
+ 4.811061175897903,
+ 4.687821406682261
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.roundTrip",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "uint8",
+ "tileSize" : "512"
+ },
+ "primaryMetric" : {
+ "score" : 125.38841234041345,
+ "scoreError" : 12.251197281994925,
+ "scoreConfidence" : [
+ 113.13721505841853,
+ 137.6396096224084
+ ],
+ "scorePercentiles" : {
+ "0.0" : 121.32589970894979,
+ "50.0" : 126.16043763634758,
+ "90.0" : 129.26154882555105,
+ "95.0" : 129.26154882555105,
+ "99.0" : 129.26154882555105,
+ "99.9" : 129.26154882555105,
+ "99.99" : 129.26154882555105,
+ "99.999" : 129.26154882555105,
+ "99.9999" : 129.26154882555105,
+ "100.0" : 129.26154882555105
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 126.16043763634758,
+ 127.1243125222386,
+ 129.26154882555105,
+ 123.06986300898019,
+ 121.32589970894979
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.roundTrip",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "int32",
+ "tileSize" : "64"
+ },
+ "primaryMetric" : {
+ "score" : 18.333069815973175,
+ "scoreError" : 0.3341581136847717,
+ "scoreConfidence" : [
+ 17.998911702288403,
+ 18.667227929657948
+ ],
+ "scorePercentiles" : {
+ "0.0" : 18.226296598017225,
+ "50.0" : 18.31079228696827,
+ "90.0" : 18.426784947207096,
+ "95.0" : 18.426784947207096,
+ "99.0" : 18.426784947207096,
+ "99.9" : 18.426784947207096,
+ "99.99" : 18.426784947207096,
+ "99.999" : 18.426784947207096,
+ "99.9999" : 18.426784947207096,
+ "100.0" : 18.426784947207096
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 18.31079228696827,
+ 18.284332434448437,
+ 18.426784947207096,
+ 18.226296598017225,
+ 18.417142813224846
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.roundTrip",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "int32",
+ "tileSize" : "512"
+ },
+ "primaryMetric" : {
+ "score" : 1211.7797319906595,
+ "scoreError" : 49.26483544063887,
+ "scoreConfidence" : [
+ 1162.5148965500207,
+ 1261.0445674312982
+ ],
+ "scorePercentiles" : {
+ "0.0" : 1195.925568730576,
+ "50.0" : 1210.8324462729913,
+ "90.0" : 1226.1002647058824,
+ "95.0" : 1226.1002647058824,
+ "99.0" : 1226.1002647058824,
+ "99.9" : 1226.1002647058824,
+ "99.99" : 1226.1002647058824,
+ "99.999" : 1226.1002647058824,
+ "99.9999" : 1226.1002647058824,
+ "100.0" : 1226.1002647058824
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 1210.8324462729913,
+ 1195.925568730576,
+ 1222.877439173493,
+ 1226.1002647058824,
+ 1203.1629410703547
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.roundTrip",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "float32",
+ "tileSize" : "64"
+ },
+ "primaryMetric" : {
+ "score" : 20.739003038529034,
+ "scoreError" : 9.965728861183356,
+ "scoreConfidence" : [
+ 10.773274177345678,
+ 30.704731899712392
+ ],
+ "scorePercentiles" : {
+ "0.0" : 18.961133792379307,
+ "50.0" : 19.19435088399221,
+ "90.0" : 24.96128293474057,
+ "95.0" : 24.96128293474057,
+ "99.0" : 24.96128293474057,
+ "99.9" : 24.96128293474057,
+ "99.99" : 24.96128293474057,
+ "99.999" : 24.96128293474057,
+ "99.9999" : 24.96128293474057,
+ "100.0" : 24.96128293474057
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 19.061690043075515,
+ 19.19435088399221,
+ 18.961133792379307,
+ 21.516557538457565,
+ 24.96128293474057
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.roundTrip",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "float32",
+ "tileSize" : "512"
+ },
+ "primaryMetric" : {
+ "score" : 1224.738164769416,
+ "scoreError" : 126.4638940057822,
+ "scoreConfidence" : [
+ 1098.274270763634,
+ 1351.202058775198
+ ],
+ "scorePercentiles" : {
+ "0.0" : 1181.9325521805933,
+ "50.0" : 1229.8881532030002,
+ "90.0" : 1268.8545514967022,
+ "95.0" : 1268.8545514967022,
+ "99.0" : 1268.8545514967022,
+ "99.9" : 1268.8545514967022,
+ "99.99" : 1268.8545514967022,
+ "99.999" : 1268.8545514967022,
+ "99.9999" : 1268.8545514967022,
+ "100.0" : 1268.8545514967022
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 1268.8545514967022,
+ 1237.1361243971808,
+ 1205.8794425696035,
+ 1229.8881532030002,
+ 1181.9325521805933
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.roundTrip",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "float64",
+ "tileSize" : "64"
+ },
+ "primaryMetric" : {
+ "score" : 41.35522075338555,
+ "scoreError" : 1.5888146686882507,
+ "scoreConfidence" : [
+ 39.7664060846973,
+ 42.9440354220738
+ ],
+ "scorePercentiles" : {
+ "0.0" : 40.89423389193166,
+ "50.0" : 41.27828040884876,
+ "90.0" : 42.01334043026831,
+ "95.0" : 42.01334043026831,
+ "99.0" : 42.01334043026831,
+ "99.9" : 42.01334043026831,
+ "99.99" : 42.01334043026831,
+ "99.999" : 42.01334043026831,
+ "99.9999" : 42.01334043026831,
+ "100.0" : 42.01334043026831
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 40.89423389193166,
+ 41.189732083674706,
+ 42.01334043026831,
+ 41.27828040884876,
+ 41.40051695220435
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.roundTrip",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "float64",
+ "tileSize" : "512"
+ },
+ "primaryMetric" : {
+ "score" : 3401.0174112275795,
+ "scoreError" : 497.2925126951152,
+ "scoreConfidence" : [
+ 2903.724898532464,
+ 3898.309923922695
+ ],
+ "scorePercentiles" : {
+ "0.0" : 3234.1288173884936,
+ "50.0" : 3398.0858393887947,
+ "90.0" : 3570.2330531573316,
+ "95.0" : 3570.2330531573316,
+ "99.0" : 3570.2330531573316,
+ "99.9" : 3570.2330531573316,
+ "99.99" : 3570.2330531573316,
+ "99.999" : 3570.2330531573316,
+ "99.9999" : 3570.2330531573316,
+ "100.0" : 3570.2330531573316
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 3472.802212426241,
+ 3570.2330531573316,
+ 3398.0858393887947,
+ 3234.1288173884936,
+ 3329.837133777038
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.roundTrip",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "rasterRef",
+ "tileSize" : "64"
+ },
+ "primaryMetric" : {
+ "score" : 27.695579291790057,
+ "scoreError" : 1.727793688261407,
+ "scoreConfidence" : [
+ 25.96778560352865,
+ 29.423372980051465
+ ],
+ "scorePercentiles" : {
+ "0.0" : 27.169023039660257,
+ "50.0" : 27.78487872459196,
+ "90.0" : 28.259312501235915,
+ "95.0" : 28.259312501235915,
+ "99.0" : 28.259312501235915,
+ "99.9" : 28.259312501235915,
+ "99.99" : 28.259312501235915,
+ "99.999" : 28.259312501235915,
+ "99.9999" : 28.259312501235915,
+ "100.0" : 28.259312501235915
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 27.78487872459196,
+ 28.259312501235915,
+ 27.320646766780545,
+ 27.169023039660257,
+ 27.9440354266816
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.TileEncodeBench.roundTrip",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "rasterRef",
+ "tileSize" : "512"
+ },
+ "primaryMetric" : {
+ "score" : 28.004859122985067,
+ "scoreError" : 0.7553941535592298,
+ "scoreConfidence" : [
+ 27.249464969425837,
+ 28.760253276544297
+ ],
+ "scorePercentiles" : {
+ "0.0" : 27.752906370782245,
+ "50.0" : 27.946522581888676,
+ "90.0" : 28.21079035273483,
+ "95.0" : 28.21079035273483,
+ "99.0" : 28.21079035273483,
+ "99.9" : 28.21079035273483,
+ "99.99" : 28.21079035273483,
+ "99.999" : 28.21079035273483,
+ "99.9999" : 28.21079035273483,
+ "100.0" : 28.21079035273483
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 28.21079035273483,
+ 27.917038521558283,
+ 28.19703778796129,
+ 27.946522581888676,
+ 27.752906370782245
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ }
+]
+
+
diff --git a/bench/archive/jmh-results-20190207140524.json b/bench/archive/jmh-results-20190207140524.json
new file mode 100644
index 000000000..a9b0ea424
--- /dev/null
+++ b/bench/archive/jmh-results-20190207140524.json
@@ -0,0 +1,118 @@
+[
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.BinaryTileOpBench.viaExpression",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "uint16ud255",
+ "numTiles" : "100",
+ "tileSize" : "64"
+ },
+ "primaryMetric" : {
+ "score" : 59.48713747203768,
+ "scoreError" : 1.682438644557827,
+ "scoreConfidence" : [
+ 57.804698827479854,
+ 61.16957611659551
+ ],
+ "scorePercentiles" : {
+ "0.0" : 59.08482080588235,
+ "50.0" : 59.30686591715976,
+ "90.0" : 60.0763132994012,
+ "95.0" : 60.0763132994012,
+ "99.0" : 60.0763132994012,
+ "99.9" : 60.0763132994012,
+ "99.99" : 60.0763132994012,
+ "99.999" : 60.0763132994012,
+ "99.9999" : 60.0763132994012,
+ "100.0" : 60.0763132994012
+ },
+ "scoreUnit" : "ms/op",
+ "rawData" : [
+ [
+ 59.30686591715976,
+ 59.08482080588235,
+ 59.81708820833333,
+ 60.0763132994012,
+ 59.150599129411766
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.BinaryTileOpBench.viaUdf",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "uint16ud255",
+ "numTiles" : "100",
+ "tileSize" : "64"
+ },
+ "primaryMetric" : {
+ "score" : 56.46931398875338,
+ "scoreError" : 2.4604658224787643,
+ "scoreConfidence" : [
+ 54.00884816627461,
+ 58.929779811232144
+ ],
+ "scorePercentiles" : {
+ "0.0" : 55.630841994444445,
+ "50.0" : 56.575462519774014,
+ "90.0" : 57.347593422857145,
+ "95.0" : 57.347593422857145,
+ "99.0" : 57.347593422857145,
+ "99.9" : 57.347593422857145,
+ "99.99" : 57.347593422857145,
+ "99.999" : 57.347593422857145,
+ "99.9999" : 57.347593422857145,
+ "100.0" : 57.347593422857145
+ },
+ "scoreUnit" : "ms/op",
+ "rawData" : [
+ [
+ 57.347593422857145,
+ 55.630841994444445,
+ 56.13477540782123,
+ 56.65789659887005,
+ 56.575462519774014
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ }
+]
+
+
diff --git a/bench/archive/jmh-results-20190224115207.json b/bench/archive/jmh-results-20190224115207.json
new file mode 100644
index 000000000..7aed87d40
--- /dev/null
+++ b/bench/archive/jmh-results-20190224115207.json
@@ -0,0 +1,118 @@
+[
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.BinaryTileOpBench.viaExpression",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "uint16ud255",
+ "numTiles" : "100",
+ "tileSize" : "64"
+ },
+ "primaryMetric" : {
+ "score" : 60.24139353949952,
+ "scoreError" : 3.2516181730645295,
+ "scoreConfidence" : [
+ 56.98977536643499,
+ 63.49301171256405
+ ],
+ "scorePercentiles" : {
+ "0.0" : 59.459361017751476,
+ "50.0" : 60.078545143712574,
+ "90.0" : 61.480693024539875,
+ "95.0" : 61.480693024539875,
+ "99.0" : 61.480693024539875,
+ "99.9" : 61.480693024539875,
+ "99.99" : 61.480693024539875,
+ "99.999" : 61.480693024539875,
+ "99.9999" : 61.480693024539875,
+ "100.0" : 61.480693024539875
+ },
+ "scoreUnit" : "ms/op",
+ "rawData" : [
+ [
+ 59.459361017751476,
+ 60.65764786060606,
+ 60.078545143712574,
+ 59.530720650887574,
+ 61.480693024539875
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "astraea.spark.rasterframes.bench.BinaryTileOpBench.viaUdf",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "params" : {
+ "cellTypeName" : "uint16ud255",
+ "numTiles" : "100",
+ "tileSize" : "64"
+ },
+ "primaryMetric" : {
+ "score" : 58.69963037452669,
+ "scoreError" : 2.0485509148735965,
+ "scoreConfidence" : [
+ 56.6510794596531,
+ 60.748181289400286
+ ],
+ "scorePercentiles" : {
+ "0.0" : 58.22195943604651,
+ "50.0" : 58.52202024561404,
+ "90.0" : 59.53467150595238,
+ "95.0" : 59.53467150595238,
+ "99.0" : 59.53467150595238,
+ "99.9" : 59.53467150595238,
+ "99.99" : 59.53467150595238,
+ "99.999" : 59.53467150595238,
+ "99.9999" : 59.53467150595238,
+ "100.0" : 59.53467150595238
+ },
+ "scoreUnit" : "ms/op",
+ "rawData" : [
+ [
+ 58.891654411764705,
+ 58.52202024561404,
+ 58.22195943604651,
+ 58.327846273255815,
+ 59.53467150595238
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ }
+]
+
+
diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/BinaryTileOpBench.scala b/bench/src/main/scala/astraea/spark/rasterframes/bench/BinaryTileOpBench.scala
new file mode 100644
index 000000000..133d93356
--- /dev/null
+++ b/bench/src/main/scala/astraea/spark/rasterframes/bench/BinaryTileOpBench.scala
@@ -0,0 +1,69 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.bench
+
+import java.util.concurrent.TimeUnit
+
+import astraea.spark.rasterframes.expressions.localops._
+import astraea.spark.rasterframes._
+import geotrellis.raster.Tile
+import geotrellis.raster.mapalgebra.{local => gt}
+import org.apache.spark.sql._
+import org.apache.spark.sql.functions._
+import org.openjdk.jmh.annotations._
+
+@BenchmarkMode(Array(Mode.AverageTime))
+@State(Scope.Benchmark)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+class BinaryTileOpBench extends SparkEnv {
+ import spark.implicits._
+
+ @Param(Array("uint16ud255"))
+ var cellTypeName: String = _
+
+ @Param(Array("64"))
+ var tileSize: Int = _
+
+ @Param(Array("100"))
+ var numTiles: Int = _
+
+ @transient
+ var tiles: DataFrame = _
+
+ val localAddUDF = udf((left: Tile, right: Tile) => gt.Add(left, right))
+
+ @Setup(Level.Trial)
+ def setupData(): Unit = {
+ tiles = Seq.fill(numTiles)((randomTile(tileSize, tileSize, cellTypeName), randomTile(tileSize, tileSize, cellTypeName)))
+ .toDF("left", "right").repartition(10)
+ }
+
+ @Benchmark
+ def viaExpression(): Array[Tile] = {
+ tiles.select(Add($"left", $"right")).collect()
+ }
+
+ @Benchmark
+ def viaUdf(): Array[Tile] = {
+ tiles.select(localAddUDF($"left", $"right").as[Tile]).collect()
+ }
+}
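
Both benchmark paths above bottom out in GeoTrellis's local map-algebra `Add`; what is being measured is only whether Spark routes the call through a Catalyst expression or an opaque UDF. A standalone sketch of the underlying operation, plain GeoTrellis with no Spark involved:

```scala
import geotrellis.raster.IntArrayTile
import geotrellis.raster.mapalgebra.local.Add

// Two 2x2 tiles; Add combines them cell-wise, honoring NoData semantics.
val left  = IntArrayTile(Array(1, 2, 3, 4), 2, 2)
val right = IntArrayTile(Array(10, 20, 30, 40), 2, 2)
val sum   = Add(left, right) // cells: 11, 22, 33, 44
```
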
diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/RasterRefBench.scala b/bench/src/main/scala/astraea/spark/rasterframes/bench/RasterRefBench.scala
index c5fc316d2..c68c826e8 100644
--- a/bench/src/main/scala/astraea/spark/rasterframes/bench/RasterRefBench.scala
+++ b/bench/src/main/scala/astraea/spark/rasterframes/bench/RasterRefBench.scala
@@ -26,7 +26,7 @@ import java.util.concurrent.TimeUnit
import astraea.spark.rasterframes
import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.expressions.RasterSourceToTiles
+import astraea.spark.rasterframes.expressions.transformers.RasterSourceToTiles
import astraea.spark.rasterframes.ref.RasterSource
import astraea.spark.rasterframes.ref.RasterSource.ReadCallback
import com.typesafe.scalalogging.LazyLogging
diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/StatsComputeBench.scala b/bench/src/main/scala/astraea/spark/rasterframes/bench/StatsComputeBench.scala
index 721f71e63..c9aa7eef4 100644
--- a/bench/src/main/scala/astraea/spark/rasterframes/bench/StatsComputeBench.scala
+++ b/bench/src/main/scala/astraea/spark/rasterframes/bench/StatsComputeBench.scala
@@ -22,6 +22,7 @@ package astraea.spark.rasterframes.bench
import java.util.concurrent.TimeUnit
import astraea.spark.rasterframes._
+import astraea.spark.rasterframes.stats.CellHistogram
import org.apache.spark.sql._
import org.openjdk.jmh.annotations._
@@ -54,25 +55,28 @@ class StatsComputeBench extends SparkEnv {
.toDF("tile").repartition(10)
}
- @Benchmark
- def computeStats() = {
- tiles.select(agg_stats($"tile")).collect()
- }
+// @Benchmark
+// def computeStats(): Array[CellStatistics] = {
+// tiles.select(agg_stats($"tile")).collect()
+// }
@Benchmark
- def extractMean() = {
- tiles.select(agg_stats($"tile").getField("mean")).map(_.getDouble(0)).collect()
+ def computeHistogram(): Array[CellHistogram] = {
+ tiles.select(agg_approx_histogram($"tile")).collect()
}
- @Benchmark
- def directMean() = {
- tiles.repartition(10).select(agg_mean($"tile")).collect()
- }
+// @Benchmark
+// def extractMean(): Array[Double] = {
+// tiles.select(agg_stats($"tile").getField("mean")).map(_.getDouble(0)).collect()
+// }
+//
+// @Benchmark
+// def directMean(): Array[Double] = {
+// tiles.repartition(10).select(agg_mean($"tile")).collect()
+// }
// @Benchmark
// def computeCounts() = {
// tiles.toDF("tile").select(data_cells($"tile") as "counts").agg(sum($"counts")).collect()
// }
-
-
}
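
`computeHistogram` exercises the `agg_approx_histogram` aggregate that replaces `agg_histogram` in the `RasterFunctions` changes further down. A minimal usage sketch outside JMH, assuming a SparkSession `spark` with RasterFrames initialized and a DataFrame `tiles` holding a `tile` column; the `totalCount` accessor is an assumption about `CellHistogram`'s API, shown only for illustration:

```scala
import astraea.spark.rasterframes._
import astraea.spark.rasterframes.stats.CellHistogram
import spark.implicits._

// One approximate histogram aggregated over every tile in the column.
val hist: CellHistogram = tiles.select(agg_approx_histogram($"tile")).first()
println(hist.totalCount) // assumed accessor, for illustration only
```
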
diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/TileExplodeBench.scala b/bench/src/main/scala/astraea/spark/rasterframes/bench/TileExplodeBench.scala
index 11069d635..ebd4f169c 100644
--- a/bench/src/main/scala/astraea/spark/rasterframes/bench/TileExplodeBench.scala
+++ b/bench/src/main/scala/astraea/spark/rasterframes/bench/TileExplodeBench.scala
@@ -56,7 +56,7 @@ class TileExplodeBench extends SparkEnv {
@Benchmark
def arrayExplode() = {
- tiles.select(posexplode(tile_to_array[Double]($"tile"))).count()
+ tiles.select(posexplode(tile_to_array_double($"tile"))).count()
}
@Benchmark
diff --git a/build.sbt b/build.sbt
index ecf8948d5..05aed4e8d 100644
--- a/build.sbt
+++ b/build.sbt
@@ -12,7 +12,12 @@ lazy val deployment = project
.dependsOn(root)
.disablePlugins(SparkPackagePlugin)
+lazy val IntegrationTest = config("it") extend Test
+
lazy val core = project
+ .configs(IntegrationTest)
+ .settings(inConfig(IntegrationTest)(Defaults.testSettings))
+ .settings(Defaults.itSettings)
.disablePlugins(SparkPackagePlugin)
lazy val pyrasterframes = project
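
Because the custom `it` configuration extends `Test`, integration sources (under `core/src/it`, added below) compile against the test classpath and run with `sbt core/it:test`, the same task the CI configs invoke. A sketch of scoping a dependency to that configuration only (version illustrative):

```scala
// Hypothetical build.sbt addition: visible to it:test but not to the published artifact.
libraryDependencies += "org.scalatest" %% "scalatest" % "3.0.5" % IntegrationTest
```
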
diff --git a/core/src/it/resources/log4j.properties b/core/src/it/resources/log4j.properties
new file mode 100644
index 000000000..378ae8e61
--- /dev/null
+++ b/core/src/it/resources/log4j.properties
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Set everything to be logged to the console
+log4j.rootCategory=WARN, console
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
+
+# Set the default spark-shell log level to WARN. When running the spark-shell, the
+# log level for this class is used to overwrite the root logger's log level, so that
+# the user can have different defaults for the shell and regular Spark apps.
+log4j.logger.org.apache.spark.repl.Main=WARN
+
+
+log4j.logger.org.apache=ERROR
+log4j.logger.com.amazonaws=WARN
+log4j.logger.geotrellis=INFO
+
+# Settings to quiet third party logs that are too verbose
+log4j.logger.org.spark_project.jetty=WARN
+log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
+log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
+log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
+log4j.logger.astraea.spark.rasterframes=DEBUG
+log4j.logger.astraea.spark.rasterframes.ref=TRACE
+log4j.logger.org.apache.parquet.hadoop.ParquetRecordReader=OFF
+
+# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
+log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
+log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
diff --git a/core/src/it/scala/astraea/spark/rasterframes/ref/RasterSourceIT.scala b/core/src/it/scala/astraea/spark/rasterframes/ref/RasterSourceIT.scala
new file mode 100644
index 000000000..6f9069183
--- /dev/null
+++ b/core/src/it/scala/astraea/spark/rasterframes/ref/RasterSourceIT.scala
@@ -0,0 +1,61 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.ref
+
+import java.net.URI
+
+import astraea.spark.rasterframes.TestEnvironment.ReadMonitor
+import astraea.spark.rasterframes.ref.RasterSource.FileGeoTiffRasterSource
+import astraea.spark.rasterframes.{TestData, TestEnvironment}
+import geotrellis.raster.io.geotiff.GeoTiff
+import geotrellis.vector.Extent
+import org.apache.spark.sql.rf.RasterSourceUDT
+
+/**
+ * Integration tests for RasterSource reads against remote GeoTiffs.
+ *
+ * @since 8/22/18
+ */
+class RasterSourceIT extends TestEnvironment with TestData {
+ def sub(e: Extent) = {
+ val c = e.center
+ val w = e.width
+ val h = e.height
+ Extent(c.x, c.y, c.x + w * 0.1, c.y + h * 0.1)
+ }
+
+ describe("RasterSource.readAll") {
+ it("should return consistently ordered tiles across bands for a given scene") {
+ // These specific scenes exhibit the problem where we see different subtile segment ordering across the bands of a given scene.
+ val rURI = new URI("https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/016/034/LC08_L1TP_016034_20181003_20181003_01_RT/LC08_L1TP_016034_20181003_20181003_01_RT_B4.TIF")
+ val bURI = new URI("https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/016/034/LC08_L1TP_016034_20181003_20181003_01_RT/LC08_L1TP_016034_20181003_20181003_01_RT_B2.TIF")
+
+ val red = RasterSource(rURI).readAll().left.get
+ val blue = RasterSource(bURI).readAll().left.get
+
+ red should not be empty
+ red.size should equal(blue.size)
+
+ red.map(_.dimensions) should contain theSameElementsAs blue.map(_.dimensions)
+ }
+ }
+}
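
The `.left.get` calls above throw if a source unexpectedly reads as multiband. A more defensive sketch, assuming `readAll` returns an `Either` with single-band rasters on the left (as the test implies):

```scala
// Hypothetical refactor of the extraction step inside the test:
val red = RasterSource(rURI).readAll() match {
  case Left(singleband) => singleband
  case Right(_)         => fail("expected a single-band source")
}
```
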
diff --git a/core/src/main/scala/astraea/spark/rasterframes/RasterFunctions.scala b/core/src/main/scala/astraea/spark/rasterframes/RasterFunctions.scala
index e97b86267..ff08dd44c 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/RasterFunctions.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/RasterFunctions.scala
@@ -19,11 +19,15 @@
package astraea.spark.rasterframes
-import astraea.spark.rasterframes.encoders.SparkDefaultEncoders
-import astraea.spark.rasterframes.expressions.ReprojectGeometry
-import astraea.spark.rasterframes.functions.{CellCountAggregate, CellMeanAggregate, CellStatsAggregate}
+import astraea.spark.rasterframes.expressions.TileAssembler
+import astraea.spark.rasterframes.expressions.accessors._
+import astraea.spark.rasterframes.expressions.aggstats._
+import astraea.spark.rasterframes.expressions.generators._
+import astraea.spark.rasterframes.expressions.localops._
+import astraea.spark.rasterframes.expressions.tilestats._
+import astraea.spark.rasterframes.expressions.transformers._
import astraea.spark.rasterframes.stats.{CellHistogram, CellStatistics}
-import astraea.spark.rasterframes.{expressions => E, functions => F}
+import astraea.spark.rasterframes.{functions => F}
import com.vividsolutions.jts.geom.{Envelope, Geometry}
import geotrellis.proj4.CRS
import geotrellis.raster.mapalgebra.local.LocalTileBinaryOp
@@ -32,16 +36,14 @@ import org.apache.spark.annotation.Experimental
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
-import scala.reflect.runtime.universe._
-
/**
* UDFs for working with Tiles in Spark DataFrames.
*
* @since 4/3/17
*/
trait RasterFunctions {
- import SparkDefaultEncoders._
import util._
+ import PrimitiveEncoders._
// format: off
/** Create a row for each cell in Tile. */
@@ -49,23 +51,25 @@ trait RasterFunctions {
/** Create a row for each cell in Tile with random sampling and optional seed. */
def explode_tiles_sample(sampleFraction: Double, seed: Option[Long], cols: Column*): Column =
- E.ExplodeTiles(sampleFraction, seed, cols)
+ ExplodeTiles(sampleFraction, seed, cols)
/** Create a row for each cell in Tile with random sampling (no seed). */
def explode_tiles_sample(sampleFraction: Double, cols: Column*): Column =
- E.ExplodeTiles(sampleFraction, None, cols)
+ ExplodeTiles(sampleFraction, None, cols)
/** Query the number of (cols, rows) in a Tile. */
- def tile_dimensions(col: Column): Column = E.GetDimensions(col)
+ def tile_dimensions(col: Column): Column = GetDimensions(col)
/** Extracts the bounding box of a geometry as a JTS envelope. */
- def envelope(col: Column): TypedColumn[Any, Envelope] = E.GetEnvelope(col)
+ def envelope(col: Column): TypedColumn[Any, Envelope] = GetEnvelope(col)
- /** Flattens Tile into an array. A numeric type parameter is required. */
- @Experimental
- def tile_to_array[T: HasCellType: TypeTag](col: Column): TypedColumn[Any, Array[T]] = withAlias("tile_to_array", col)(
- udf[Array[T], Tile](F.tileToArray).apply(col)
- ).as[Array[T]]
+ /** Flattens Tile into a double array. */
+ def tile_to_array_double(col: Column): TypedColumn[Any, Array[Double]] =
+ TileToArrayDouble(col)
+
+ /** Flattens Tile into an integer array. */
+ def tile_to_array_int(col: Column): TypedColumn[Any, Array[Int]] =
+ TileToArrayInt(col)
@Experimental
/** Convert array in `arrayCol` into a Tile of dimensions `cols` and `rows`*/
@@ -75,204 +79,125 @@ trait RasterFunctions {
/** Create a Tile from a column of cell data with location indexes and perform cell conversion. */
def assemble_tile(columnIndex: Column, rowIndex: Column, cellData: Column, tileCols: Int, tileRows: Int, ct: CellType): TypedColumn[Any, Tile] =
- convert_cell_type(F.TileAssembler(columnIndex, rowIndex, cellData, lit(tileCols), lit(tileRows)), ct).as(cellData.columnName).as[Tile]
+ convert_cell_type(TileAssembler(columnIndex, rowIndex, cellData, lit(tileCols), lit(tileRows)), ct).as(cellData.columnName).as[Tile](singlebandTileEncoder)
/** Create a Tile from a column of cell data with location indexes. */
def assemble_tile(columnIndex: Column, rowIndex: Column, cellData: Column, tileCols: Column, tileRows: Column): TypedColumn[Any, Tile] =
- F.TileAssembler(columnIndex, rowIndex, cellData, tileCols, tileRows)
+ TileAssembler(columnIndex, rowIndex, cellData, tileCols, tileRows)
/** Extract the Tile's cell type */
- def cell_type(col: Column): TypedColumn[Any, CellType] = E.GetCellType(col)
+ def cell_type(col: Column): TypedColumn[Any, CellType] = GetCellType(col)
/** Change the Tile's cell type */
def convert_cell_type(col: Column, cellType: CellType): TypedColumn[Any, Tile] =
- E.SetCellType(col, cellType)
+ SetCellType(col, cellType)
/** Change the Tile's cell type */
def convert_cell_type(col: Column, cellTypeName: String): TypedColumn[Any, Tile] =
- E.SetCellType(col, cellTypeName)
+ SetCellType(col, cellTypeName)
/** Convert a bounding box structure to a Geometry type. Intended to support multiple schemas. */
- def bounds_geometry(bounds: Column): TypedColumn[Any, Geometry] = E.BoundsToGeometry(bounds)
+ def bounds_geometry(bounds: Column): TypedColumn[Any, Geometry] = BoundsToGeometry(bounds)
/** Assign a `NoData` value to the Tiles. */
- def with_no_data(col: Column, nodata: Double) = withAlias("with_no_data", col)(
+ def with_no_data(col: Column, nodata: Double): TypedColumn[Any, Tile] = withAlias("with_no_data", col)(
udf[Tile, Tile](F.withNoData(nodata)).apply(col)
).as[Tile]
/** Compute the full column aggregate floating point histogram. */
- def agg_histogram(col: Column): TypedColumn[Any, CellHistogram] =
- withAlias("histogram", col)(
- F.aggHistogram(col)
- ).as[CellHistogram]
+ def agg_approx_histogram(col: Column): TypedColumn[Any, CellHistogram] =
+ HistogramAggregate(col)
/** Compute the full column aggregate floating point statistics. */
- def agg_stats(col: Column): TypedColumn[Any, CellStatistics] = withAlias("agg_stats", col)(
- F.aggStats(col)
- ).as[CellStatistics]
+ def agg_stats(col: Column): TypedColumn[Any, CellStatistics] =
+ CellStatsAggregate(col)
/** Computes the column aggregate mean. */
def agg_mean(col: Column) = CellMeanAggregate(col)
/** Computes the number of non-NoData cells in a column. */
- def agg_data_cells(col: Column) = CellCountAggregate(true, col)
+ def agg_data_cells(col: Column): TypedColumn[Any, Long] = CellCountAggregate.DataCells(col)
/** Computes the number of NoData cells in a column. */
- def agg_no_data_cells(col: Column) = CellCountAggregate(false, col)
+ def agg_no_data_cells(col: Column): TypedColumn[Any, Long] = CellCountAggregate.NoDataCells(col)
/** Compute the Tile-wise mean */
def tile_mean(col: Column): TypedColumn[Any, Double] =
- withAlias("tile_mean", col)(
- udf[Double, Tile](F.tileMean).apply(col)
- ).as[Double]
+ TileMean(col)
/** Compute the Tile-wise sum */
def tile_sum(col: Column): TypedColumn[Any, Double] =
- withAlias("tile_sum", col)(
- udf[Double, Tile](F.tileSum).apply(col)
- ).as[Double]
+ Sum(col)
/** Compute the minimum cell value in tile. */
def tile_min(col: Column): TypedColumn[Any, Double] =
- withAlias("tile_min", col)(
- udf[Double, Tile](F.tileMin).apply(col)
- ).as[Double]
+ TileMin(col)
/** Compute the maximum cell value in tile. */
def tile_max(col: Column): TypedColumn[Any, Double] =
- withAlias("tile_max", col)(
- udf[Double, Tile](F.tileMax).apply(col)
- ).as[Double]
+ TileMax(col)
/** Compute TileHistogram of Tile values. */
def tile_histogram(col: Column): TypedColumn[Any, CellHistogram] =
- withAlias("tile_histogram", col)(
- udf[CellHistogram, Tile](F.tileHistogram).apply(col)
- ).as[CellHistogram]
+ TileHistogram(col)
/** Compute statistics of Tile values. */
def tile_stats(col: Column): TypedColumn[Any, CellStatistics] =
- withAlias("tile_stats", col)(
- udf[CellStatistics, Tile](F.tileStats).apply(col)
- ).as[CellStatistics]
+ TileStats(col)
/** Counts the number of non-NoData cells per Tile. */
def data_cells(tile: Column): TypedColumn[Any, Long] =
- withAlias("data_cells", tile)(
- udf(F.dataCells).apply(tile)
- ).as[Long]
+ DataCells(tile)
/** Counts the number of NoData cells per Tile. */
def no_data_cells(tile: Column): TypedColumn[Any, Long] =
- withAlias("no_data_cells", tile)(
- udf(F.noDataCells).apply(tile)
- ).as[Long]
-
+ NoDataCells(tile)
def is_no_data_tile(tile: Column): TypedColumn[Any, Boolean] =
- withAlias("is_no_data_tile", tile)(
- udf(F.isNoDataTile).apply(tile)
- ).as[Boolean]
+ IsNoDataTile(tile)
/** Compute cell-local aggregate descriptive statistics for a column of Tiles. */
- def local_agg_stats(col: Column): Column =
- withAlias("local_agg_stats", col)(
- F.localAggStats(col)
- )
+ def agg_local_stats(col: Column) =
+ LocalStatsAggregate(col)
/** Compute the cell-wise/local max operation between Tiles in a column. */
- def local_agg_max(col: Column): TypedColumn[Any, Tile] =
- withAlias("local_agg_max", col)(
- F.localAggMax(col)
- ).as[Tile]
+ def agg_local_max(col: Column): TypedColumn[Any, Tile] = LocalTileOpAggregate.LocalMaxUDAF(col)
/** Compute the cellwise/local min operation between Tiles in a column. */
- def local_agg_min(col: Column): TypedColumn[Any, Tile] =
- withAlias("local_agg_min", col)(
- F.localAggMin(col)
- ).as[Tile]
+ def agg_local_min(col: Column): TypedColumn[Any, Tile] = LocalTileOpAggregate.LocalMinUDAF(col)
/** Compute the cellwise/local mean operation between Tiles in a column. */
- def local_agg_mean(col: Column): TypedColumn[Any, Tile] =
- withAlias("local_agg_mean", col)(
- F.localAggMean(col)
- ).as[Tile]
+ def agg_local_mean(col: Column): TypedColumn[Any, Tile] = LocalMeanAggregate(col)
/** Compute the cellwise/local count of non-NoData cells for all Tiles in a column. */
- def local_agg_data_cells(col: Column): TypedColumn[Any, Tile] =
- withAlias("local_agg_data_cells", col)(
- F.localAggCount(col)
- ).as[Tile]
+ def agg_local_data_cells(col: Column): TypedColumn[Any, Tile] = LocalCountAggregate.LocalDataCellsUDAF(col)
/** Compute the cellwise/local count of NoData cells for all Tiles in a column. */
- def local_agg_no_data_cells(col: Column): TypedColumn[Any, Tile] =
- withAlias("local_agg_no_data_cells", col)(
- F.localAggNodataCount(col)
- ).as[Tile]
-
- /** Cellwise addition between two Tiles. */
- def local_add(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("local_add", left, right)(
- udf(F.localAdd).apply(left, right)
- ).as[Tile]
+ def agg_local_no_data_cells(col: Column): TypedColumn[Any, Tile] = LocalCountAggregate.LocalNoDataCellsUDAF(col)
- /** Cellwise addition of a scalar to a tile. */
- def local_add_scalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match {
- case i: Int => F.localAddScalarInt(_: Tile, i)
- case d: Double => F.localAddScalar(_: Tile, d)
- }
+ /** Cellwise addition between two Tiles, or between a Tile and a scalar column. */
+ def local_add(left: Column, right: Column): TypedColumn[Any, Tile] = Add(left, right)
- udf(f).apply(tileCol).as(s"local_add_scalar($tileCol, $value)").as[Tile]
- }
+ /** Cellwise addition of a scalar value to a tile. */
+ def local_add[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Add(tileCol, value)
/** Cellwise subtraction between two Tiles. */
- def local_subtract(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("local_subtract", left, right)(
- udf(F.localSubtract).apply(left, right)
- ).as[Tile]
-
- /** Cellwise subtraction of a scalar from a tile. */
- def local_subtract_scalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match {
- case i: Int => F.localSubtractScalarInt(_: Tile, i)
- case d: Double => F.localSubtractScalar(_: Tile, d)
- }
+ def local_subtract(left: Column, right: Column): TypedColumn[Any, Tile] = Subtract(left, right)
- udf(f).apply(tileCol).as(s"local_subtract_scalar($tileCol, $value)").as[Tile]
- }
+ /** Cellwise subtraction of a scalar value from a tile. */
+ def local_subtract[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Subtract(tileCol, value)
/** Cellwise multiplication between two Tiles. */
- def local_multiply(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("local_multiply", left, right)(
- udf(F.localMultiply).apply(left, right)
- ).as[Tile]
+ def local_multiply(left: Column, right: Column): TypedColumn[Any, Tile] = Multiply(left, right)
- /** Cellwise multiplication of a tile by a scalar. */
- def local_multiply_scalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match {
- case i: Int => F.localMultiplyScalarInt(_: Tile, i)
- case d: Double => F.localMultiplyScalar(_: Tile, d)
- }
-
- udf(f).apply(tileCol).as(s"local_multiply_scalar($tileCol, $value)").as[Tile]
- }
+ /** Cellwise multiplication of a tile by a scalar value. */
+ def local_multiply[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Multiply(tileCol, value)
/** Cellwise division between two Tiles. */
- def local_divide(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("local_divide", left, right)(
- udf(F.localDivide).apply(left, right)
- ).as[Tile]
-
- /** Cellwise division of a tile by a scalar. */
- def local_divide_scalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match {
- case i: Int => F.localDivideScalarInt(_: Tile, i)
- case d: Double => F.localDivideScalar(_: Tile, d)
- }
+ def local_divide(left: Column, right: Column): TypedColumn[Any, Tile] = Divide(left, right)
- udf(f).apply(tileCol).as(s"local_divide_scalar($tileCol, $value)").as[Tile]
- }
+ /** Cellwise division of a tile by a scalar value. */
+ def local_divide[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Divide(tileCol, value)
/** Perform an arbitrary GeoTrellis `LocalTileBinaryOp` between two Tile columns. */
def local_algebra(op: LocalTileBinaryOp, left: Column, right: Column):
@@ -282,10 +207,8 @@ trait RasterFunctions {
).as[Tile]
/** Compute the normalized difference of two tile columns */
- def normalized_difference(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("normalized_difference", left, right)(
- udf(F.normalizedDifference).apply(left, right)
- ).as[Tile]
+ def normalized_difference(left: Column, right: Column): TypedColumn[Any, Tile] =
+ NormalizedDifference(left, right)
/** Constructor for constant tile column */
def make_constant_tile(value: Number, cols: Int, rows: Int, cellType: String): TypedColumn[Any, Tile] =
@@ -301,21 +224,15 @@ trait RasterFunctions {
/** Where the mask tile contains NODATA, replace values in the source tile with NODATA */
def mask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] =
- withAlias("mask", sourceTile, maskTile)(
- udf(F.mask).apply(sourceTile, maskTile)
- ).as[Tile]
+ Mask.MaskByDefined(sourceTile, maskTile)
/** Where the mask tile equals the mask value, replace values in the source tile with NODATA */
def mask_by_value(sourceTile: Column, maskTile: Column, maskValue: Column): TypedColumn[Any, Tile] =
- withAlias("mask_by_value", sourceTile, maskTile, maskValue)(
- udf(F.maskByValue).apply(sourceTile, maskTile, maskValue)
- ).as[Tile]
+ Mask.MaskByValue(sourceTile, maskTile, maskValue)
/** Where the mask tile DOES NOT contain NODATA, replace values in the source tile with NODATA */
def inverse_mask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] =
- withAlias("inverse_mask", sourceTile, maskTile)(
- udf(F.inverseMask).apply(sourceTile, maskTile)
- ).as[Tile]
+ Mask.InverseMaskByDefined(sourceTile, maskTile)
/** Create a tile where cells in the grid defined by cols, rows, and bounds are filled with the given value. */
def rasterize(geometry: Column, bounds: Column, value: Column, cols: Int, rows: Int): TypedColumn[Any, Tile] =
@@ -335,101 +252,102 @@ trait RasterFunctions {
def reproject_geometry(sourceGeom: Column, srcCRS: CRS, dstCRS: CRS): TypedColumn[Any, Geometry] =
ReprojectGeometry(sourceGeom, srcCRS, dstCRS)
- /** Render Tile as ASCII string for debugging purposes. */
- @Experimental
+ /** Render Tile as ASCII string, for debugging purposes. */
def render_ascii(col: Column): TypedColumn[Any, String] =
- withAlias("render_ascii", col)(
- udf[String, Tile](F.renderAscii).apply(col)
- ).as[String]
+ DebugRender.RenderAscii(col)
+
+ /** Render Tile cell values as numeric values, for debugging purposes. */
+ def render_matrix(col: Column): TypedColumn[Any, String] =
+ DebugRender.RenderMatrix(col)
/** Cellwise less than value comparison between two tiles. */
def local_less(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("local_less", left, right)(
- udf(F.localLess).apply(left, right)
- ).as[Tile]
-
+ Less(left, right)
/** Cellwise less than value comparison between a tile and a scalar. */
- def local_less_scalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match{
- case i: Int ⇒ F.localLessScalarInt(_: Tile, i)
- case d: Double ⇒ F.localLessScalar(_: Tile, d)
- }
- udf(f).apply(tileCol).as(s"local_less_scalar($tileCol, $value)").as[Tile]
- }
+ def local_less[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] =
+ Less(tileCol, value)
 /** Cellwise less than or equal to value comparison between two tiles. */
def local_less_equal(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("local_less_equal", left, right)(
- udf(F.localLess).apply(left, right)
- ).as[Tile]
+ LessEqual(left, right)
/** Cellwise less than or equal to value comparison between a tile and a scalar. */
- def local_less_equal_scalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match{
- case i: Int ⇒ F.localLessEqualScalarInt(_: Tile, i)
- case d: Double ⇒ F.localLessEqualScalar(_: Tile, d)
- }
- udf(f).apply(tileCol).as(s"local_less_equal_scalar($tileCol, $value)").as[Tile]
- }
+ def local_less_equal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] =
+ LessEqual(tileCol, value)
/** Cellwise greater than value comparison between two tiles. */
def local_greater(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("local_greater", left, right)(
- udf(F.localGreater).apply(left, right)
- ).as[Tile]
-
+ Greater(left, right)
/** Cellwise greater than value comparison between a tile and a scalar. */
- def local_greater_scalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match{
- case i: Int ⇒ F.localGreaterScalarInt(_: Tile, i)
- case d: Double ⇒ F.localGreaterScalar(_: Tile, d)
- }
- udf(f).apply(tileCol).as(s"local_greater_scalar($tileCol, $value)").as[Tile]
- }
+ def local_greater[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] =
+ Greater(tileCol, value)
/** Cellwise greater than or equal to value comparison between two tiles. */
def local_greater_equal(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("local_greater_equal", left, right)(
- udf(F.localGreaterEqual).apply(left, right)
- ).as[Tile]
+ GreaterEqual(left, right)
/** Cellwise greater than or equal to value comparison between a tile and a scalar. */
- def local_greater_equal_scalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match{
- case i: Int ⇒ F.localGreaterEqualScalarInt(_: Tile, i)
- case d: Double ⇒ F.localGreaterEqualScalar(_: Tile, d)
- }
- udf(f).apply(tileCol).as(s"local_greater_equal_scalar($tileCol, $value)").as[Tile]
- }
+ def local_greater_equal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] =
+ GreaterEqual(tileCol, value)
/** Cellwise equal to value comparison between two tiles. */
def local_equal(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("local_equal", left, right)(
- udf(F.localEqual).apply(left, right)
- ).as[Tile]
+ Equal(left, right)
/** Cellwise equal to value comparison between a tile and a scalar. */
- def local_equal_scalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match{
- case i: Int ⇒ F.localEqualScalarInt(_: Tile, i)
- case d: Double ⇒ F.localEqualScalar(_: Tile, d)
- }
- udf(f).apply(tileCol).as(s"local_equal_scalar($tileCol, $value)").as[Tile]
- }
+ def local_equal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] =
+ Equal(tileCol, value)
+
/** Cellwise inequality comparison between two tiles. */
def local_unequal(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("local_unequal", left, right)(
- udf(F.localUnequal).apply(left, right)
- ).as[Tile]
+ Unequal(left, right)
/** Cellwise inequality comparison between a tile and a scalar. */
- def local_unequal_scalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match{
- case i: Int ⇒ F.localUnequalScalarInt(_: Tile, i)
- case d: Double ⇒ F.localUnequalScalar(_: Tile, d)
- }
- udf(f).apply(tileCol).as(s"local_unequal_scalar($tileCol, $value)").as[Tile]
- }
+ def local_unequal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] =
+ Unequal(tileCol, value)
+
+ /** Round cell values to nearest integer without changing cell type. */
+ def round(tileCol: Column): TypedColumn[Any, Tile] =
+ Round(tileCol)
+
+ /** Take natural logarithm of cell values. */
+ def log(tileCol: Column): TypedColumn[Any, Tile] =
+ Log(tileCol)
+
+ /** Take base 10 logarithm of cell values. */
+ def log10(tileCol: Column): TypedColumn[Any, Tile] =
+ Log10(tileCol)
+
+ /** Take base 2 logarithm of cell values. */
+ def log2(tileCol: Column): TypedColumn[Any, Tile] =
+ Log2(tileCol)
+
+ /** Natural logarithm of one plus cell values. */
+ def log1p(tileCol: Column): TypedColumn[Any, Tile] =
+ Log1p(tileCol)
+
+ /** Exponential of cell values */
+ def exp(tileCol: Column): TypedColumn[Any, Tile] =
+ Exp(tileCol)
+
+ /** Ten to the power of cell values */
+ def exp10(tileCol: Column): TypedColumn[Any, Tile] =
+ Exp10(tileCol)
+
+ /** Two to the power of cell values */
+ def exp2(tileCol: Column): TypedColumn[Any, Tile] =
+ Exp2(tileCol)
+
+ /** Exponential of cell values, less one. */
+ def expm1(tileCol: Column): TypedColumn[Any, Tile] =
+ ExpM1(tileCol)
+
+ /** Resample tile using nearest-neighbor, scaled by the given factor. */
+ def resample[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Resample(tileCol, value)
+
+ /** Resample tile using nearest-neighbor, with the factor supplied per-row by a column. */
+ def resample(tileCol: Column, column2: Column): TypedColumn[Any, Tile] = Resample(tileCol, column2)
+
}
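
Usage sketch for the refactored functions above: the former `*_scalar` variants are now overloads, and the cell-local aggregates are renamed from `local_agg_*` to `agg_local_*`. The SparkSession and `df` below are placeholders, not part of this changeset.

  import astraea.spark.rasterframes._
  import org.apache.spark.sql.{DataFrame, SparkSession}

  val spark = SparkSession.builder().master("local[*]").getOrCreate()
  import spark.implicits._

  val df: DataFrame = ???  // placeholder: any DataFrame with a Tile column "tile"

  // Scalar overloads replace the old *_scalar names:
  df.select(local_add($"tile", 1), local_less($"tile", 0.5))
  // Aggregates renamed from local_agg_* to agg_local_*:
  df.agg(agg_local_mean($"tile"), agg_data_cells($"tile"))
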
diff --git a/core/src/main/scala/astraea/spark/rasterframes/StandardColumns.scala b/core/src/main/scala/astraea/spark/rasterframes/StandardColumns.scala
index 2a0104b8e..340b17198 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/StandardColumns.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/StandardColumns.scala
@@ -6,17 +6,16 @@ import geotrellis.raster.{Tile, TileFeature}
import geotrellis.spark.{SpatialKey, TemporalKey}
import org.apache.spark.sql.functions.col
import com.vividsolutions.jts.geom.{Point => jtsPoint, Polygon => jtsPolygon}
-import astraea.spark.rasterframes.encoders.SparkDefaultEncoders._
-import astraea.spark.rasterframes.encoders.StandardEncoders
import geotrellis.proj4.CRS
import geotrellis.vector.Extent
+import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders._
/**
 * Constants identifying columns in most RasterFrames.
*
* @since 2/19/18
*/
-trait StandardColumns extends StandardEncoders {
+trait StandardColumns {
/** Default RasterFrame spatial column name. */
val SPATIAL_KEY_COLUMN = col("spatial_key").as[SpatialKey]
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializer.scala b/core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializer.scala
index e172c5b92..3f09e1f38 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializer.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializer.scala
@@ -22,15 +22,9 @@
package astraea.spark.rasterframes.encoders
import astraea.spark.rasterframes.encoders.CatalystSerializer.CatalystIO
-import astraea.spark.rasterframes.ref.{RasterRef, RasterSource}
-import astraea.spark.rasterframes.util.CRSParser
-import com.vividsolutions.jts.geom.Envelope
-import geotrellis.proj4.CRS
-import geotrellis.raster.{CellType, Tile}
-import geotrellis.vector.Extent
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.rf.{RasterSourceUDT, TileUDT}
+import org.apache.spark.sql.catalyst.util.ArrayData
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
@@ -53,7 +47,7 @@ trait CatalystSerializer[T] extends Serializable {
final def fromInternalRow(row: InternalRow): T = from(row, CatalystIO[InternalRow])
}
-object CatalystSerializer {
+object CatalystSerializer extends StandardSerializers {
def apply[T: CatalystSerializer]: CatalystSerializer[T] = implicitly
/**
@@ -66,6 +60,9 @@ object CatalystSerializer {
trait CatalystIO[R] extends Serializable {
def create(values: Any*): R
def to[T: CatalystSerializer](t: T): R = CatalystSerializer[T].to(t, this)
+ def toSeq[T: CatalystSerializer](t: Seq[T]): AnyRef
+ def get[T: CatalystSerializer](d: R, ordinal: Int): T
+ def getSeq[T: CatalystSerializer](d: R, ordinal: Int): Seq[T]
def isNullAt(d: R, ordinal: Int): Boolean
def getBoolean(d: R, ordinal: Int): Boolean
def getByte(d: R, ordinal: Int): Byte
@@ -76,7 +73,6 @@ object CatalystSerializer {
def getDouble(d: R, ordinal: Int): Double
def getString(d: R, ordinal: Int): String
def getByteArray(d: R, ordinal: Int): Array[Byte]
- def get[T: CatalystSerializer](d: R, ordinal: Int): T
def encode(str: String): AnyRef
}
@@ -93,11 +89,17 @@ object CatalystSerializer {
override def getFloat(d: R, ordinal: Int): Float = d.getFloat(ordinal)
override def getDouble(d: R, ordinal: Int): Double = d.getDouble(ordinal)
override def getString(d: R, ordinal: Int): String = d.getString(ordinal)
- override def getByteArray(d: R, ordinal: Int): Array[Byte] = d.get(ordinal).asInstanceOf[Array[Byte]]
+ override def getByteArray(d: R, ordinal: Int): Array[Byte] =
+ d.get(ordinal).asInstanceOf[Array[Byte]]
override def get[T: CatalystSerializer](d: R, ordinal: Int): T = {
- val struct = d.getStruct(ordinal)
- struct.to[T]
+ d.getAs[Any](ordinal) match {
+ case r: Row => r.to[T]
+ case o => o.asInstanceOf[T]
+ }
}
+ override def toSeq[T: CatalystSerializer](t: Seq[T]): AnyRef = t.map(_.toRow)
+ override def getSeq[T: CatalystSerializer](d: R, ordinal: Int): Seq[T] =
+ d.getSeq[Row](ordinal).map(_.to[T])
override def encode(str: String): String = str
}
@@ -122,92 +124,22 @@ object CatalystSerializer {
struct.to[T]
}
override def create(values: Any*): InternalRow = InternalRow(values: _*)
+ override def toSeq[T: CatalystSerializer](t: Seq[T]): ArrayData =
+ ArrayData.toArrayData(t.map(_.toInternalRow).toArray)
+
+ override def getSeq[T: CatalystSerializer](d: InternalRow, ordinal: Int): Seq[T] = {
+ val ad = d.getArray(ordinal)
+ val result = Array.ofDim[Any](ad.numElements()).asInstanceOf[Array[T]]
+ ad.foreach(
+ CatalystSerializer[T].schema,
+ (i, v) => result(i) = v.asInstanceOf[InternalRow].to[T]
+ )
+ result.toSeq
+ }
override def encode(str: String): UTF8String = UTF8String.fromString(str)
}
}
- implicit val envelopeSerializer: CatalystSerializer[Envelope] = new CatalystSerializer[Envelope] {
- override def schema: StructType = StructType(Seq(
- StructField("minX", DoubleType, false),
- StructField("maxX", DoubleType, false),
- StructField("minY", DoubleType, false),
- StructField("maxY", DoubleType, false)
- ))
-
- override protected def to[R](t: Envelope, io: CatalystIO[R]): R = io.create(
- t.getMinX, t.getMaxX, t.getMinY, t.getMaxX
- )
-
- override protected def from[R](t: R, io: CatalystIO[R]): Envelope = new Envelope(
- io.getDouble(t, 0), io.getDouble(t, 1), io.getDouble(t, 2), io.getDouble(t, 3)
- )
- }
-
- implicit val extentSerializer: CatalystSerializer[Extent] = new CatalystSerializer[Extent] {
- override def schema: StructType = StructType(Seq(
- StructField("xmin", DoubleType, false),
- StructField("ymin", DoubleType, false),
- StructField("xmax", DoubleType, false),
- StructField("ymax", DoubleType, false)
- ))
- override def to[R](t: Extent, io: CatalystIO[R]): R = io.create(
- t.xmin, t.ymin, t.xmax, t.ymax
- )
- override def from[R](row: R, io: CatalystIO[R]): Extent = Extent(
- io.getDouble(row, 0), io.getDouble(row, 1), io.getDouble(row, 2), io.getDouble(row, 3)
- )
- }
-
- implicit val crsSerializer: CatalystSerializer[CRS] = new CatalystSerializer[CRS] {
- override def schema: StructType = StructType(Seq(
- StructField("crsProj4", StringType, false)
- ))
- override def to[R](t: CRS, io: CatalystIO[R]): R = io.create(
- io.encode(
- // Don't do this... it's 1000x slower to decode.
- //t.epsgCode.map(c => "EPSG:" + c).getOrElse(t.toProj4String)
- t.toProj4String
- )
- )
- override def from[R](row: R, io: CatalystIO[R]): CRS =
- CRSParser(io.getString(row, 0))
- }
-
- implicit val cellTypeSerializer: CatalystSerializer[CellType] = new CatalystSerializer[CellType] {
- override def schema: StructType = StructType(Seq(
- StructField("cellTypeName", StringType, false)
- ))
- override def to[R](t: CellType, io: CatalystIO[R]): R = io.create(
- io.encode(t.toString())
- )
- override def from[R](row: R, io: CatalystIO[R]): CellType =
- CellType.fromName(io.getString(row, 0))
- }
-
- implicit val rasterRefSerializer: CatalystSerializer[RasterRef] = new CatalystSerializer[RasterRef] {
- val rsType = new RasterSourceUDT()
- override def schema: StructType = StructType(Seq(
- StructField("source", rsType, false),
- StructField("subextent", apply[Extent].schema, true)
- ))
-
- override def to[R](t: RasterRef, io: CatalystIO[R]): R = io.create(
- io.to(t.source),
- t.subextent.map(io.to[Extent]).orNull
- )
-
- override def from[R](row: R, io: CatalystIO[R]): RasterRef = RasterRef(
- io.get[RasterSource](row, 0),
- if (io.isNullAt(row, 1)) None
- else Option(io.get[Extent](row, 1))
- )
- }
-
- private[rasterframes]
- implicit def tileSerializer: CatalystSerializer[Tile] = TileUDT.tileSerializer
- private[rasterframes]
- implicit def rasterSourceSerializer: CatalystSerializer[RasterSource] = RasterSourceUDT.rasterSourceSerializer
-
implicit class WithToRow[T: CatalystSerializer](t: T) {
def toInternalRow: InternalRow = CatalystSerializer[T].toInternalRow(t)
def toRow: Row = CatalystSerializer[T].toRow(t)
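
A hedged round-trip sketch of the `WithToRow` syntax above, using `Extent` since its serializer now comes from `StandardSerializers`:

  import astraea.spark.rasterframes.encoders.CatalystSerializer
  import astraea.spark.rasterframes.encoders.CatalystSerializer._
  import geotrellis.vector.Extent

  val e    = Extent(0.0, 0.0, 10.0, 10.0)
  val row  = e.toRow                                         // Row form via WithToRow
  val ir   = e.toInternalRow                                 // Catalyst-internal form
  val back = CatalystSerializer[Extent].fromInternalRow(ir)  // back to an Extent
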
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializerEncoder.scala b/core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializerEncoder.scala
index a1538e84a..27e452329 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializerEncoder.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializerEncoder.scala
@@ -21,47 +21,62 @@
package astraea.spark.rasterframes.encoders
import org.apache.spark.sql.catalyst.analysis.GetColumnByOrdinal
-import org.apache.spark.sql.catalyst.{InternalRow, ScalaReflection}
-import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder}
-import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.types.{DataType, StructField, StructType}
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
+import org.apache.spark.sql.catalyst.{InternalRow, ScalaReflection}
+import org.apache.spark.sql.types.{DataType, ObjectType, StructField, StructType}
import scala.reflect.runtime.universe.TypeTag
object CatalystSerializerEncoder {
case class CatSerializeToRow[T](child: Expression, serde: CatalystSerializer[T])
- extends UnaryExpression with CodegenFallback {
+ extends UnaryExpression {
override def dataType: DataType = serde.schema
override protected def nullSafeEval(input: Any): Any = {
val value = input.asInstanceOf[T]
serde.toInternalRow(value)
}
+ override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+ val cs = ctx.addReferenceObj("serde", serde, serde.getClass.getName)
+ nullSafeCodeGen(ctx, ev, input => s"${ev.value} = $cs.toInternalRow($input);")
+ }
}
case class CatDeserializeFromRow[T](child: Expression, serde: CatalystSerializer[T], outputType: DataType)
- extends UnaryExpression with CodegenFallback {
+ extends UnaryExpression {
override def dataType: DataType = outputType
+
+ private def objType = outputType match {
+ case ot: ObjectType => ot.cls.getName
+ case o => s"java.lang.Object /* $o */" // not sure what to do here... hopefully shouldn't happen
+ }
override protected def nullSafeEval(input: Any): Any = {
val row = input.asInstanceOf[InternalRow]
serde.fromInternalRow(row)
}
+ override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+ val cs = ctx.addReferenceObj("serde", serde, classOf[CatalystSerializer[_]].getName)
+ nullSafeCodeGen(ctx, ev, input => s"${ev.value} = ($objType) $cs.fromInternalRow($input);")
+ }
}
- def apply[T: TypeTag: CatalystSerializer]: ExpressionEncoder[T] = {
+ def apply[T: TypeTag: CatalystSerializer](flat: Boolean = false): ExpressionEncoder[T] = {
val serde = CatalystSerializer[T]
- val schema = StructType(Seq(
- StructField("value", serde.schema)
- ))
+ val schema = if (flat)
+ StructType(Seq(
+ StructField("value", serde.schema, true)
+ ))
+ else serde.schema
val parentType: DataType = ScalaReflection.dataTypeFor[T]
- val inputObject = BoundReference(0, parentType, nullable = false)
+ val inputObject = BoundReference(0, parentType, nullable = true)
val serializer = CatSerializeToRow(inputObject, serde)
val deserializer: Expression = CatDeserializeFromRow(GetColumnByOrdinal(0, schema), serde, parentType)
- ExpressionEncoder(schema, flat = false, Seq(serializer), deserializer, typeToClassTag[T])
+ ExpressionEncoder(schema, flat = flat, Seq(serializer), deserializer, typeToClassTag[T])
}
}
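
With codegen implemented and the new `flat` parameter, an `ExpressionEncoder` can be derived directly from any type with a `CatalystSerializer`. A sketch using `Extent`, chosen only because its serializer is in implicit scope:

  import astraea.spark.rasterframes.encoders.CatalystSerializerEncoder
  import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
  import geotrellis.vector.Extent

  // Non-flat: the serializer's schema fields become the encoder's columns.
  val structEnc: ExpressionEncoder[Extent] = CatalystSerializerEncoder[Extent]()
  // Flat: the struct is nested under a single nullable "value" column.
  val flatEnc: ExpressionEncoder[Extent] = CatalystSerializerEncoder[Extent](flat = true)
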
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/CellTypeEncoder.scala b/core/src/main/scala/astraea/spark/rasterframes/encoders/CellTypeEncoder.scala
index 82df2bdff..953c2ed65 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/CellTypeEncoder.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/encoders/CellTypeEncoder.scala
@@ -24,9 +24,9 @@ import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.catalyst.analysis.GetColumnByOrdinal
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.rf.VersionShims.InvokeSafely
-import org.apache.spark.sql.types.{ObjectType, StringType, StructField, StructType}
+import org.apache.spark.sql.types.{ObjectType, StringType}
import org.apache.spark.unsafe.types.UTF8String
-import CatalystSerializer._
+
import scala.reflect.classTag
/**
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/EnvelopeEncoder.scala b/core/src/main/scala/astraea/spark/rasterframes/encoders/EnvelopeEncoder.scala
index f65227e9a..5888a1974 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/EnvelopeEncoder.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/encoders/EnvelopeEncoder.scala
@@ -8,7 +8,7 @@ import org.apache.spark.sql.catalyst.expressions.objects.NewInstance
import org.apache.spark.sql.catalyst.expressions.{BoundReference, CreateNamedStruct, Literal}
import org.apache.spark.sql.rf.VersionShims.InvokeSafely
import org.apache.spark.sql.types._
-import CatalystSerializer._
+
import scala.reflect.classTag
/**
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/SparkDefaultEncoders.scala b/core/src/main/scala/astraea/spark/rasterframes/encoders/SparkBasicEncoders.scala
similarity index 74%
rename from core/src/main/scala/astraea/spark/rasterframes/encoders/SparkDefaultEncoders.scala
rename to core/src/main/scala/astraea/spark/rasterframes/encoders/SparkBasicEncoders.scala
index 9763439f9..670d2e217 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/SparkDefaultEncoders.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/encoders/SparkBasicEncoders.scala
@@ -29,11 +29,12 @@ import scala.reflect.runtime.universe._
*
* @since 12/28/17
*/
-private[rasterframes] trait SparkDefaultEncoders {
+private[rasterframes] trait SparkBasicEncoders {
implicit def arrayEnc[T: TypeTag]: Encoder[Array[T]] = ExpressionEncoder()
- implicit def genEnc[T: TypeTag]: Encoder[T] = ExpressionEncoder()
- implicit val intEnc = Encoders.scalaInt
- implicit val stringEnc = Encoders.STRING
+ implicit val intEnc: Encoder[Int] = Encoders.scalaInt
+ implicit val longEnc: Encoder[Long] = Encoders.scalaLong
+ implicit val stringEnc: Encoder[String] = Encoders.STRING
+ implicit val doubleEnc: Encoder[Double] = Encoders.scalaDouble
+ implicit val boolEnc: Encoder[Boolean] = Encoders.scalaBoolean
}
-private[rasterframes] object SparkDefaultEncoders extends SparkDefaultEncoders
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/StandardEncoders.scala b/core/src/main/scala/astraea/spark/rasterframes/encoders/StandardEncoders.scala
index 49fb82b0e..625eea1cd 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/StandardEncoders.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/encoders/StandardEncoders.scala
@@ -20,13 +20,13 @@
package astraea.spark.rasterframes.encoders
import java.net.URI
+import java.sql.Timestamp
-import astraea.spark.rasterframes.ref.{RasterRef, RasterSource}
-import astraea.spark.rasterframes.stats.{CellHistogram, CellStatistics}
-import astraea.spark.rasterframes.tiles.ProjectedRasterTile
+import astraea.spark.rasterframes.model._
+import astraea.spark.rasterframes.stats.{CellHistogram, CellStatistics, LocalCellStatistics}
import com.vividsolutions.jts.geom.Envelope
import geotrellis.proj4.CRS
-import geotrellis.raster.{CellType, ProjectedRaster, Tile}
+import geotrellis.raster.{CellSize, CellType, Tile, TileLayout}
import geotrellis.spark.tiling.LayoutDefinition
import geotrellis.spark.{KeyBounds, SpaceTimeKey, SpatialKey, TemporalKey, TemporalProjectedExtent, TileLayerMetadata}
import geotrellis.vector.{Extent, ProjectedExtent}
@@ -38,28 +38,36 @@ import scala.reflect.runtime.universe._
/**
* Implicit encoder definitions for RasterFrame types.
*/
-trait StandardEncoders extends SpatialEncoders{
+trait StandardEncoders extends SpatialEncoders {
+ object PrimitiveEncoders extends SparkBasicEncoders
+ def expressionEncoder[T: TypeTag]: ExpressionEncoder[T] = ExpressionEncoder()
implicit def spatialKeyEncoder: ExpressionEncoder[SpatialKey] = ExpressionEncoder()
implicit def temporalKeyEncoder: ExpressionEncoder[TemporalKey] = ExpressionEncoder()
implicit def spaceTimeKeyEncoder: ExpressionEncoder[SpaceTimeKey] = ExpressionEncoder()
- implicit def statsEncoder: ExpressionEncoder[CellStatistics] = ExpressionEncoder()
- implicit def histEncoder: ExpressionEncoder[CellHistogram] = ExpressionEncoder()
implicit def layoutDefinitionEncoder: ExpressionEncoder[LayoutDefinition] = ExpressionEncoder()
implicit def stkBoundsEncoder: ExpressionEncoder[KeyBounds[SpaceTimeKey]] = ExpressionEncoder()
- implicit def extentEncoder: ExpressionEncoder[Extent] = ExpressionEncoder()
-
+ implicit def extentEncoder: ExpressionEncoder[Extent] = ExpressionEncoder[Extent]()
implicit def singlebandTileEncoder: ExpressionEncoder[Tile] = ExpressionEncoder()
- implicit def projectedRasterTileEncoder: ExpressionEncoder[ProjectedRasterTile] = ExpressionEncoder()
implicit def tileLayerMetadataEncoder[K: TypeTag]: ExpressionEncoder[TileLayerMetadata[K]] = TileLayerMetadataEncoder()
implicit def crsEncoder: ExpressionEncoder[CRS] = CRSEncoder()
implicit def projectedExtentEncoder: ExpressionEncoder[ProjectedExtent] = ProjectedExtentEncoder()
implicit def temporalProjectedExtentEncoder: ExpressionEncoder[TemporalProjectedExtent] = TemporalProjectedExtentEncoder()
implicit def cellTypeEncoder: ExpressionEncoder[CellType] = CellTypeEncoder()
+ implicit def cellSizeEncoder: ExpressionEncoder[CellSize] = ExpressionEncoder()
implicit def uriEncoder: ExpressionEncoder[URI] = URIEncoder()
implicit def envelopeEncoder: ExpressionEncoder[Envelope] = EnvelopeEncoder()
- implicit def rrEncoder: ExpressionEncoder[RasterRef] = ExpressionEncoder()
- implicit def prEncoder: ExpressionEncoder[ProjectedRaster[Tile]] = ExpressionEncoder()
- implicit def rsEncoder: ExpressionEncoder[RasterSource] = ExpressionEncoder()
+ implicit def timestampEncoder: ExpressionEncoder[Timestamp] = ExpressionEncoder()
+ implicit def strMapEncoder: ExpressionEncoder[Map[String, String]] = ExpressionEncoder()
+ implicit def cellStatsEncoder: ExpressionEncoder[CellStatistics] = ExpressionEncoder()
+ implicit def cellHistEncoder: ExpressionEncoder[CellHistogram] = ExpressionEncoder()
+ implicit def localCellStatsEncoder: ExpressionEncoder[LocalCellStatistics] = ExpressionEncoder()
+ implicit def tilelayoutEncoder: ExpressionEncoder[TileLayout] = ExpressionEncoder()
+ implicit def cellContextEncoder: ExpressionEncoder[CellContext] = CellContext.encoder
+ implicit def cellsEncoder: ExpressionEncoder[Cells] = Cells.encoder
+ implicit def tileContextEncoder: ExpressionEncoder[TileContext] = TileContext.encoder
+ implicit def tileDataContextEncoder: ExpressionEncoder[TileDataContext] = TileDataContext.encoder
+
}
object StandardEncoders extends StandardEncoders
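
The primitive encoders are now namespaced under `PrimitiveEncoders` rather than inherited wholesale, so call sites inside the library opt in explicitly (as `StandardColumns` does earlier in this patch). A minimal sketch; the local SparkSession is a placeholder:

  import org.apache.spark.sql.SparkSession
  import astraea.spark.rasterframes.encoders.StandardEncoders._
  import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders._
  import geotrellis.spark.SpatialKey

  val spark  = SparkSession.builder().master("local[*]").getOrCreate()
  val keys   = spark.createDataset(Seq(SpatialKey(0, 0), SpatialKey(0, 1)))  // spatialKeyEncoder
  val counts = spark.createDataset(Seq(1L, 2L, 3L))                          // longEnc
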
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/StandardSerializers.scala b/core/src/main/scala/astraea/spark/rasterframes/encoders/StandardSerializers.scala
new file mode 100644
index 000000000..aaff5c534
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/encoders/StandardSerializers.scala
@@ -0,0 +1,251 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.encoders
+import astraea.spark.rasterframes.encoders.CatalystSerializer.CatalystIO
+import astraea.spark.rasterframes.util.CRSParser
+import com.vividsolutions.jts.geom.Envelope
+import geotrellis.proj4.CRS
+import geotrellis.raster._
+import geotrellis.spark._
+import geotrellis.spark.tiling.LayoutDefinition
+import geotrellis.vector._
+import org.apache.spark.sql.types._
+
+/** Collection of CatalystSerializers for third-party types. */
+trait StandardSerializers {
+
+ implicit val envelopeSerializer: CatalystSerializer[Envelope] = new CatalystSerializer[Envelope] {
+ override def schema: StructType = StructType(Seq(
+ StructField("minX", DoubleType, false),
+ StructField("maxX", DoubleType, false),
+ StructField("minY", DoubleType, false),
+ StructField("maxY", DoubleType, false)
+ ))
+
+ override protected def to[R](t: Envelope, io: CatalystIO[R]): R = io.create(
+ t.getMinX, t.getMaxX, t.getMinY, t.getMaxY
+ )
+
+ override protected def from[R](t: R, io: CatalystIO[R]): Envelope = new Envelope(
+ io.getDouble(t, 0), io.getDouble(t, 1), io.getDouble(t, 2), io.getDouble(t, 3)
+ )
+ }
+
+ implicit val extentSerializer: CatalystSerializer[Extent] = new CatalystSerializer[Extent] {
+ override def schema: StructType = StructType(Seq(
+ StructField("xmin", DoubleType, false),
+ StructField("ymin", DoubleType, false),
+ StructField("xmax", DoubleType, false),
+ StructField("ymax", DoubleType, false)
+ ))
+ override def to[R](t: Extent, io: CatalystIO[R]): R = io.create(
+ t.xmin, t.ymin, t.xmax, t.ymax
+ )
+ override def from[R](row: R, io: CatalystIO[R]): Extent = Extent(
+ io.getDouble(row, 0), io.getDouble(row, 1), io.getDouble(row, 2), io.getDouble(row, 3)
+ )
+ }
+
+ implicit val crsSerializer: CatalystSerializer[CRS] = new CatalystSerializer[CRS] {
+ override def schema: StructType = StructType(Seq(
+ StructField("crsProj4", StringType, false)
+ ))
+ override def to[R](t: CRS, io: CatalystIO[R]): R = io.create(
+ io.encode(
+ // Don't do this... it's 1000x slower to decode.
+ //t.epsgCode.map(c => "EPSG:" + c).getOrElse(t.toProj4String)
+ t.toProj4String
+ )
+ )
+ override def from[R](row: R, io: CatalystIO[R]): CRS =
+ CRSParser(io.getString(row, 0))
+ }
+
+ implicit val cellTypeSerializer: CatalystSerializer[CellType] = new CatalystSerializer[CellType] {
+ override def schema: StructType = StructType(Seq(
+ StructField("cellTypeName", StringType, false)
+ ))
+ override def to[R](t: CellType, io: CatalystIO[R]): R = io.create(
+ io.encode(t.toString())
+ )
+ override def from[R](row: R, io: CatalystIO[R]): CellType =
+ CellType.fromName(io.getString(row, 0))
+ }
+
+ implicit val projectedExtentSerializer: CatalystSerializer[ProjectedExtent] = new CatalystSerializer[ProjectedExtent] {
+ override def schema: StructType = StructType(Seq(
+ StructField("extent", CatalystSerializer[Extent].schema, false),
+ StructField("crs", CatalystSerializer[CRS].schema, false)
+ ))
+
+ override protected def to[R](t: ProjectedExtent, io: CatalystSerializer.CatalystIO[R]): R = io.create(
+ io.to(t.extent),
+ io.to(t.crs)
+ )
+
+ override protected def from[R](t: R, io: CatalystSerializer.CatalystIO[R]): ProjectedExtent = ProjectedExtent(
+ io.get[Extent](t, 0),
+ io.get[CRS](t, 1)
+ )
+ }
+
+ implicit val spatialKeySerializer: CatalystSerializer[SpatialKey] = new CatalystSerializer[SpatialKey] {
+ override def schema: StructType = StructType(Seq(
+ StructField("col", IntegerType, false),
+ StructField("row", IntegerType, false)
+ ))
+
+ override protected def to[R](t: SpatialKey, io: CatalystIO[R]): R = io.create(
+ t.col,
+ t.row
+ )
+
+ override protected def from[R](t: R, io: CatalystIO[R]): SpatialKey = SpatialKey(
+ io.getInt(t, 0),
+ io.getInt(t, 1)
+ )
+ }
+
+ implicit val spacetimeKeySerializer: CatalystSerializer[SpaceTimeKey] = new CatalystSerializer[SpaceTimeKey] {
+ override def schema: StructType = StructType(Seq(
+ StructField("col", IntegerType, false),
+ StructField("row", IntegerType, false),
+ StructField("instant", LongType, false)
+ ))
+
+ override protected def to[R](t: SpaceTimeKey, io: CatalystIO[R]): R = io.create(
+ t.col,
+ t.row,
+ t.instant
+ )
+
+ override protected def from[R](t: R, io: CatalystIO[R]): SpaceTimeKey = SpaceTimeKey(
+ io.getInt(t, 0),
+ io.getInt(t, 1),
+ io.getLong(t, 2)
+ )
+ }
+
+ implicit val cellSizeSerializer: CatalystSerializer[CellSize] = new CatalystSerializer[CellSize] {
+ override def schema: StructType = StructType(Seq(
+ StructField("width", DoubleType, false),
+ StructField("height", DoubleType, false)
+ ))
+
+ override protected def to[R](t: CellSize, io: CatalystIO[R]): R = io.create(
+ t.width,
+ t.height
+ )
+
+ override protected def from[R](t: R, io: CatalystIO[R]): CellSize = CellSize(
+ io.getDouble(t, 0),
+ io.getDouble(t, 1)
+ )
+ }
+
+ implicit val tileLayoutSerializer: CatalystSerializer[TileLayout] = new CatalystSerializer[TileLayout] {
+ override def schema: StructType = StructType(Seq(
+ StructField("layoutCols", IntegerType, false),
+ StructField("layoutRows", IntegerType, false),
+ StructField("tileCols", IntegerType, false),
+ StructField("tileRows", IntegerType, false)
+ ))
+
+ override protected def to[R](t: TileLayout, io: CatalystIO[R]): R = io.create(
+ t.layoutCols,
+ t.layoutRows,
+ t.tileCols,
+ t.tileRows
+ )
+
+ override protected def from[R](t: R, io: CatalystIO[R]): TileLayout = TileLayout(
+ io.getInt(t, 0),
+ io.getInt(t, 1),
+ io.getInt(t, 2),
+ io.getInt(t, 3)
+ )
+ }
+
+ implicit val layoutDefinitionSerializer = new CatalystSerializer[LayoutDefinition] {
+ override def schema: StructType = StructType(Seq(
+ StructField("extent", CatalystSerializer[Extent].schema, true),
+ StructField("tileLayout", CatalystSerializer[TileLayout].schema, true)
+ ))
+
+ override protected def to[R](t: LayoutDefinition, io: CatalystIO[R]): R = io.create(
+ io.to(t.extent),
+ io.to(t.tileLayout)
+ )
+
+ override protected def from[R](t: R, io: CatalystIO[R]): LayoutDefinition = LayoutDefinition(
+ io.get[Extent](t, 0),
+ io.get[TileLayout](t, 1)
+ )
+ }
+
+ implicit def boundsSerializer[T: CatalystSerializer]: CatalystSerializer[KeyBounds[T]] = new CatalystSerializer[KeyBounds[T]] {
+ override def schema: StructType = StructType(Seq(
+ StructField("minKey", CatalystSerializer[T].schema, true),
+ StructField("maxKey", CatalystSerializer[T].schema, true)
+ ))
+
+ override protected def to[R](t: KeyBounds[T], io: CatalystIO[R]): R = io.create(
+ io.to(t.get.minKey),
+ io.to(t.get.maxKey)
+ )
+
+ override protected def from[R](t: R, io: CatalystIO[R]): KeyBounds[T] = KeyBounds(
+ io.get[T](t, 0),
+ io.get[T](t, 1)
+ )
+ }
+
+ def tileLayerMetadataSerializer[T: CatalystSerializer]: CatalystSerializer[TileLayerMetadata[T]] = new CatalystSerializer[TileLayerMetadata[T]] {
+ override def schema: StructType = StructType(Seq(
+ StructField("cellType", CatalystSerializer[CellType].schema, false),
+ StructField("layout", CatalystSerializer[LayoutDefinition].schema, false),
+ StructField("extent", CatalystSerializer[Extent].schema, false),
+ StructField("crs", CatalystSerializer[CRS].schema, false),
+ StructField("bounds", CatalystSerializer[KeyBounds[T]].schema, false)
+ ))
+
+ override protected def to[R](t: TileLayerMetadata[T], io: CatalystIO[R]): R = io.create(
+ io.to(t.cellType),
+ io.to(t.layout),
+ io.to(t.extent),
+ io.to(t.crs),
+ io.to(t.bounds.head)
+ )
+
+ override protected def from[R](t: R, io: CatalystIO[R]): TileLayerMetadata[T] = TileLayerMetadata(
+ io.get[CellType](t, 0),
+ io.get[LayoutDefinition](t, 1),
+ io.get[Extent](t, 2),
+ io.get[CRS](t, 3),
+ io.get[KeyBounds[T]](t, 4)
+ )
+ }
+
+ implicit val spatialKeyTLMSerializer = tileLayerMetadataSerializer[SpatialKey]
+ implicit val spaceTimeKeyTLMSerializer = tileLayerMetadataSerializer[SpaceTimeKey]
+
+}
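
A hedged round-trip sketch over these serializers, using `TileLayout` (chosen arbitrarily from the instances above):

  import astraea.spark.rasterframes.encoders.CatalystSerializer
  import geotrellis.raster.TileLayout

  val ser  = CatalystSerializer[TileLayout]  // resolves tileLayoutSerializer
  val ir   = ser.toInternalRow(TileLayout(4, 4, 256, 256))
  val back = ser.fromInternalRow(ir)         // TileLayout(4, 4, 256, 256)
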
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/BinaryLocalRasterOp.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/BinaryLocalRasterOp.scala
new file mode 100644
index 000000000..3fac44c65
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/BinaryLocalRasterOp.scala
@@ -0,0 +1,78 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions
+
+import astraea.spark.rasterframes.encoders.CatalystSerializer._
+import astraea.spark.rasterframes.expressions.DynamicExtractors._
+import com.typesafe.scalalogging.LazyLogging
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
+import org.apache.spark.sql.catalyst.expressions.BinaryExpression
+import org.apache.spark.sql.rf.TileUDT
+import org.apache.spark.sql.types.DataType
+
+/** Operation combining two tiles or a tile and a scalar into a new tile. */
+trait BinaryLocalRasterOp extends BinaryExpression with LazyLogging {
+
+ override def dataType: DataType = left.dataType
+
+ override def checkInputDataTypes(): TypeCheckResult = {
+ if (!tileExtractor.isDefinedAt(left.dataType)) {
+ TypeCheckFailure(s"Input type '${left.dataType}' does not conform to a raster type.")
+ }
+ else if (!tileOrNumberExtractor.isDefinedAt(right.dataType)) {
+ TypeCheckFailure(s"Input type '${right.dataType}' does not conform to a compatible type.")
+ }
+ else TypeCheckSuccess
+ }
+
+ override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+ implicit val tileSer = TileUDT.tileSerializer
+ val (leftTile, leftCtx) = tileExtractor(left.dataType)(row(input1))
+ val result = tileOrNumberExtractor(right.dataType)(input2) match {
+ case TileArg(rightTile, rightCtx) =>
+ if (leftCtx.isEmpty && rightCtx.isDefined)
+ logger.warn(
+ s"Right-hand parameter '${right}' provided an extent and CRS, but the left-hand parameter " +
+ s"'${left}' didn't have any. Because the left-hand side defines output type, the right-hand context will be lost.")
+
+ if(leftCtx.isDefined && rightCtx.isDefined && leftCtx != rightCtx)
+ logger.warn(s"Both '${left}' and '${right}' provided an extent and CRS, but they are different. Left-hand side will be used.")
+
+ op(leftTile, rightTile)
+ case DoubleArg(d) => op(fpTile(leftTile), d)
+ case IntegerArg(i) => op(leftTile, i)
+ }
+
+ leftCtx match {
+ case Some(ctx) => ctx.toProjectRasterTile(result).toInternalRow
+ case None => result.toInternalRow
+ }
+ }
+
+ protected def op(left: Tile, right: Tile): Tile
+ protected def op(left: Tile, right: Double): Tile
+ protected def op(left: Tile, right: Int): Tile
+}
+
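
Concrete expressions such as `Add`, `Subtract`, `Multiply`, and `Divide` implement this trait. A hypothetical subclass (not part of this changeset, assuming GeoTrellis' `localPow` tile methods) shows the contract: implement the three `op` overloads and the trait handles extraction, context reconciliation, and re-serialization.

  import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
  import geotrellis.raster._
  import org.apache.spark.sql.catalyst.expressions.Expression
  import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback

  // Illustrative only; exponentiation via GeoTrellis' local-map operations.
  case class LocalPow(left: Expression, right: Expression)
    extends BinaryLocalRasterOp with CodegenFallback {
    override protected def op(left: Tile, right: Tile): Tile   = left.localPow(right)
    override protected def op(left: Tile, right: Double): Tile = left.localPow(right)
    override protected def op(left: Tile, right: Int): Tile    = left.localPow(right)
  }
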
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/BinaryRasterOp.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/BinaryRasterOp.scala
new file mode 100644
index 000000000..02f8fc29e
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/BinaryRasterOp.scala
@@ -0,0 +1,70 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions
+import astraea.spark.rasterframes.expressions.DynamicExtractors.tileExtractor
+import astraea.spark.rasterframes.encoders.CatalystSerializer._
+import com.typesafe.scalalogging.LazyLogging
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
+import org.apache.spark.sql.catalyst.expressions.BinaryExpression
+import org.apache.spark.sql.rf.TileUDT
+import org.apache.spark.sql.types.DataType
+
+/** Operation combining two tiles into a new tile. */
+trait BinaryRasterOp extends BinaryExpression with LazyLogging {
+
+ override def dataType: DataType = left.dataType
+
+ override def checkInputDataTypes(): TypeCheckResult = {
+ if (!tileExtractor.isDefinedAt(left.dataType)) {
+ TypeCheckFailure(s"Input type '${left.dataType}' does not conform to a raster type.")
+ }
+ else if (!tileExtractor.isDefinedAt(right.dataType)) {
+ TypeCheckFailure(s"Input type '${right.dataType}' does not conform to a raster type.")
+ }
+ else TypeCheckSuccess
+ }
+
+ protected def op(left: Tile, right: Tile): Tile
+
+ override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+ implicit val tileSer = TileUDT.tileSerializer
+ val (leftTile, leftCtx) = tileExtractor(left.dataType)(row(input1))
+ val (rightTile, rightCtx) = tileExtractor(right.dataType)(row(input2))
+
+ if (leftCtx.isEmpty && rightCtx.isDefined)
+ logger.warn(
+ s"Right-hand parameter '${right}' provided an extent and CRS, but the left-hand parameter " +
+ s"'${left}' didn't have any. Because the left-hand side defines output type, the right-hand context will be lost.")
+
+ if(leftCtx.isDefined && rightCtx.isDefined && leftCtx != rightCtx)
+ logger.warn(s"Both '${left}' and '${right}' provided an extent and CRS, but they are different. Left-hand side will be used.")
+
+ val result = op(leftTile, rightTile)
+
+ leftCtx match {
+ case Some(ctx) => ctx.toProjectRasterTile(result).toInternalRow
+ case None => result.toInternalRow
+ }
+ }
+}
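
The same pattern in two-tile-only form; a hypothetical subclass (not part of this changeset, assuming GeoTrellis' `localMin` tile method):

  import astraea.spark.rasterframes.expressions.BinaryRasterOp
  import geotrellis.raster._
  import org.apache.spark.sql.catalyst.expressions.Expression
  import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback

  case class LocalMinOp(left: Expression, right: Expression)
    extends BinaryRasterOp with CodegenFallback {
    override protected def op(left: Tile, right: Tile): Tile = left.localMin(right)
  }
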
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/DynamicExtractors.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/DynamicExtractors.scala
new file mode 100644
index 000000000..1dabc8201
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/DynamicExtractors.scala
@@ -0,0 +1,114 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions
+import astraea.spark.rasterframes.encoders.CatalystSerializer
+import astraea.spark.rasterframes.encoders.CatalystSerializer._
+import astraea.spark.rasterframes.model.TileContext
+import astraea.spark.rasterframes.ref.{ProjectedRasterLike, RasterRef, RasterSource}
+import astraea.spark.rasterframes.tiles.ProjectedRasterTile
+import geotrellis.raster.{CellGrid, Tile}
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.rf.{TileUDT, _}
+import org.apache.spark.sql.types._
+
+private[expressions]
+object DynamicExtractors {
+ /** Partial function for pulling a tile and its context from an input row. */
+ lazy val tileExtractor: PartialFunction[DataType, InternalRow => (Tile, Option[TileContext])] = {
+ case _: TileUDT =>
+ (row: InternalRow) =>
+ (row.to[Tile](TileUDT.tileSerializer), None)
+ case t if t.conformsTo(CatalystSerializer[ProjectedRasterTile].schema) =>
+ (row: InternalRow) => {
+ val prt = row.to[ProjectedRasterTile]
+ (prt, Some(TileContext(prt)))
+ }
+ }
+
+ lazy val rowTileExtractor: PartialFunction[DataType, Row => (Tile, Option[TileContext])] = {
+ case _: TileUDT =>
+ (row: Row) => (row.to[Tile](TileUDT.tileSerializer), None)
+ case t if t.conformsTo(CatalystSerializer[ProjectedRasterTile].schema) =>
+ (row: Row) => {
+ val prt = row.to[ProjectedRasterTile]
+ (prt, Some(TileContext(prt)))
+ }
+ }
+
+ /** Partial function for pulling a ProjectedRasterLike from an input row. */
+ lazy val projectedRasterLikeExtractor: PartialFunction[DataType, InternalRow ⇒ ProjectedRasterLike] = {
+ case _: RasterSourceUDT ⇒
+ (row: InternalRow) ⇒ row.to[RasterSource](RasterSourceUDT.rasterSourceSerializer)
+ case t if t.conformsTo(CatalystSerializer[ProjectedRasterTile].schema) =>
+ (row: InternalRow) => row.to[ProjectedRasterTile]
+ case t if t.conformsTo(CatalystSerializer[RasterRef].schema) =>
+ (row: InternalRow) ⇒ row.to[RasterRef]
+ }
+
+ /** Partial function for pulling a CellGrid from an input row. */
+ lazy val gridExtractor: PartialFunction[DataType, InternalRow ⇒ CellGrid] = {
+ case _: TileUDT ⇒
+ (row: InternalRow) ⇒ row.to[Tile](TileUDT.tileSerializer)
+ case _: RasterSourceUDT ⇒
+ (row: InternalRow) ⇒ row.to[RasterSource](RasterSourceUDT.rasterSourceSerializer)
+ case t if t.conformsTo(CatalystSerializer[RasterRef].schema) ⇒
+ (row: InternalRow) ⇒ row.to[RasterRef]
+ }
+
+ sealed trait TileOrNumberArg
+ sealed trait NumberArg extends TileOrNumberArg
+ case class TileArg(tile: Tile, ctx: Option[TileContext]) extends TileOrNumberArg
+ case class DoubleArg(value: Double) extends NumberArg
+ case class IntegerArg(value: Int) extends NumberArg
+
+ lazy val tileOrNumberExtractor: PartialFunction[DataType, Any => TileOrNumberArg] =
+ tileArgExtractor.orElse(numberArgExtractor)
+
+ lazy val tileArgExtractor: PartialFunction[DataType, Any => TileArg] = {
+ case t if tileExtractor.isDefinedAt(t) => {
+ case ir: InternalRow =>
+ val (tile, ctx) = tileExtractor(t)(ir)
+ TileArg(tile, ctx)
+ }
+ }
+
+ lazy val numberArgExtractor: PartialFunction[DataType, Any => NumberArg] =
+ doubleArgExtractor.orElse(intArgExtractor)
+
+ lazy val doubleArgExtractor: PartialFunction[DataType, Any => DoubleArg] = {
+ case _: DoubleType | _: FloatType | _: DecimalType => {
+ case d: Double => DoubleArg(d)
+ case f: Float => DoubleArg(f.toDouble)
+ case d: Decimal => DoubleArg(d.toDouble)
+ }
+ }
+
+ lazy val intArgExtractor: PartialFunction[DataType, Any => IntegerArg] = {
+ case _: IntegerType | _: ByteType | _: ShortType => {
+ case i: Int => IntegerArg(i)
+ case b: Byte => IntegerArg(b)
+ case s: Short => IntegerArg(s.toInt)
+ case c: Char => IntegerArg(c.toInt)
+ }
+ }
+}
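
Because the object is `private[expressions]`, these extractors are consumed from the expression traits within this package. A sketch of the dispatch they enable (`dt` and `input` are placeholders):

  // (within package astraea.spark.rasterframes.expressions)
  import astraea.spark.rasterframes.expressions.DynamicExtractors._
  import org.apache.spark.sql.rf.TileUDT
  import org.apache.spark.sql.types.DataType

  val dt: DataType = new TileUDT()
  val input: Any   = ???  // an evaluated child value, e.g. an InternalRow
  if (tileOrNumberExtractor.isDefinedAt(dt))
    tileOrNumberExtractor(dt)(input) match {
      case TileArg(tile, ctx) => // a tile plus optional extent/CRS context
      case DoubleArg(d)       => // floating-point scalar
      case IntegerArg(i)      => // integral scalar
    }
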
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/GeomDeserializerSupport.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/GeomDeserializerSupport.scala
deleted file mode 100644
index ce17b3430..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/GeomDeserializerSupport.scala
+++ /dev/null
@@ -1,23 +0,0 @@
-package astraea.spark.rasterframes.expressions
-
-import com.vividsolutions.jts.geom.Geometry
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.jts.AbstractGeometryUDT
-
-/**
- * Support for deserializing JTS geometry inside expressions.
- *
- * @since 2/22/18
- */
-trait GeomDeserializerSupport {
- def extractGeometry(expr: Expression, input: Any): Geometry = {
- input match {
- case g: Geometry ⇒ g
- case r: InternalRow ⇒
- expr.dataType match {
- case udt: AbstractGeometryUDT[_] ⇒ udt.deserialize(r)
- }
- }
- }
-}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/GetEnvelope.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/GetEnvelope.scala
deleted file mode 100644
index bdecb45cd..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/GetEnvelope.scala
+++ /dev/null
@@ -1,36 +0,0 @@
-package astraea.spark.rasterframes.expressions
-import astraea.spark.rasterframes.encoders.EnvelopeEncoder
-import com.vividsolutions.jts.geom.Envelope
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
-import org.apache.spark.sql.catalyst.expressions.{Expression, UnaryExpression}
-import org.apache.spark.sql.rf._
-import org.apache.spark.sql.types._
-import org.apache.spark.sql.{Column, TypedColumn}
-
-
-/**
- * Extracts the bounding box (envelope) of arbitrary JTS Geometry.
- *
- * @since 2/22/18
- */
-@deprecated("Replace usages of this with GeometryToBounds", "11/4/2018")
-case class GetEnvelope(child: Expression) extends UnaryExpression
- with CodegenFallback with GeomDeserializerSupport {
-
- override def nodeName: String = "envelope"
-
- override protected def nullSafeEval(input: Any): Any = {
- val geom = extractGeometry(child, input)
- val env = geom.getEnvelopeInternal
- InternalRow(env.getMinX, env.getMaxX, env.getMinY, env.getMaxY)
- }
-
- def dataType: DataType = EnvelopeEncoder.schema
-}
-
-object GetEnvelope {
- import astraea.spark.rasterframes.encoders.StandardEncoders._
- def apply(col: Column): TypedColumn[Any, Envelope] =
- new GetEnvelope(col.expr).asColumn.as[Envelope]
-}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/NullToValue.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/NullToValue.scala
new file mode 100644
index 000000000..edc52fcf7
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/NullToValue.scala
@@ -0,0 +1,41 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.UnaryExpression
+
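+/**
+ * Mixin for [[UnaryExpression]]s that substitutes a default value (`na`) when
+ * either the input row or the evaluated child expression is null, instead of
+ * propagating the null.
+ */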
+trait NullToValue { self: UnaryExpression =>
+
+ def na: Any
+
+ override def eval(input: InternalRow): Any = {
+ if (input == null) na
+ else {
+ val value = child.eval(input)
+ if (value == null) {
+ na
+ } else {
+ nullSafeEval(value)
+ }
+ }
+ }
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/OnCellGridExpression.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/OnCellGridExpression.scala
index 482e300d2..b856ae2be 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/OnCellGridExpression.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/OnCellGridExpression.scala
@@ -21,16 +21,12 @@
package astraea.spark.rasterframes.expressions
-import astraea.spark.rasterframes.encoders.CatalystSerializer
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.ref.{RasterRef, RasterSource}
-import geotrellis.raster.{CellGrid, Grid, Tile}
+import astraea.spark.rasterframes.expressions.DynamicExtractors._
+import geotrellis.raster.CellGrid
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
import org.apache.spark.sql.catalyst.expressions.UnaryExpression
-import org.apache.spark.sql.rf._
-import org.apache.spark.sql.types.DataType
/**
* Implements boilerplate for subtype expressions processing TileUDT, RasterSourceUDT, and RasterRefs
@@ -39,19 +35,8 @@ import org.apache.spark.sql.types.DataType
* @since 11/4/18
*/
trait OnCellGridExpression extends UnaryExpression {
- // TODO: DRY w.r.t. OnProjectedRasterExpression....
-
- private val toGrid: PartialFunction[DataType, InternalRow ⇒ CellGrid] = {
- case _: TileUDT ⇒
- (row: InternalRow) ⇒ row.to[Tile]
- case _: RasterSourceUDT ⇒
- (row: InternalRow) ⇒ row.to[RasterSource]
- case t if t.conformsTo(CatalystSerializer[RasterRef].schema) ⇒
- (row: InternalRow) ⇒ row.to[RasterRef]
- }
-
override def checkInputDataTypes(): TypeCheckResult = {
- if (!toGrid.isDefinedAt(child.dataType)) {
+ if (!gridExtractor.isDefinedAt(child.dataType)) {
TypeCheckFailure(s"Input type '${child.dataType}' does not conform to `Grid`.")
}
else TypeCheckSuccess
@@ -60,7 +45,7 @@ trait OnCellGridExpression extends UnaryExpression {
final override protected def nullSafeEval(input: Any): Any = {
input match {
case row: InternalRow ⇒
- val g = toGrid(child.dataType)(row)
+ val g = gridExtractor(child.dataType)(row)
eval(g)
case o ⇒ throw new IllegalArgumentException(s"Unsupported input type: $o")
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/OnProjectedRasterExpression.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/OnTileContextExpression.scala
similarity index 58%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/OnProjectedRasterExpression.scala
rename to core/src/main/scala/astraea/spark/rasterframes/expressions/OnTileContextExpression.scala
index f857ff852..a8797ae49 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/OnProjectedRasterExpression.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/OnTileContextExpression.scala
@@ -21,17 +21,12 @@
package astraea.spark.rasterframes.expressions
-import astraea.spark.rasterframes.encoders.CatalystSerializer
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.ref.{ProjectedRasterLike, RasterRef, RasterSource}
-import astraea.spark.rasterframes.tiles.ProjectedRasterTile
-import geotrellis.raster.Tile
+import astraea.spark.rasterframes.expressions.DynamicExtractors._
+import astraea.spark.rasterframes.model.TileContext
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
import org.apache.spark.sql.catalyst.expressions.UnaryExpression
-import org.apache.spark.sql.rf._
-import org.apache.spark.sql.types.DataType
/**
* Implements boilerplate for subtype expressions processing TileUDT (when ProjectedRasterTile), RasterSourceUDT, and
@@ -39,26 +34,10 @@ import org.apache.spark.sql.types.DataType
*
* @since 11/3/18
*/
-trait OnProjectedRasterExpression extends UnaryExpression {
-
- private val toPRL: PartialFunction[DataType, InternalRow ⇒ ProjectedRasterLike] = {
- case _: TileUDT ⇒
- (row: InternalRow) ⇒ {
- val tile = row.to[Tile]
- tile match {
- case pr: ProjectedRasterTile ⇒ pr
- // TODO: don't let match error happen. Refactor this sub case up a level.
- // Not sure how to do do it since we're returning functions that are evaluated later.
- }
- }
- case _: RasterSourceUDT ⇒
- (row: InternalRow) ⇒ row.to[RasterSource]
- case t if t.conformsTo(CatalystSerializer[RasterRef].schema) ⇒
- (row: InternalRow) ⇒ row.to[RasterRef]
- }
+trait OnTileContextExpression extends UnaryExpression {
override def checkInputDataTypes(): TypeCheckResult = {
- if (!toPRL.isDefinedAt(child.dataType)) {
+ if (!projectedRasterLikeExtractor.isDefinedAt(child.dataType)) {
TypeCheckFailure(s"Input type '${child.dataType}' does not conform to `ProjectedRasterLike`.")
}
else TypeCheckSuccess
@@ -67,13 +46,12 @@ trait OnProjectedRasterExpression extends UnaryExpression {
final override protected def nullSafeEval(input: Any): Any = {
input match {
case row: InternalRow ⇒
- val prl = toPRL(child.dataType)(row)
- eval(prl)
+ val prl = projectedRasterLikeExtractor(child.dataType)(row)
+ eval(TileContext(prl.extent, prl.crs))
case o ⇒ throw new IllegalArgumentException(s"Unsupported input type: $o")
}
}
/** Implemented by subtypes to process incoming ProjectedRasterLike entity. */
- def eval(prl: ProjectedRasterLike): Any
-
+ def eval(ctx: TileContext): Any
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/RequiresTile.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/RequiresTile.scala
deleted file mode 100644
index ee11744d0..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/RequiresTile.scala
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2017 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- */
-
-package astraea.spark.rasterframes.expressions
-
-import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
-import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
-import org.apache.spark.sql.catalyst.expressions.{Expression, UnaryExpression}
-import org.apache.spark.sql.rf.TileUDT
-
-/**
- * Mixin for indicating an expression requires a Tile for input.
- *
- * @since 12/28/17
- */
-trait RequiresTile { self: UnaryExpression ⇒
- abstract override def checkInputDataTypes(): TypeCheckResult = RequiresTile.check(child)
-}
-
-object RequiresTile {
- def check(expr: Expression): TypeCheckResult =
- if(expr.dataType.isInstanceOf[TileUDT]) TypeCheckSuccess
- else TypeCheckFailure(
- s"Expected 'TileUDT' but received '${expr.dataType.simpleString}'"
- )
-}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/SpatialRelation.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/SpatialRelation.scala
index dde85defc..e994c8a64 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/SpatialRelation.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/SpatialRelation.scala
@@ -21,21 +21,32 @@ package astraea.spark.rasterframes.expressions
import astraea.spark.rasterframes.expressions.SpatialRelation.RelationPredicate
import com.vividsolutions.jts.geom._
+import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{ScalaUDF, _}
+import org.apache.spark.sql.jts.AbstractGeometryUDT
import org.apache.spark.sql.types._
import org.locationtech.geomesa.spark.jts.udf.SpatialRelationFunctions._
-
-
/**
* Determine if two spatial constructs intersect each other.
*
* @since 12/28/17
*/
abstract class SpatialRelation extends BinaryExpression
- with CodegenFallback with GeomDeserializerSupport {
+ with CodegenFallback {
+
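+  // Inlined from the now-removed GeomDeserializerSupport trait: accepts either a
+  // raw JTS Geometry or an InternalRow deserializable via its geometry UDT.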
+ def extractGeometry(expr: Expression, input: Any): Geometry = {
+ input match {
+ case g: Geometry ⇒ g
+ case r: InternalRow ⇒
+ expr.dataType match {
+ case udt: AbstractGeometryUDT[_] ⇒ udt.deserialize(r)
+ }
+ }
+ }
+ // TODO: replace with serializer.
lazy val jtsPointEncoder = ExpressionEncoder[Point]()
override def toString: String = s"$nodeName($left, $right)"
diff --git a/core/src/main/scala/astraea/spark/rasterframes/functions/TileAssembler.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/TileAssembler.scala
similarity index 92%
rename from core/src/main/scala/astraea/spark/rasterframes/functions/TileAssembler.scala
rename to core/src/main/scala/astraea/spark/rasterframes/expressions/TileAssembler.scala
index 6c55982c3..c3a32267f 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/functions/TileAssembler.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/TileAssembler.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2017 Astraea, Inc.
+ * Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,21 +15,20 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.functions
+package astraea.spark.rasterframes.expressions
+
+import java.nio.ByteBuffer
-import java.nio.{ByteBuffer, DoubleBuffer}
-import astraea.spark.rasterframes.encoders._
+import astraea.spark.rasterframes.expressions.TileAssembler.TileBuffer
import astraea.spark.rasterframes.util._
-import astraea.spark.rasterframes.NOMINAL_TILE_SIZE
-import astraea.spark.rasterframes.functions.TileAssembler.TileBuffer
import geotrellis.raster.{DataType => _, _}
import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.aggregate.{
- ImperativeAggregate, TypedImperativeAggregate
-}
-import org.apache.spark.sql.catalyst.expressions.{Expression, ImplicitCastInputTypes, Literal}
+import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate}
+import org.apache.spark.sql.catalyst.expressions.{Expression, ImplicitCastInputTypes}
import org.apache.spark.sql.rf.TileUDT
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Column, TypedColumn}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryLocalRasterOp.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryLocalRasterOp.scala
new file mode 100644
index 000000000..049e6d9a1
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryLocalRasterOp.scala
@@ -0,0 +1,58 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions
+
+import astraea.spark.rasterframes.encoders.CatalystSerializer._
+import astraea.spark.rasterframes.expressions.DynamicExtractors._
+import com.typesafe.scalalogging.LazyLogging
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
+import org.apache.spark.sql.catalyst.expressions.UnaryExpression
+import org.apache.spark.sql.rf.TileUDT
+import org.apache.spark.sql.types.DataType
+
+/** Operation on a tile returning a tile. */
+trait UnaryLocalRasterOp extends UnaryExpression with LazyLogging {
+
+ override def dataType: DataType = child.dataType
+
+ override def checkInputDataTypes(): TypeCheckResult = {
+ if (!tileExtractor.isDefinedAt(child.dataType)) {
+ TypeCheckFailure(s"Input type '${child.dataType}' does not conform to a raster type.")
+ }
+ else TypeCheckSuccess
+ }
+
+ override protected def nullSafeEval(input: Any): Any = {
+ implicit val tileSer = TileUDT.tileSerializer
+ val (childTile, childCtx) = tileExtractor(child.dataType)(row(input))
+
+ childCtx match {
+ case Some(ctx) => ctx.toProjectRasterTile(op(childTile)).toInternalRow
+ case None => op(childTile).toInternalRow
+ }
+ }
+
+ protected def op(child: Tile): Tile
+}
+
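+// Illustrative sketch (not part of this change): a minimal concrete UnaryLocalRasterOp.
+// `HypotheticalAbs` is an invented name for demonstration; it applies geotrellis'
+// mapalgebra Abs to each tile, with the TileContext preserved by nullSafeEval above.
+//
+// case class HypotheticalAbs(child: Expression) extends UnaryLocalRasterOp
+//   with org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback {
+//   override def nodeName: String = "hypothetical_abs"
+//   override protected def op(t: Tile): Tile = geotrellis.raster.mapalgebra.local.Abs(t)
+// }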
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryRasterAggregate.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryRasterAggregate.scala
new file mode 100644
index 000000000..a28ae6753
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryRasterAggregate.scala
@@ -0,0 +1,45 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions
+import astraea.spark.rasterframes.expressions.DynamicExtractors.rowTileExtractor
+import geotrellis.raster.Tile
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.catalyst.expressions.{Expression, ScalaUDF}
+import org.apache.spark.sql.catalyst.expressions.aggregate.DeclarativeAggregate
+import scala.reflect.runtime.universe._
+
+/** Mixin providing boilerplate for DeclarativeAggregates over tile-conforming columns. */
+trait UnaryRasterAggregate extends DeclarativeAggregate {
+ def child: Expression
+
+ def nullable: Boolean = child.nullable
+
+ def children = Seq(child)
+
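+  // Lifts a Tile => R function into a Catalyst expression (via ScalaUDF), first
+  // recovering the Tile from raw Tile or row-encoded inputs with extractTileFromAny.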
+ protected def tileOpAsExpression[R: TypeTag](name: String, op: Tile => R): Expression => ScalaUDF =
+ udfexpr[R, Any](name, (a: Any) => op(extractTileFromAny(a)))
+
+ protected val extractTileFromAny = (a: Any) => a match {
+ case t: Tile => t
+ case r: Row => rowTileExtractor(child.dataType)(r)._1
+ }
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryRasterOp.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryRasterOp.scala
new file mode 100644
index 000000000..f21dc4bb5
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryRasterOp.scala
@@ -0,0 +1,46 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions
+import astraea.spark.rasterframes.expressions.DynamicExtractors._
+import astraea.spark.rasterframes.model.TileContext
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
+import org.apache.spark.sql.catalyst.expressions.UnaryExpression
+
+/** Boilerplate for expressions operating on a single Tile-like input. */
+trait UnaryRasterOp extends UnaryExpression {
+ override def checkInputDataTypes(): TypeCheckResult = {
+ if (!tileExtractor.isDefinedAt(child.dataType)) {
+ TypeCheckFailure(s"Input type '${child.dataType}' does not conform to a raster type.")
+ } else TypeCheckSuccess
+ }
+
+ override protected def nullSafeEval(input: Any): Any = {
+ // TODO: Ensure InternalRowTile is preserved
+ val (tile, ctx) = tileExtractor(child.dataType)(row(input))
+ eval(tile, ctx)
+ }
+
+ protected def eval(tile: Tile, ctx: Option[TileContext]): Any
+}
+
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/ExtractTile.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/ExtractTile.scala
new file mode 100644
index 000000000..7cb7ba3b1
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/ExtractTile.scala
@@ -0,0 +1,53 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.accessors
+
+import astraea.spark.rasterframes.encoders.CatalystSerializer._
+import astraea.spark.rasterframes.expressions.UnaryRasterOp
+import astraea.spark.rasterframes.model.TileContext
+import astraea.spark.rasterframes.tiles.InternalRowTile
+import astraea.spark.rasterframes.tiles.ProjectedRasterTile.ConcreteProjectedRasterTile
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.Expression
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.rf.TileUDT
+import org.apache.spark.sql.types.DataType
+import org.apache.spark.sql.{Column, TypedColumn}
+
+/** Expression to extract a tile from any of several types that contain tiles. */
+case class ExtractTile(child: Expression) extends UnaryRasterOp with CodegenFallback {
+ override def dataType: DataType = new TileUDT()
+
+ override def nodeName: String = "extract_tile"
+ implicit val tileSer = TileUDT.tileSerializer
+ override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = tile match {
+ case irt: InternalRowTile => irt.mem
+ case tile: ConcreteProjectedRasterTile => tile.t.toInternalRow
+ case tile: Tile => tile.toInternalRow
+ }
+}
+
+object ExtractTile {
+ import astraea.spark.rasterframes.encoders.StandardEncoders.singlebandTileEncoder
+ def apply(input: Column): TypedColumn[Any, Tile] =
+ new Column(new ExtractTile(input.expr)).as[Tile]
+}
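+// Illustrative usage (assumed column name, not part of this change):
+//   df.select(ExtractTile($"proj_raster"))
+// yields a plain Tile column whether the input was a Tile, a
+// ConcreteProjectedRasterTile, or an InternalRowTile.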
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/GetCRS.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetCRS.scala
similarity index 80%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/GetCRS.scala
rename to core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetCRS.scala
index 20974b891..1a6d29df0 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/GetCRS.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetCRS.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea, Inc.
+ * Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -19,17 +19,17 @@
*
*/
-package astraea.spark.rasterframes.expressions
+package astraea.spark.rasterframes.expressions.accessors
import astraea.spark.rasterframes.encoders.CatalystSerializer
import astraea.spark.rasterframes.encoders.CatalystSerializer._
import astraea.spark.rasterframes.encoders.StandardEncoders.crsEncoder
-import astraea.spark.rasterframes.ref.ProjectedRasterLike
+import astraea.spark.rasterframes.expressions.OnTileContextExpression
+import astraea.spark.rasterframes.model.TileContext
import geotrellis.proj4.CRS
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
-import org.apache.spark.sql.rf._
import org.apache.spark.sql.types.DataType
import org.apache.spark.sql.{Column, TypedColumn}
@@ -38,10 +38,10 @@ import org.apache.spark.sql.{Column, TypedColumn}
*
* @since 9/9/18
*/
-case class GetCRS(child: Expression) extends OnProjectedRasterExpression with CodegenFallback {
+case class GetCRS(child: Expression) extends OnTileContextExpression with CodegenFallback {
override def dataType: DataType = CatalystSerializer[CRS].schema
override def nodeName: String = "crs"
- override def eval(prl: ProjectedRasterLike): InternalRow = prl.crs.toInternalRow
+ override def eval(ctx: TileContext): InternalRow = ctx.crs.toInternalRow
}
object GetCRS {
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/GetCellType.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetCellType.scala
similarity index 89%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/GetCellType.scala
rename to core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetCellType.scala
index 34b723631..eeb521e4b 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/GetCellType.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetCellType.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2017 Astraea, Inc.
+ * Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,12 +15,15 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.expressions
+package astraea.spark.rasterframes.expressions.accessors
import astraea.spark.rasterframes.encoders.CatalystSerializer
import astraea.spark.rasterframes.encoders.CatalystSerializer._
+import astraea.spark.rasterframes.expressions.OnCellGridExpression
import geotrellis.raster.{CellGrid, CellType}
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/GetDimensions.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetDimensions.scala
similarity index 85%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/GetDimensions.scala
rename to core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetDimensions.scala
index caba2de23..3589dbc1b 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/GetDimensions.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetDimensions.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2017 Astraea, Inc.
+ * Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,13 +15,16 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.expressions
+package astraea.spark.rasterframes.expressions.accessors
-import astraea.spark.rasterframes.TileDimensions
import astraea.spark.rasterframes.encoders.CatalystSerializer
import astraea.spark.rasterframes.encoders.CatalystSerializer._
+import astraea.spark.rasterframes.expressions.OnCellGridExpression
+import astraea.spark.rasterframes.model.TileDimensions
import geotrellis.raster.CellGrid
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.expressions.Expression
@@ -41,7 +44,6 @@ case class GetDimensions(child: Expression) extends OnCellGridExpression
}
object GetDimensions {
- import astraea.spark.rasterframes.encoders.SparkDefaultEncoders._
def apply(col: Column): Column =
new Column(new GetDimensions(col.expr)).as[TileDimensions]
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetEnvelope.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetEnvelope.scala
new file mode 100644
index 000000000..551f64eb0
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetEnvelope.scala
@@ -0,0 +1,66 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.accessors
+
+import astraea.spark.rasterframes.encoders.EnvelopeEncoder
+import com.vividsolutions.jts.geom.{Envelope, Geometry}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.expressions.{Expression, UnaryExpression}
+import org.apache.spark.sql.jts.AbstractGeometryUDT
+import org.apache.spark.sql.rf._
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.{Column, TypedColumn}
+
+/**
+ * Extracts the bounding box (envelope) of arbitrary JTS Geometry.
+ *
+ * @since 2/22/18
+ */
+@deprecated("Replace usages of this with GeometryToBounds", "11/4/2018")
+case class GetEnvelope(child: Expression) extends UnaryExpression with CodegenFallback {
+
+ override def nodeName: String = "envelope"
+ def extractGeometry(expr: Expression, input: Any): Geometry = {
+ input match {
+ case g: Geometry => g
+ case r: InternalRow =>
+ expr.dataType match {
+ case udt: AbstractGeometryUDT[_] => udt.deserialize(r)
+ }
+ }
+ }
+
+ override protected def nullSafeEval(input: Any): Any = {
+ val geom = extractGeometry(child, input)
+ val env = geom.getEnvelopeInternal
+ InternalRow(env.getMinX, env.getMaxX, env.getMinY, env.getMaxY)
+ }
+
+ def dataType: DataType = EnvelopeEncoder.schema
+}
+
+object GetEnvelope {
+ import astraea.spark.rasterframes.encoders.StandardEncoders._
+ def apply(col: Column): TypedColumn[Any, Envelope] =
+ new GetEnvelope(col.expr).asColumn.as[Envelope]
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/GetExtent.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetExtent.scala
similarity index 80%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/GetExtent.scala
rename to core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetExtent.scala
index 71f47a053..c3e664887 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/GetExtent.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetExtent.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea, Inc.
+ * Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -19,12 +19,13 @@
*
*/
-package astraea.spark.rasterframes.expressions
+package astraea.spark.rasterframes.expressions.accessors
import astraea.spark.rasterframes.encoders.CatalystSerializer
import astraea.spark.rasterframes.encoders.CatalystSerializer._
import astraea.spark.rasterframes.encoders.StandardEncoders.extentEncoder
-import astraea.spark.rasterframes.ref.ProjectedRasterLike
+import astraea.spark.rasterframes.expressions.OnTileContextExpression
+import astraea.spark.rasterframes.model.TileContext
import geotrellis.vector.Extent
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
@@ -37,10 +38,10 @@ import org.apache.spark.sql.{Column, TypedColumn}
*
* @since 9/10/18
*/
-case class GetExtent(child: Expression) extends OnProjectedRasterExpression with CodegenFallback {
+case class GetExtent(child: Expression) extends OnTileContextExpression with CodegenFallback {
override def dataType: DataType = CatalystSerializer[Extent].schema
override def nodeName: String = "extent"
- override def eval(prl: ProjectedRasterLike): InternalRow = prl.extent.toInternalRow
+ override def eval(ctx: TileContext): InternalRow = ctx.extent.toInternalRow
}
object GetExtent {
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetTileContext.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetTileContext.scala
new file mode 100644
index 000000000..98b7eb401
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetTileContext.scala
@@ -0,0 +1,46 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.accessors
+import astraea.spark.rasterframes.encoders.CatalystSerializer
+import astraea.spark.rasterframes.encoders.CatalystSerializer._
+import astraea.spark.rasterframes.expressions.UnaryRasterOp
+import astraea.spark.rasterframes.model.TileContext
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.Expression
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.types.DataType
+import org.apache.spark.sql.{Column, TypedColumn}
+
+case class GetTileContext(child: Expression) extends UnaryRasterOp with CodegenFallback {
+ override def dataType: DataType = CatalystSerializer[TileContext].schema
+
+ override def nodeName: String = "get_tile_context"
+ override protected def eval(tile: Tile, ctx: Option[TileContext]): Any =
+ ctx.map(_.toInternalRow).orNull
+}
+
+object GetTileContext {
+ import astraea.spark.rasterframes.encoders.StandardEncoders.tileContextEncoder
+
+ def apply(input: Column): TypedColumn[Any, TileContext] =
+ new Column(new GetTileContext(input.expr)).as[TileContext]
+}
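+// Illustrative usage (assumed column name): GetTileContext($"tile") returns the
+// tile's (extent, crs) pair as a TileContext, or null for context-free tiles.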
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellCountAggregate.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellCountAggregate.scala
new file mode 100644
index 000000000..0a4424665
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellCountAggregate.scala
@@ -0,0 +1,106 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.aggstats
+
+import astraea.spark.rasterframes.expressions.UnaryRasterAggregate
+import astraea.spark.rasterframes.expressions.tilestats.{DataCells, NoDataCells}
+import org.apache.spark.sql.catalyst.dsl.expressions._
+import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, _}
+import org.apache.spark.sql.types.{LongType, Metadata}
+import org.apache.spark.sql.{Column, TypedColumn}
+
+/**
+ * Cell count (data or NoData) aggregate function.
+ *
+ * @since 10/5/17
+ * @param isData true to count data (non-NoData) cells; false to count NoData cells.
+ */
+abstract class CellCountAggregate(isData: Boolean) extends UnaryRasterAggregate {
+ private lazy val count =
+ AttributeReference("count", LongType, false, Metadata.empty)()
+
+ override lazy val aggBufferAttributes = Seq(
+ count
+ )
+
+ val initialValues = Seq(
+ Literal(0L)
+ )
+
+ private def CellTest =
+ if (isData) tileOpAsExpression("data_cells", DataCells.op)
+ else tileOpAsExpression("no_data_cells", NoDataCells.op)
+
+ val updateExpressions = Seq(
+ If(IsNull(child), count, Add(count, CellTest(child)))
+ )
+
+ val mergeExpressions = Seq(
+ count.left + count.right
+ )
+
+ val evaluateExpression = count
+
+ def dataType = LongType
+}
+
+object CellCountAggregate {
+ import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.longEnc
+
+ @ExpressionDescription(
+ usage = "_FUNC_(tile) - Count the total data (non-no-data) cells in a tile column.",
+ arguments = """
+ Arguments:
+ * tile - tile column to analyze""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ 92384753"""
+ )
+ case class DataCells(child: Expression) extends CellCountAggregate(true) {
+ override def nodeName: String = "agg_data_cells"
+ }
+ object DataCells {
+ def apply(tile: Column): TypedColumn[Any, Long] =
+ new Column(DataCells(tile.expr).toAggregateExpression()).as[Long]
+ }
+ @ExpressionDescription(
+ usage = "_FUNC_(tile) - Count the total no-data cells in a tile column.",
+ arguments = """
+ Arguments:
+ * tile - tile column to analyze""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ 23584"""
+ )
+ case class NoDataCells(child: Expression) extends CellCountAggregate(false) {
+ override def nodeName: String = "agg_no_data_cells"
+ }
+ object NoDataCells {
+ def apply(tile: Column): TypedColumn[Any, Long] =
+ new Column(NoDataCells(tile.expr).toAggregateExpression()).as[Long]
+ }
+}
+
+
+
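+// Illustrative usage (assumed column name):
+//   df.select(CellCountAggregate.DataCells($"tile"), CellCountAggregate.NoDataCells($"tile"))
+// aggregates total data and no-data cell counts across all tiles in the column.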
diff --git a/core/src/main/scala/astraea/spark/rasterframes/functions/CellMeanAggregate.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellMeanAggregate.scala
similarity index 50%
rename from core/src/main/scala/astraea/spark/rasterframes/functions/CellMeanAggregate.scala
rename to core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellMeanAggregate.scala
index f682505a7..846f169cb 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/functions/CellMeanAggregate.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellMeanAggregate.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2017 Astraea, Inc.
+ * Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,27 +15,33 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.functions
+package astraea.spark.rasterframes.expressions.aggstats
-import org.apache.spark.sql.{Column, TypedColumn}
-import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression}
-import org.apache.spark.sql.catalyst.expressions.aggregate.DeclarativeAggregate
-import org.apache.spark.sql.types.{DoubleType, LongType, Metadata}
+import astraea.spark.rasterframes.expressions.UnaryRasterAggregate
+import astraea.spark.rasterframes.expressions.tilestats.{DataCells, Sum}
import org.apache.spark.sql.catalyst.dsl.expressions._
-import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.functions._
-import org.apache.spark.sql.rf.{TileUDT, _}
+import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, _}
+import org.apache.spark.sql.types.{DoubleType, LongType, Metadata}
+import org.apache.spark.sql.{Column, TypedColumn}
/**
* Cell mean aggregate function
*
* @since 10/5/17
*/
-case class CellMeanAggregate(child: Expression) extends DeclarativeAggregate {
-
- override def prettyName: String = "agg_mean"
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Computes the mean of all cell values.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ ....
+ """)
+case class CellMeanAggregate(child: Expression) extends UnaryRasterAggregate {
+ override def nodeName: String = "agg_mean"
private lazy val sum =
AttributeReference("sum", DoubleType, false, Metadata.empty)()
@@ -44,38 +50,36 @@ case class CellMeanAggregate(child: Expression) extends DeclarativeAggregate {
override lazy val aggBufferAttributes = Seq(sum, count)
- val initialValues = Seq(
+ override val initialValues = Seq(
Literal(0.0),
Literal(0L)
)
- private val dataCellCounts = udf(dataCells)
- private val sumCells = udf(tileSum)
-
- val updateExpressions = Seq(
- If(IsNull(child), sum , Add(sum, sumCells(new Column(child)).expr)),
- If(IsNull(child), count, Add(count, dataCellCounts(new Column(child)).expr))
+ // Can't figure out why we can't just use the Expression directly;
+ // wrapping in a UDF expression is necessary to properly handle null rows. For example,
+ // if we use `tilestats.Sum` directly, we get an NPE when the stage is executed.
+ private val DataCellCounts = tileOpAsExpression("data_cells", DataCells.op)
+ private val SumCells = tileOpAsExpression("sum_cells", Sum.op)
+
+ override val updateExpressions = Seq(
+ // TODO: Figure out why this doesn't work. See above.
+ //If(IsNull(child), sum , Add(sum, Sum(child))),
+ If(IsNull(child), sum , Add(sum, SumCells(child))),
+ If(IsNull(child), count, Add(count, DataCellCounts(child)))
)
- val mergeExpressions = Seq(
+ override val mergeExpressions = Seq(
sum.left + sum.right,
count.left + count.right
)
- val evaluateExpression = sum / new Cast(count, DoubleType)
-
- def inputTypes = Seq(TileUDT)
-
- def nullable = true
-
- def dataType = DoubleType
-
- def children = Seq(child)
+ override val evaluateExpression = sum / new Cast(count, DoubleType)
+ override def dataType = DoubleType
}
object CellMeanAggregate {
- import astraea.spark.rasterframes.encoders.SparkDefaultEncoders._
+ import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc
/** Computes the column aggregate mean. */
def apply(tile: Column): TypedColumn[Any, Double] =
new Column(new CellMeanAggregate(tile.expr).toAggregateExpression()).as[Double]
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellStatsAggregate.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellStatsAggregate.scala
new file mode 100644
index 000000000..cfcde38a5
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellStatsAggregate.scala
@@ -0,0 +1,165 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.aggstats
+
+import astraea.spark.rasterframes.expressions.accessors.ExtractTile
+import astraea.spark.rasterframes.stats.CellStatistics
+import geotrellis.raster.{Tile, _}
+import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, AggregateMode, Complete}
+import org.apache.spark.sql.catalyst.expressions.{ExprId, Expression, ExpressionDescription, NamedExpression}
+import org.apache.spark.sql.execution.aggregate.ScalaUDAF
+import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
+import org.apache.spark.sql.rf.TileUDT
+import org.apache.spark.sql.types.{DataType, _}
+import org.apache.spark.sql.{Column, Row, TypedColumn}
+
+/**
+ * Statistics aggregation function for a full column of tiles.
+ *
+ * @since 4/17/17
+ */
+case class CellStatsAggregate() extends UserDefinedAggregateFunction {
+ import CellStatsAggregate.C
+ // TODO: rewrite as a DeclarativeAggregate
+ private val TileType = new TileUDT()
+
+ override def inputSchema: StructType = StructType(StructField("value", TileType) :: Nil)
+
+ override def dataType: DataType = StructType(Seq(
+ StructField("data_cells", LongType),
+ StructField("no_data_cells", LongType),
+ StructField("min", DoubleType),
+ StructField("max", DoubleType),
+ StructField("mean", DoubleType),
+ StructField("variance", DoubleType)
+ ))
+
+ override def bufferSchema: StructType = StructType(Seq(
+ StructField("data_cells", LongType),
+ StructField("no_data_cells", LongType),
+ StructField("min", DoubleType),
+ StructField("max", DoubleType),
+ StructField("sum", DoubleType),
+ StructField("sumSqr", DoubleType)
+ ))
+
+ override def deterministic: Boolean = true
+
+ override def initialize(buffer: MutableAggregationBuffer): Unit = {
+ buffer(C.COUNT) = 0L
+ buffer(C.NODATA) = 0L
+ buffer(C.MIN) = Double.MaxValue
+ buffer(C.MAX) = Double.MinValue
+ buffer(C.SUM) = 0.0
+ buffer(C.SUM_SQRS) = 0.0
+ }
+
+ override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
+ if (!input.isNullAt(0)) {
+ val tile = input.getAs[Tile](0)
+ var count = buffer.getLong(C.COUNT)
+ var nodata = buffer.getLong(C.NODATA)
+ var min = buffer.getDouble(C.MIN)
+ var max = buffer.getDouble(C.MAX)
+ var sum = buffer.getDouble(C.SUM)
+ var sumSqr = buffer.getDouble(C.SUM_SQRS)
+
+ tile.foreachDouble(
+ c =>
+ if (isData(c)) {
+ count += 1
+ min = math.min(min, c)
+ max = math.max(max, c)
+ sum = sum + c
+ sumSqr = sumSqr + c * c
+ } else nodata += 1)
+
+ buffer(C.COUNT) = count
+ buffer(C.NODATA) = nodata
+ buffer(C.MIN) = min
+ buffer(C.MAX) = max
+ buffer(C.SUM) = sum
+ buffer(C.SUM_SQRS) = sumSqr
+ }
+ }
+
+ override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
+ buffer1(C.COUNT) = buffer1.getLong(C.COUNT) + buffer2.getLong(C.COUNT)
+ buffer1(C.NODATA) = buffer1.getLong(C.NODATA) + buffer2.getLong(C.NODATA)
+ buffer1(C.MIN) = math.min(buffer1.getDouble(C.MIN), buffer2.getDouble(C.MIN))
+ buffer1(C.MAX) = math.max(buffer1.getDouble(C.MAX), buffer2.getDouble(C.MAX))
+ buffer1(C.SUM) = buffer1.getDouble(C.SUM) + buffer2.getDouble(C.SUM)
+ buffer1(C.SUM_SQRS) = buffer1.getDouble(C.SUM_SQRS) + buffer2.getDouble(C.SUM_SQRS)
+ }
+
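+  // Derives the final statistics from the running sums: mean = sum / count and
+  // population variance = E[x^2] - mean^2.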
+ override def evaluate(buffer: Row): Any = {
+ val count = buffer.getLong(C.COUNT)
+ val sum = buffer.getDouble(C.SUM)
+ val sumSqr = buffer.getDouble(C.SUM_SQRS)
+ val mean = sum / count
+ val variance = sumSqr / count - mean * mean
+ Row(count, buffer(C.NODATA), buffer(C.MIN), buffer(C.MAX), mean, variance)
+ }
+}
+
+object CellStatsAggregate {
+ import astraea.spark.rasterframes.encoders.StandardEncoders.cellStatsEncoder
+
+ def apply(col: Column): TypedColumn[Any, CellStatistics] =
+ new Column(new CellStatsAggregateUDAF(col.expr))
+ .as(s"agg_stats($col)") // node renaming in the class doesn't seem to propagate
+ .as[CellStatistics]
+
+ /** Adapter hack to allow UserDefinedAggregateFunction to be referenced as an expression. */
+ @ExpressionDescription(
+ usage = "_FUNC_(tile) - Compute aggregate descriptive cell statistics over a tile column.",
+ arguments = """
+ Arguments:
+ * tile - tile column to analyze""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ +----------+-------------+---+-----+-------+-----------------+
+ |data_cells|no_data_cells|min|max |mean |variance |
+ +----------+-------------+---+-----+-------+-----------------+
+ |960 |40 |1.0|255.0|127.175|5441.704791666667|
+ +----------+-------------+---+-----+-------+-----------------+"""
+ )
+ class CellStatsAggregateUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId)
+ extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) {
+ def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new CellStatsAggregate()), Complete, false, NamedExpression.newExprId)
+ override def nodeName: String = "agg_stats"
+ }
+ object CellStatsAggregateUDAF {
+ def apply(child: Expression): CellStatsAggregateUDAF = new CellStatsAggregateUDAF(child)
+ }
+
+ /** Column index values. */
+ private object C {
+ final val COUNT = 0
+ final val NODATA = 1
+ final val MIN = 2
+ final val MAX = 3
+ final val SUM = 4
+ final val SUM_SQRS = 5
+ }
+}
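+// Illustrative usage (assumed column name): CellStatsAggregate($"tile") returns a
+// single CellStatistics row with data/no-data counts, min, max, mean, and variance.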
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/HistogramAggregate.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/HistogramAggregate.scala
new file mode 100644
index 000000000..7920415da
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/HistogramAggregate.scala
@@ -0,0 +1,125 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.aggstats
+
+import java.nio.ByteBuffer
+
+import astraea.spark.rasterframes.expressions.accessors.ExtractTile
+import astraea.spark.rasterframes.functions.safeEval
+import astraea.spark.rasterframes.stats.CellHistogram
+import geotrellis.raster.Tile
+import geotrellis.raster.histogram.{Histogram, StreamingHistogram}
+import geotrellis.spark.util.KryoSerializer
+import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, AggregateMode, Complete}
+import org.apache.spark.sql.catalyst.expressions.{ExprId, Expression, ExpressionDescription, NamedExpression}
+import org.apache.spark.sql.execution.aggregate.ScalaUDAF
+import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
+import org.apache.spark.sql.rf.TileUDT
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.{Column, Row, TypedColumn}
+
+/**
+ * Histogram aggregation function for a full column of tiles.
+ *
+ * @since 4/24/17
+ */
+case class HistogramAggregate(numBuckets: Int) extends UserDefinedAggregateFunction {
+ def this() = this(StreamingHistogram.DEFAULT_NUM_BUCKETS)
+ // TODO: rewrite as TypedAggregateExpression or similar.
+ private val TileType = new TileUDT()
+
+ override def inputSchema: StructType = StructType(StructField("value", TileType) :: Nil)
+
+ override def bufferSchema: StructType = StructType(StructField("buffer", BinaryType) :: Nil)
+
+ override def dataType: DataType = CellHistogram.schema
+
+ override def deterministic: Boolean = true
+
+ @transient
+ private lazy val ser = KryoSerializer.ser.newInstance()
+
+ @inline
+ private def marshall(hist: Histogram[Double]): Array[Byte] = ser.serialize(hist).array()
+
+ @inline
+ private def unmarshall(blob: Array[Byte]): Histogram[Double] = ser.deserialize(ByteBuffer.wrap(blob))
+
+ override def initialize(buffer: MutableAggregationBuffer): Unit =
+ buffer(0) = marshall(StreamingHistogram(numBuckets))
+
+ private val safeMerge = (h1: Histogram[Double], h2: Histogram[Double]) ⇒ (h1, h2) match {
+ case (null, null) => null
+ case (l, null) => l
+ case (null, r) => r
+ case (l, r) => l merge r
+ }
+
+ override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
+ val tile = input.getAs[Tile](0)
+ val hist1 = unmarshall(buffer.getAs[Array[Byte]](0))
+ val hist2 = safeEval(StreamingHistogram.fromTile(_: Tile, numBuckets))(tile)
+ val updatedHist = safeMerge(hist1, hist2)
+ buffer(0) = marshall(updatedHist)
+ }
+
+ override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
+ val hist1 = unmarshall(buffer1.getAs[Array[Byte]](0))
+ val hist2 = unmarshall(buffer2.getAs[Array[Byte]](0))
+ val updatedHist = safeMerge(hist1, hist2)
+ buffer1(0) = marshall(updatedHist)
+ }
+
+ override def evaluate(buffer: Row): Any = {
+ val hist = unmarshall(buffer.getAs[Array[Byte]](0))
+ CellHistogram(hist)
+ }
+}
+
+object HistogramAggregate {
+ import astraea.spark.rasterframes.encoders.StandardEncoders.cellHistEncoder
+
+ def apply(col: Column): TypedColumn[Any, CellHistogram] =
+ new Column(new HistogramAggregateUDAF(col.expr))
+ .as(s"agg_approx_histogram($col)") // node renaming in the class doesn't seem to propagate
+ .as[CellHistogram]
+
+ /** Adapter hack to allow UserDefinedAggregateFunction to be referenced as an expression. */
+ @ExpressionDescription(
+ usage = "_FUNC_(tile) - Compute aggregate cell histogram over a tile column.",
+ arguments = """
+ Arguments:
+ * tile - tile column to analyze""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ ..."""
+ )
+ class HistogramAggregateUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId)
+ extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) {
+ def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new HistogramAggregate()), Complete, false, NamedExpression.newExprId)
+ override def nodeName: String = "agg_approx_histogram"
+ }
+ object HistogramAggregateUDAF {
+ def apply(child: Expression): HistogramAggregateUDAF = new HistogramAggregateUDAF(child)
+ }
+}
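+// Illustrative usage (assumed column name): HistogramAggregate($"tile") computes a
+// Kryo-buffered streaming histogram (default bucket count) over all cells in the column.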
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalCountAggregate.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalCountAggregate.scala
new file mode 100644
index 000000000..f427d9ee3
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalCountAggregate.scala
@@ -0,0 +1,117 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.aggstats
+
+import astraea.spark.rasterframes.expressions.accessors.ExtractTile
+import astraea.spark.rasterframes.functions.safeBinaryOp
+import geotrellis.raster.mapalgebra.local.{Add, Defined, Undefined}
+import geotrellis.raster.{IntConstantNoDataCellType, Tile}
+import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, AggregateMode, Complete}
+import org.apache.spark.sql.catalyst.expressions.{ExprId, Expression, ExpressionDescription, NamedExpression}
+import org.apache.spark.sql.execution.aggregate.ScalaUDAF
+import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
+import org.apache.spark.sql.rf.TileUDT
+import org.apache.spark.sql.types.{DataType, StructField, StructType}
+import org.apache.spark.sql.{Column, Row, TypedColumn}
+
+/**
+ * Catalyst aggregate function that computes a cell-wise count of either data or `NoData` values.
+ *
+ * @param isData true to count data (non-NoData) values; false to count NoData values.
+ * @since 8/11/17
+ */
+class LocalCountAggregate(isData: Boolean) extends UserDefinedAggregateFunction {
+
+ private val incCount =
+ if (isData) safeBinaryOp((t1: Tile, t2: Tile) ⇒ Add(t1, Defined(t2)))
+ else safeBinaryOp((t1: Tile, t2: Tile) ⇒ Add(t1, Undefined(t2)))
+
+ private val add = safeBinaryOp(Add.apply(_: Tile, _: Tile))
+
+ private val TileType = new TileUDT()
+
+ override def dataType: DataType = TileType
+
+ override def inputSchema: StructType = StructType(Seq(
+ StructField("value", TileType, true)
+ ))
+
+ override def bufferSchema: StructType = inputSchema
+
+ override def deterministic: Boolean = true
+
+ override def initialize(buffer: MutableAggregationBuffer): Unit =
+ buffer(0) = null
+
+ override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
+ val right = input.getAs[Tile](0)
+ if (right != null) {
+ if (buffer(0) == null) {
+ buffer(0) = (
+ if (isData) Defined(right) else Undefined(right)
+ ).convert(IntConstantNoDataCellType)
+ } else {
+ val left = buffer.getAs[Tile](0)
+ buffer(0) = incCount(left, right)
+ }
+ }
+ }
+
+ override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
+ buffer1(0) = add(buffer1.getAs[Tile](0), buffer2.getAs[Tile](0))
+ }
+
+ override def evaluate(buffer: Row): Tile = buffer.getAs[Tile](0)
+}
+object LocalCountAggregate {
+ import astraea.spark.rasterframes.encoders.StandardEncoders.singlebandTileEncoder
+ @ExpressionDescription(
+ usage = "_FUNC_(tile) - Compute cell-wise count of non-no-data values."
+ )
+ class LocalDataCellsUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId) extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) {
+ def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new LocalCountAggregate(true)), Complete, false, NamedExpression.newExprId)
+ override def nodeName: String = "agg_local_data_cells"
+ }
+ object LocalDataCellsUDAF {
+ def apply(child: Expression): LocalDataCellsUDAF = new LocalDataCellsUDAF(child)
+ def apply(tile: Column): TypedColumn[Any, Tile] =
+ new Column(new LocalDataCellsUDAF(tile.expr))
+ .as(s"agg_local_data_cells($tile)")
+ .as[Tile]
+ }
+
+ @ExpressionDescription(
+ usage = "_FUNC_(tile) - Compute cell-wise count of no-data values."
+ )
+ class LocalNoDataCellsUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId) extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) {
+ def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new LocalCountAggregate(false)), Complete, false, NamedExpression.newExprId)
+ override def nodeName: String = "agg_local_no_data_cells"
+ }
+ object LocalNoDataCellsUDAF {
+ def apply(child: Expression): LocalNoDataCellsUDAF = new LocalNoDataCellsUDAF(child)
+ def apply(tile: Column): TypedColumn[Any, Tile] =
+ new Column(new LocalNoDataCellsUDAF(tile.expr))
+ .as(s"agg_local_no_data_cells($tile)")
+ .as[Tile]
+ }
+
+}
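+
+// A minimal usage sketch (hypothetical `df`/`tile` names; the `$` column syntax
+// assumes `import spark.implicits._` is in scope):
+//
+//   import astraea.spark.rasterframes.expressions.aggstats.LocalCountAggregate._
+//   val dataCells   = df.agg(LocalDataCellsUDAF($"tile"))   // per-cell count of data values
+//   val noDataCells = df.agg(LocalNoDataCellsUDAF($"tile")) // per-cell count of NoData values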
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalMeanAggregate.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalMeanAggregate.scala
new file mode 100644
index 000000000..bab1eba20
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalMeanAggregate.scala
@@ -0,0 +1,82 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.aggstats
+
+import astraea.spark.rasterframes.expressions.UnaryRasterAggregate
+import astraea.spark.rasterframes.expressions.localops.{Add => AddTiles, Divide => DivideTiles}
+import astraea.spark.rasterframes.expressions.transformers.SetCellType
+import geotrellis.raster.Tile
+import geotrellis.raster.mapalgebra.local
+import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, ExpressionDescription, If, IsNull, Literal}
+import org.apache.spark.sql.rf.TileUDT
+import org.apache.spark.sql.types.DataType
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Computes a new tile contining the mean cell values across all tiles in column.",
+ note = "All tiles in the column must be the same size."
+)
+case class LocalMeanAggregate(child: Expression) extends UnaryRasterAggregate {
+ private val TileType = new TileUDT()
+
+ override def dataType: DataType = TileType
+ override def nodeName: String = "agg_local_mean"
+
+ private lazy val count =
+ AttributeReference("count", TileType, true)()
+ private lazy val sum =
+ AttributeReference("sum", TileType, true)()
+
+ override def aggBufferAttributes: Seq[AttributeReference] = Seq(
+ count,
+ sum
+ )
+
+ private lazy val Defined = tileOpAsExpression("defined_cells", local.Defined.apply)
+
+ override lazy val initialValues: Seq[Expression] = Seq(
+ Literal.create(null, TileType),
+ Literal.create(null, TileType)
+ )
+ override lazy val updateExpressions: Seq[Expression] = Seq(
+ If(IsNull(count),
+ SetCellType(Defined(child), Literal("int32")),
+ If(IsNull(child), count, AddTiles(count, Defined(child)))
+ ),
+ If(IsNull(sum),
+ SetCellType(child, Literal("float64")),
+ If(IsNull(child), sum, AddTiles(sum, child))
+ )
+ )
+ override val mergeExpressions: Seq[Expression] = Seq(
+ AddTiles(count.left, count.right),
+ AddTiles(sum.left, sum.right)
+ )
+ override lazy val evaluateExpression: Expression = DivideTiles(sum, count)
+}
+object LocalMeanAggregate {
+ import astraea.spark.rasterframes.encoders.StandardEncoders.singlebandTileEncoder
+
+ def apply(tile: Column): TypedColumn[Any, Tile] =
+ new Column(new LocalMeanAggregate(tile.expr).toAggregateExpression()).as[Tile]
+
+}
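+
+// A minimal usage sketch (hypothetical `df`/`tile` names; `$` syntax assumes
+// `import spark.implicits._`):
+//
+//   val meanTile = df.agg(LocalMeanAggregate($"tile"))
+//   // Buffers a cell-wise running count and sum, then evaluates sum / count.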
diff --git a/core/src/main/scala/astraea/spark/rasterframes/functions/LocalStatsAggregate.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalStatsAggregate.scala
similarity index 61%
rename from core/src/main/scala/astraea/spark/rasterframes/functions/LocalStatsAggregate.scala
rename to core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalStatsAggregate.scala
index 13c408f14..8df684a25 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/functions/LocalStatsAggregate.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalStatsAggregate.scala
@@ -1,28 +1,39 @@
/*
- * Copyright 2017 Astraea, Inc.
+ * This software is licensed under the Apache 2 license, quoted below.
*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Copyright 2019 Astraea, Inc.
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
*
* Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.functions
+package astraea.spark.rasterframes.expressions.aggstats
+import astraea.spark.rasterframes.expressions.accessors.ExtractTile
+import astraea.spark.rasterframes.functions.safeBinaryOp
+import astraea.spark.rasterframes.stats.LocalCellStatistics
+import astraea.spark.rasterframes.util.DataBiasedOp.{BiasedAdd, BiasedMax, BiasedMin}
import geotrellis.raster.mapalgebra.local._
-import geotrellis.raster.{DoubleConstantNoDataCellType, IntConstantNoDataCellType, IntUserDefinedNoDataCellType, Tile, isNoData}
-import org.apache.spark.sql.Row
+import geotrellis.raster.{DoubleConstantNoDataCellType, IntConstantNoDataCellType, IntUserDefinedNoDataCellType, Tile}
+import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, AggregateMode, Complete}
+import org.apache.spark.sql.catalyst.expressions.{ExprId, Expression, ExpressionDescription, NamedExpression}
+import org.apache.spark.sql.execution.aggregate.ScalaUDAF
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
-import org.apache.spark.sql.types._
-import DataBiasedOp._
import org.apache.spark.sql.rf.TileUDT
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.{Column, Row, TypedColumn}
/**
@@ -35,7 +46,9 @@ class LocalStatsAggregate() extends UserDefinedAggregateFunction {
private val TileType = new TileUDT()
- override def inputSchema: StructType = StructType(StructField("value", TileType) :: Nil)
+ override def inputSchema: StructType = StructType(Seq(
+ StructField("value", TileType, true)
+ ))
override def dataType: DataType =
StructType(
@@ -133,6 +146,32 @@ class LocalStatsAggregate() extends UserDefinedAggregateFunction {
}
object LocalStatsAggregate {
+
+ def apply(col: Column): TypedColumn[Any, LocalCellStatistics] =
+ new Column(LocalStatsAggregateUDAF(col.expr))
+ .as(s"agg_local_stats($col)")
+ .as[LocalCellStatistics]
+
+ /** Adapter hack to allow UserDefinedAggregateFunction to be referenced as an expression. */
+ @ExpressionDescription(
+ usage = "_FUNC_(tile) - Compute cell-local aggregate descriptive statistics for a column of tiles.",
+ arguments = """
+ Arguments:
+ * tile - tile column to analyze""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ ..."""
+ )
+ class LocalStatsAggregateUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId)
+ extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) {
+ def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new LocalStatsAggregate()), Complete, false, NamedExpression.newExprId)
+ override def nodeName: String = "agg_local_stats"
+ }
+ object LocalStatsAggregateUDAF {
+ def apply(child: Expression): LocalStatsAggregateUDAF = new LocalStatsAggregateUDAF(child)
+ }
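+
+ // A minimal usage sketch (hypothetical `df`/`tile` names; `$` syntax assumes
+ // `import spark.implicits._`):
+ //
+ //   val stats = df.agg(LocalStatsAggregate($"tile"))
+ //   // Yields cell-wise descriptive statistics as a LocalCellStatistics struct.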
+
/** Column index values. */
private object C {
val COUNT = 0
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalTileOpAggregate.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalTileOpAggregate.scala
new file mode 100644
index 000000000..7a5032176
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalTileOpAggregate.scala
@@ -0,0 +1,103 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.aggstats
+
+import astraea.spark.rasterframes.expressions.accessors.ExtractTile
+import astraea.spark.rasterframes.functions.safeBinaryOp
+import astraea.spark.rasterframes.util.DataBiasedOp.{BiasedMax, BiasedMin}
+import geotrellis.raster.Tile
+import geotrellis.raster.mapalgebra.local
+import geotrellis.raster.mapalgebra.local.LocalTileBinaryOp
+import org.apache.spark.sql.{Column, Row, TypedColumn}
+import org.apache.spark.sql.catalyst.expressions.{ExprId, Expression, ExpressionDescription, NamedExpression}
+import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, AggregateMode, Complete}
+import org.apache.spark.sql.execution.aggregate.ScalaUDAF
+import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
+import org.apache.spark.sql.rf.TileUDT
+import org.apache.spark.sql.types._
+
+/**
+ * Aggregation function for applying a [[LocalTileBinaryOp]] pairwise across all tiles. Assumes Monoid algebra.
+ *
+ * @since 4/17/17
+ */
+class LocalTileOpAggregate(op: LocalTileBinaryOp) extends UserDefinedAggregateFunction {
+
+ private val safeOp = safeBinaryOp(op.apply(_: Tile, _: Tile))
+
+ private val TileType = new TileUDT()
+
+ override def inputSchema: StructType = StructType(Seq(
+ StructField("value", TileType, true)
+ ))
+
+ override def bufferSchema: StructType = inputSchema
+
+ override def dataType: DataType = TileType
+
+ override def deterministic: Boolean = true
+
+ override def initialize(buffer: MutableAggregationBuffer): Unit =
+ buffer(0) = null
+
+ override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
+ if (buffer(0) == null) {
+ buffer(0) = input(0)
+ } else {
+ val t1 = buffer.getAs[Tile](0)
+ val t2 = input.getAs[Tile](0)
+ buffer(0) = safeOp(t1, t2)
+ }
+ }
+
+ override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = update(buffer1, buffer2)
+
+ override def evaluate(buffer: Row): Tile = buffer.getAs[Tile](0)
+}
+
+object LocalTileOpAggregate {
+ import astraea.spark.rasterframes.encoders.StandardEncoders.singlebandTileEncoder
+
+ @ExpressionDescription(
+ usage = "_FUNC_(tile) - Compute cell-wise minimum value from a tile column."
+ )
+ class LocalMinUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId) extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) {
+ def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new LocalTileOpAggregate(BiasedMin)), Complete, false, NamedExpression.newExprId)
+ override def nodeName: String = "agg_local_min"
+ }
+ object LocalMinUDAF {
+ def apply(child: Expression): LocalMinUDAF = new LocalMinUDAF(child)
+ def apply(tile: Column): TypedColumn[Any, Tile] = new Column(new LocalMinUDAF(tile.expr)).as[Tile]
+ }
+
+ @ExpressionDescription(
+ usage = "_FUNC_(tile) - Compute cell-wise maximum value from a tile column."
+ )
+ class LocalMaxUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId) extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) {
+ def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new LocalTileOpAggregate(BiasedMax)), Complete, false, NamedExpression.newExprId)
+ override def nodeName: String = "agg_local_max"
+ }
+ object LocalMaxUDAF {
+ def apply(child: Expression): LocalMaxUDAF = new LocalMaxUDAF(child)
+ def apply(tile: Column): TypedColumn[Any, Tile] = new Column(new LocalMaxUDAF(tile.expr)).as[Tile]
+ }
+}
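+
+// A minimal usage sketch (hypothetical `df`/`tile` names; `$` syntax assumes
+// `import spark.implicits._`):
+//
+//   import astraea.spark.rasterframes.expressions.aggstats.LocalTileOpAggregate._
+//   df.agg(LocalMinUDAF($"tile"), LocalMaxUDAF($"tile"))  // cell-wise min and max across the column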
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/ExplodeTiles.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/generators/ExplodeTiles.scala
similarity index 88%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/ExplodeTiles.scala
rename to core/src/main/scala/astraea/spark/rasterframes/expressions/generators/ExplodeTiles.scala
index 9ecdcd007..e39ca1814 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/ExplodeTiles.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/generators/ExplodeTiles.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2017 Astraea, Inc.
+ * Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,9 +15,11 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.expressions
+package astraea.spark.rasterframes.expressions.generators
import astraea.spark.rasterframes._
import astraea.spark.rasterframes.encoders.CatalystSerializer._
@@ -27,6 +29,7 @@ import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{Expression, Generator, GenericInternalRow}
+import org.apache.spark.sql.rf.TileUDT
import org.apache.spark.sql.types._
import spire.syntax.cfor.cfor
@@ -36,9 +39,10 @@ import spire.syntax.cfor.cfor
* @since 4/12/17
*/
case class ExplodeTiles(
- sampleFraction: Double = 1.0, seed: Option[Long] = None, override val children: Seq[Expression])
- extends Expression with Generator with CodegenFallback {
+ sampleFraction: Double, seed: Option[Long], override val children: Seq[Expression])
+ extends Expression with Generator with CodegenFallback {
+ def this(children: Seq[Expression]) = this(1.0, None, children)
override def nodeName: String = "explode_tiles"
override def elementSchema: StructType = {
@@ -64,7 +68,7 @@ case class ExplodeTiles(
val tiles = Array.ofDim[Tile](children.length)
cfor(0)(_ < tiles.length, _ + 1) { index =>
val row = children(index).eval(input).asInstanceOf[InternalRow]
- tiles(index) = if(row != null) row.to[Tile] else null
+ tiles(index) = if(row != null) row.to[Tile](TileUDT.tileSerializer) else null
}
val dims = tiles.filter(_ != null).map(_.dimensions)
if(dims.isEmpty) Seq.empty[InternalRow]
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Add.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Add.scala
new file mode 100644
index 000000000..d7f1a7867
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Add.scala
@@ -0,0 +1,75 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.localops
+
+import astraea.spark.rasterframes._
+import astraea.spark.rasterframes.expressions.DynamicExtractors.tileExtractor
+import astraea.spark.rasterframes.expressions.{BinaryLocalRasterOp, DynamicExtractors}
+import astraea.spark.rasterframes.util.DataBiasedOp.BiasedAdd
+import geotrellis.raster.Tile
+import org.apache.spark.sql.rf._
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.functions.lit
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile, rhs) - Performs cell-wise addition between two tiles or a tile and a scalar.",
+ arguments = """
+ Arguments:
+ * tile - left-hand-side tile
+ * rhs - a tile or scalar value to add to each cell""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile, 1.5);
+ ...
+ > SELECT _FUNC_(tile1, tile2);
+ ..."""
+)
+case class Add(left: Expression, right: Expression) extends BinaryLocalRasterOp
+ with CodegenFallback {
+ override val nodeName: String = "local_add"
+ override protected def op(left: Tile, right: Tile): Tile = BiasedAdd(left, right)
+ override protected def op(left: Tile, right: Double): Tile = BiasedAdd(left, right)
+ override protected def op(left: Tile, right: Int): Tile = BiasedAdd(left, right)
+
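+ // Null handling mirrors BiasedAdd's treatment of NoData: a null operand acts
+ // as the additive identity, so if only one side evaluates to a tile it is
+ // returned unchanged; a null scalar operand, however, yields null.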
+ override def eval(input: InternalRow): Any = {
+ if(input == null) null
+ else {
+ val l = left.eval(input)
+ val r = right.eval(input)
+ if (l == null && r == null) null
+ else if (l == null) r
+ else if (r == null && tileExtractor.isDefinedAt(right.dataType)) l
+ else if (r == null) null
+ else nullSafeEval(l, r)
+ }
+ }
+}
+object Add {
+ def apply(left: Column, right: Column): TypedColumn[Any, Tile] =
+ new Column(Add(left.expr, right.expr)).as[Tile]
+
+ def apply[N: Numeric](tile: Column, value: N): TypedColumn[Any, Tile] =
+ new Column(Add(tile.expr, lit(value).expr)).as[Tile]
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Divide.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Divide.scala
new file mode 100644
index 000000000..37aa4ab6c
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Divide.scala
@@ -0,0 +1,57 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.localops
+
+import astraea.spark.rasterframes._
+import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.functions.lit
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile, rhs) - Performs cell-wise division between two tiles or a tile and a scalar.",
+ arguments = """
+ Arguments:
+ * tile - left-hand-side tile
+ * rhs - a tile or scalar value to divide each cell by",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile, 1.5);
+ ...
+ > SELECT _FUNC_(tile1, tile2);
+ ..."""
+)
+case class Divide(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback {
+ override val nodeName: String = "local_divide"
+ override protected def op(left: Tile, right: Tile): Tile = left.localDivide(right)
+ override protected def op(left: Tile, right: Double): Tile = left.localDivide(right)
+ override protected def op(left: Tile, right: Int): Tile = left.localDivide(right)
+}
+object Divide {
+ def apply(left: Column, right: Column): TypedColumn[Any, Tile] =
+ new Column(Divide(left.expr, right.expr)).as[Tile]
+
+ def apply[N: Numeric](tile: Column, value: N): TypedColumn[Any, Tile] =
+ new Column(Divide(tile.expr, lit(value).expr)).as[Tile]
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Equal.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Equal.scala
new file mode 100644
index 000000000..610b8beff
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Equal.scala
@@ -0,0 +1,56 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.localops
+
+import astraea.spark.rasterframes._
+import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.functions.lit
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(lhs, rhs) - Performs cell-wise equality test between two tiles.",
+ arguments = """
+ Arguments:
+ * lhs - first tile argument
+ * rhs - second tile argument""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile1, tile2);
+ ..."""
+)
+case class Equal(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback {
+ override val nodeName: String = "local_equal"
+ override protected def op(left: Tile, right: Tile): Tile = left.localEqual(right)
+ override protected def op(left: Tile, right: Double): Tile = left.localEqual(right)
+ override protected def op(left: Tile, right: Int): Tile = left.localEqual(right)
+}
+
+object Equal {
+ def apply(left: Column, right: Column): TypedColumn[Any, Tile] =
+ new Column(Equal(left.expr, right.expr)).as[Tile]
+
+ def apply[N: Numeric](tile: Column, value: N): TypedColumn[Any, Tile] =
+ new Column(Equal(tile.expr, lit(value).expr)).as[Tile]
+}
\ No newline at end of file
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Exp.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Exp.scala
new file mode 100644
index 000000000..40d34ee06
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Exp.scala
@@ -0,0 +1,116 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.localops
+
+import astraea.spark.rasterframes._
+import astraea.spark.rasterframes.expressions.{UnaryLocalRasterOp, fpTile}
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.types.DataType
+import org.apache.spark.sql.{Column, TypedColumn}
+
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Performs cell-wise exponential.",
+ arguments = """
+ Arguments:
+ * tile - input tile""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ ..."""
+)
+case class Exp(child: Expression) extends UnaryLocalRasterOp with CodegenFallback {
+ override val nodeName: String = "exp"
+
+ override protected def op(tile: Tile): Tile = fpTile(tile).localPowValue(math.E)
+
+ override def dataType: DataType = child.dataType
+}
+object Exp {
+ def apply(tile: Column): TypedColumn[Any, Tile] =
+ new Column(Exp(tile.expr)).as[Tile]
+}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Compute 10 to the power of cell values.",
+ arguments = """
+ Arguments:
+ * tile - input tile""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ ..."""
+)
+case class Exp10(child: Expression) extends UnaryLocalRasterOp with CodegenFallback {
+ override val nodeName: String = "log10"
+
+ override protected def op(tile: Tile): Tile = fpTile(tile).localPowValue(10.0)
+
+ override def dataType: DataType = child.dataType
+}
+object Exp10 {
+ def apply(tile: Column): TypedColumn[Any, Tile] = new Column(Exp10(tile.expr)).as[Tile]
+}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Compute 2 to the power of cell values.",
+ arguments = """
+ Arguments:
+ * tile - input tile""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ ..."""
+)
+case class Exp2(child: Expression) extends UnaryLocalRasterOp with CodegenFallback {
+ override val nodeName: String = "exp2"
+
+ override protected def op(tile: Tile): Tile = fpTile(tile).localPowValue(2.0)
+
+ override def dataType: DataType = child.dataType
+}
+object Exp2 {
+ def apply(tile: Column): TypedColumn[Any, Tile] = new Column(Exp2(tile.expr)).as[Tile]
+}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Performs cell-wise exponential, then subtract one.",
+ arguments = """
+ Arguments:
+ * tile - input tile""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ ..."""
+)
+case class ExpM1(child: Expression) extends UnaryLocalRasterOp with CodegenFallback {
+ override val nodeName: String = "expm1"
+
+ override protected def op(tile: Tile): Tile = fpTile(tile).localPowValue(math.E).localSubtract(1.0)
+
+ override def dataType: DataType = child.dataType
+}
+object ExpM1 {
+ def apply(tile: Column): TypedColumn[Any, Tile] = new Column(ExpM1(tile.expr)).as[Tile]
+}
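+
+// A minimal usage sketch (hypothetical column name; `$` syntax assumes
+// `import spark.implicits._`):
+//
+//   df.select(Exp($"tile"))    // e ** cell
+//   df.select(Exp2($"tile"))   // 2 ** cell
+//   df.select(Exp10($"tile"))  // 10 ** cell
+//   df.select(ExpM1($"tile"))  // e ** cell - 1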
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Greater.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Greater.scala
new file mode 100644
index 000000000..f78022972
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Greater.scala
@@ -0,0 +1,55 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+package astraea.spark.rasterframes.expressions.localops
+
+import astraea.spark.rasterframes._
+import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.functions.lit
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(lhs, rhs) - Performs cell-wise greater-than (>) test between two tiles.",
+ arguments = """
+ Arguments:
+ * lhs - first tile argument
+ * rhs - second tile argument""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile1, tile2);
+ ..."""
+)
+case class Greater(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback {
+ override val nodeName: String = "local_greater"
+ override protected def op(left: Tile, right: Tile): Tile = left.localGreater(right)
+ override protected def op(left: Tile, right: Double): Tile = left.localGreater(right)
+ override protected def op(left: Tile, right: Int): Tile = left.localGreater(right)
+}
+
+object Greater {
+ def apply(left: Column, right: Column): TypedColumn[Any, Tile] =
+ new Column(Greater(left.expr, right.expr)).as[Tile]
+
+ def apply[N: Numeric](tile: Column, value: N): TypedColumn[Any, Tile] =
+ new Column(Greater(tile.expr, lit(value).expr)).as[Tile]
+}
\ No newline at end of file
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/GreaterEqual.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/GreaterEqual.scala
new file mode 100644
index 000000000..bf43ceca5
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/GreaterEqual.scala
@@ -0,0 +1,56 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.localops
+
+import astraea.spark.rasterframes._
+import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.functions.lit
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(lhs, rhs) - Performs cell-wise greater-than-or-equal (>=) test between two tiles.",
+ arguments = """
+ Arguments:
+ * lhs - first tile argument
+ * rhs - second tile argument""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile1, tile2);
+ ..."""
+)
+case class GreaterEqual(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback {
+ override val nodeName: String = "local_greater_equal"
+ override protected def op(left: Tile, right: Tile): Tile = left.localGreaterOrEqual(right)
+ override protected def op(left: Tile, right: Double): Tile = left.localGreaterOrEqual(right)
+ override protected def op(left: Tile, right: Int): Tile = left.localGreaterOrEqual(right)
+}
+
+object GreaterEqual {
+ def apply(left: Column, right: Column): TypedColumn[Any, Tile] =
+ new Column(GreaterEqual(left.expr, right.expr)).as[Tile]
+
+ def apply[N: Numeric](tile: Column, value: N): TypedColumn[Any, Tile] =
+ new Column(GreaterEqual(tile.expr, lit(value).expr)).as[Tile]
+}
\ No newline at end of file
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Less.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Less.scala
new file mode 100644
index 000000000..4f8d4ad7b
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Less.scala
@@ -0,0 +1,54 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+package astraea.spark.rasterframes.expressions.localops
+
+import astraea.spark.rasterframes._
+import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.functions.lit
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(lhs, rhs) - Performs cell-wise less-than (<) test between two tiles.",
+ arguments = """
+ Arguments:
+ * lhs - first tile argument
+ * rhs - second tile argument""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile1, tile2);
+ ..."""
+)
+case class Less(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback {
+ override val nodeName: String = "local_less"
+ override protected def op(left: Tile, right: Tile): Tile = left.localLess(right)
+ override protected def op(left: Tile, right: Double): Tile = left.localLess(right)
+ override protected def op(left: Tile, right: Int): Tile = left.localLess(right)
+}
+object Less {
+ def apply(left: Column, right: Column): TypedColumn[Any, Tile] =
+ new Column(Less(left.expr, right.expr)).as[Tile]
+
+ def apply[N: Numeric](tile: Column, value: N): TypedColumn[Any, Tile] =
+ new Column(Less(tile.expr, lit(value).expr)).as[Tile]
+}
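+
+// A minimal usage sketch (hypothetical names): the comparison expressions
+// produce tiles of 0/1 cell values, convenient as masks.
+//
+//   df.select(Less($"tile", 100))        // 1 where cell < 100, else 0
+//   df.select(Less($"tile1", $"tile2"))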
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/LessEqual.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/LessEqual.scala
new file mode 100644
index 000000000..983ac7c0d
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/LessEqual.scala
@@ -0,0 +1,55 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.localops
+
+import astraea.spark.rasterframes._
+import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.functions.lit
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(lhs, rhs) - Performs cell-wise less-than-or-equal (<=) test between two tiles.",
+ arguments = """
+ Arguments:
+ * lhs - first tile argument
+ * rhs - second tile argument""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile1, tile2);
+ ..."""
+)
+case class LessEqual(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback {
+ override val nodeName: String = "local_less_equal"
+ override protected def op(left: Tile, right: Tile): Tile = left.localLessOrEqual(right)
+ override protected def op(left: Tile, right: Double): Tile = left.localLessOrEqual(right)
+ override protected def op(left: Tile, right: Int): Tile = left.localLessOrEqual(right)
+}
+object LessEqual {
+ def apply(left: Column, right: Column): TypedColumn[Any, Tile] =
+ new Column(LessEqual(left.expr, right.expr)).as[Tile]
+
+ def apply[N: Numeric](tile: Column, value: N): TypedColumn[Any, Tile] =
+ new Column(LessEqual(tile.expr, lit(value).expr)).as[Tile]
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Log.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Log.scala
new file mode 100644
index 000000000..e2da78ce1
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Log.scala
@@ -0,0 +1,116 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.localops
+
+import astraea.spark.rasterframes._
+import astraea.spark.rasterframes.expressions.{UnaryLocalRasterOp, fpTile}
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.types.DataType
+import org.apache.spark.sql.{Column, TypedColumn}
+
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Performs cell-wise natural logarithm.",
+ arguments = """
+ Arguments:
+ * tile - input tile""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ ..."""
+)
+case class Log(child: Expression) extends UnaryLocalRasterOp with CodegenFallback {
+ override val nodeName: String = "log"
+
+ override protected def op(tile: Tile): Tile = fpTile(tile).localLog()
+
+ override def dataType: DataType = child.dataType
+}
+object Log {
+ def apply(tile: Column): TypedColumn[Any, Tile] =
+ new Column(Log(tile.expr)).as[Tile]
+}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Performs cell-wise logarithm with base 10.",
+ arguments = """
+ Arguments:
+ * tile - input tile""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ ..."""
+)
+case class Log10(child: Expression) extends UnaryLocalRasterOp with CodegenFallback {
+ override val nodeName: String = "log10"
+
+ override protected def op(tile: Tile): Tile = fpTile(tile).localLog10()
+
+ override def dataType: DataType = child.dataType
+}
+object Log10 {
+ def apply(tile: Column): TypedColumn[Any, Tile] = new Column(Log10(tile.expr)).as[Tile]
+}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Performs cell-wise logarithm with base 2.",
+ arguments = """
+ Arguments:
+ * tile - input tile""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ ..."""
+)
+case class Log2(child: Expression) extends UnaryLocalRasterOp with CodegenFallback {
+ override val nodeName: String = "log2"
+
+ override protected def op(tile: Tile): Tile = fpTile(tile).localLog() / math.log(2.0)
+
+ override def dataType: DataType = child.dataType
+}
+object Log2 {
+ def apply(tile: Column): TypedColumn[Any, Tile] = new Column(Log2(tile.expr)).as[Tile]
+}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Performs natural logarithm of cell values plus one.",
+ arguments = """
+ Arguments:
+ * tile - input tile""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ ..."""
+)
+case class Log1p(child: Expression) extends UnaryLocalRasterOp with CodegenFallback {
+ override val nodeName: String = "log1p"
+
+ override protected def op(tile: Tile): Tile = fpTile(tile).localAdd(1.0).localLog()
+
+ override def dataType: DataType = child.dataType
+}
+object Log1p {
+ def apply(tile: Column): TypedColumn[Any, Tile] = new Column(Log1p(tile.expr)).as[Tile]
+}
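+
+// A minimal usage sketch (hypothetical column name; `$` syntax assumes
+// `import spark.implicits._`):
+//
+//   df.select(Log($"tile"))    // ln(cell)
+//   df.select(Log2($"tile"))   // ln(cell) / ln(2)
+//   df.select(Log10($"tile"))  // log base 10
+//   df.select(Log1p($"tile"))  // ln(1 + cell)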
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Multiply.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Multiply.scala
new file mode 100644
index 000000000..7ed7c76b8
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Multiply.scala
@@ -0,0 +1,56 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.localops
+
+import astraea.spark.rasterframes._
+import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.functions.lit
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile, rhs) - Performs cell-wise multiplication between two tiles or a tile and a scalar.",
+ arguments = """
+ Arguments:
+ * tile - left-hand-side tile
+ * rhs - a tile or scalar value to multiply each cell by",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile, 1.5);
+ ...
+ > SELECT _FUNC_(tile1, tile2);
+ ..."""
+)
+case class Multiply(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback {
+ override val nodeName: String = "local_multiply"
+ override protected def op(left: Tile, right: Tile): Tile = left.localMultiply(right)
+ override protected def op(left: Tile, right: Double): Tile = left.localMultiply(right)
+ override protected def op(left: Tile, right: Int): Tile = left.localMultiply(right)
+}
+object Multiply {
+ def apply(left: Column, right: Column): TypedColumn[Any, Tile] =
+ new Column(Multiply(left.expr, right.expr)).as[Tile]
+ def apply[N: Numeric](tile: Column, value: N): TypedColumn[Any, Tile] =
+ new Column(Multiply(tile.expr, lit(value).expr)).as[Tile]
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/NormalizedDifference.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/NormalizedDifference.scala
new file mode 100644
index 000000000..5760582d6
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/NormalizedDifference.scala
@@ -0,0 +1,54 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.localops
+import astraea.spark.rasterframes.expressions.fpTile
+import astraea.spark.rasterframes._
+import astraea.spark.rasterframes.expressions.BinaryRasterOp
+import geotrellis.raster.Tile
+import org.apache.spark.sql.{Column, TypedColumn}
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+
+@ExpressionDescription(
+ usage = "_FUNC_(left, right) - Computes the normalized difference '(left - right) / (left + right)' between two tile columns",
+ note = "Common usage includes computing NDVI via red and NIR bands.",
+ arguments = """
+ Arguments:
+ * left - first tile argument
+ * right - second tile argument""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(nir, red);
+ ..."""
+)
+case class NormalizedDifference(left: Expression, right: Expression) extends BinaryRasterOp with CodegenFallback {
+ override val nodeName: String = "normalized_difference"
+ override protected def op(left: Tile, right: Tile): Tile = {
+ val diff = fpTile(left.localSubtract(right))
+ val sum = fpTile(left.localAdd(right))
+ diff.localDivide(sum)
+ }
+}
+object NormalizedDifference {
+ def apply(left: Column, right: Column): TypedColumn[Any, Tile] =
+ new Column(NormalizedDifference(left.expr, right.expr)).as[Tile]
+}
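+
+// A minimal usage sketch (hypothetical `nir`/`red` band columns), e.g. NDVI:
+//
+//   val ndvi = df.select(NormalizedDifference($"nir", $"red") as "ndvi")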
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Resample.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Resample.scala
new file mode 100644
index 000000000..fd2ae2f29
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Resample.scala
@@ -0,0 +1,76 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.localops
+
+import astraea.spark.rasterframes._
+import astraea.spark.rasterframes.expressions.DynamicExtractors.tileExtractor
+import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+import geotrellis.raster.Tile
+import geotrellis.raster.resample.NearestNeighbor
+import org.apache.spark.sql.rf._
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.functions.lit
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile, factor) - Resample tile to different size based on scalar factor or tile whose dimension to match. Scalar less than one will downsample tile; greater than one will upsample. Uses nearest-neighbor value.",
+ arguments = """
+ Arguments:
+ * tile - tile
+ * rhs - a scalar factor, or a tile whose dimensions to match",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile, 2.0);
+ ...
+ > SELECT _FUNC_(tile1, tile2);
+ ..."""
+)
+case class Resample(left: Expression, right: Expression) extends BinaryLocalRasterOp
+ with CodegenFallback {
+ override val nodeName: String = "resample"
+ override protected def op(left: Tile, right: Tile): Tile = left.resample(right.cols, right.rows, NearestNeighbor)
+ override protected def op(left: Tile, right: Double): Tile = left.resample((left.cols * right).toInt,
+ (left.rows * right).toInt, NearestNeighbor)
+ override protected def op(left: Tile, right: Int): Tile = op(left, right.toDouble)
+
+ override def eval(input: InternalRow): Any = {
+ if(input == null) null
+ else {
+ val l = left.eval(input)
+ val r = right.eval(input)
+ if (l == null && r == null) null
+ else if (l == null) r
+ else if (r == null && tileExtractor.isDefinedAt(right.dataType)) l
+ else if (r == null) null
+ else nullSafeEval(l, r)
+ }
+ }
+}
+object Resample {
+ def apply(left: Column, right: Column): TypedColumn[Any, Tile] =
+ new Column(Resample(left.expr, right.expr)).as[Tile]
+
+ def apply[N: Numeric](tile: Column, value: N): TypedColumn[Any, Tile] =
+ new Column(Resample(tile.expr, lit(value).expr)).as[Tile]
+}
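+
+// A minimal usage sketch (hypothetical names):
+//
+//   df.select(Resample($"tile", 0.5))       // downsample to half the dimensions
+//   df.select(Resample($"tile", $"target")) // match the dimensions of another tile column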
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Round.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Round.scala
new file mode 100644
index 000000000..010666e17
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Round.scala
@@ -0,0 +1,52 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.localops
+
+import astraea.spark.rasterframes._
+import astraea.spark.rasterframes.expressions.{NullToValue, UnaryLocalRasterOp}
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Round cell values to the nearest integer without changing the cell type.",
+ arguments = """
+ Arguments:
+ * tile - tile column to round""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ ..."""
+)
+case class Round(child: Expression) extends UnaryLocalRasterOp
+ with NullToValue with CodegenFallback {
+ override def nodeName: String = "round"
+ override def na: Any = null
+ override protected def op(child: Tile): Tile = child.localRound()
+}
+object Round {
+
+ def apply(tile: Column): TypedColumn[Any, Tile] =
+ new Column(Round(tile.expr)).as[Tile]
+
+}
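+
+// A minimal usage sketch (hypothetical column name):
+//
+//   df.select(Round($"tile"))  // e.g. 1.6 -> 2.0 in a floating-point cell type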
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Subtract.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Subtract.scala
new file mode 100644
index 000000000..203bb578d
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Subtract.scala
@@ -0,0 +1,56 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.localops
+import astraea.spark.rasterframes._
+import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.functions.lit
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile, rhs) - Performs cell-wise subtraction between two tiles or a tile and a scalar.",
+ arguments = """
+ Arguments:
+ * tile - left-hand-side tile
+ * rhs - a tile or scalar value to subtract from each cell",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile, 1.5);
+ ...
+ > SELECT _FUNC_(tile1, tile2);
+ ..."""
+)
+case class Subtract(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback {
+ override val nodeName: String = "local_subtract"
+ override protected def op(left: Tile, right: Tile): Tile = left.localSubtract(right)
+ override protected def op(left: Tile, right: Double): Tile = left.localSubtract(right)
+ override protected def op(left: Tile, right: Int): Tile = left.localSubtract(right)
+}
+object Subtract {
+ def apply(left: Column, right: Column): TypedColumn[Any, Tile] =
+ new Column(Subtract(left.expr, right.expr)).as[Tile]
+
+ def apply[N: Numeric](tile: Column, value: N): TypedColumn[Any, Tile] =
+ new Column(Subtract(tile.expr, lit(value).expr)).as[Tile]
+}
\ No newline at end of file
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Unequal.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Unequal.scala
new file mode 100644
index 000000000..f3342b9c6
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Unequal.scala
@@ -0,0 +1,56 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.localops
+
+import astraea.spark.rasterframes._
+import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.functions.lit
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(lhs, rhs) - Performs cell-wise inequality test between two tiles.",
+ arguments = """
+ Arguments:
+ * lhs - first tile argument
+ * rhs - second tile argument""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile1, tile2);
+ ..."""
+)
+case class Unequal(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback {
+ override val nodeName: String = "local_unequal"
+ override protected def op(left: Tile, right: Tile): Tile = left.localUnequal(right)
+ override protected def op(left: Tile, right: Double): Tile = left.localUnequal(right)
+ override protected def op(left: Tile, right: Int): Tile = left.localUnequal(right)
+}
+
+object Unequal {
+ def apply(left: Column, right: Column): TypedColumn[Any, Tile] =
+ new Column(Unequal(left.expr, right.expr)).as[Tile]
+
+ def apply[N: Numeric](tile: Column, value: N): TypedColumn[Any, Tile] =
+ new Column(Unequal(tile.expr, lit(value).expr)).as[Tile]
+}
\ No newline at end of file
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/package.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/package.scala
index 3255dd719..e4c0bcc00 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/package.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/package.scala
@@ -19,11 +19,21 @@
package astraea.spark.rasterframes
-import org.apache.spark.sql.catalyst.InternalRow
+import astraea.spark.rasterframes.expressions.accessors._
+import astraea.spark.rasterframes.expressions.aggstats._
+import astraea.spark.rasterframes.expressions.generators._
+import astraea.spark.rasterframes.expressions.localops._
+import astraea.spark.rasterframes.expressions.tilestats._
+import astraea.spark.rasterframes.expressions.transformers._
+import geotrellis.raster.{DoubleConstantNoDataCellType, Tile}
+import org.apache.spark.sql.catalyst.{InternalRow, ScalaReflection}
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
-import org.apache.spark.sql.rf.VersionShims
+import org.apache.spark.sql.catalyst.expressions.{Expression, ScalaUDF}
+import org.apache.spark.sql.rf.VersionShims._
import org.apache.spark.sql.{SQLContext, rf}
+import scala.util.Try
+import scala.reflect.runtime.universe._
/**
* Module of Catalyst expressions for efficiently working with tiles.
*
@@ -31,20 +41,78 @@ import org.apache.spark.sql.{SQLContext, rf}
*/
package object expressions {
private[expressions] def row(input: Any) = input.asInstanceOf[InternalRow]
+ /** Convert the tile to a floating point type as needed for scalar operations. */
+ @inline
+ private[expressions]
+ def fpTile(t: Tile) = if (t.cellType.isFloatingPoint) t else t.convert(DoubleConstantNoDataCellType)
- /** Unary expression builder builder. */
- private def ub[A, B](f: A ⇒ B)(a: Seq[A]): B = f(a.head)
- /** Binary expression builder builder. */
- private def bb[A, B](f: (A, A) ⇒ B)(a: Seq[A]): B = f(a.head, a.last)
+ /** As opposed to `udf`, this constructs an unwrapped ScalaUDF Expression from a function. */
+ private[expressions]
+ def udfexpr[RT: TypeTag, A1: TypeTag](name: String, f: A1 => RT): Expression => ScalaUDF = (child: Expression) => {
+ val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT]
+ val inputTypes = Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: Nil).toOption
+ ScalaUDF(f, dataType, Seq(child), inputTypes.getOrElse(Nil), nullable = nullable, udfName = Some(name))
+ }
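+  // Sketch (hypothetical usage): given some `f: Tile => Double`, `udfexpr("my_stat", f)`
+  // yields an Expression => ScalaUDF factory usable when assembling Catalyst plans by hand.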
def register(sqlContext: SQLContext): Unit = {
// Expression-oriented functions have a different registration scheme
// Currently have to register with the `builtin` registry due to Spark data hiding.
val registry: FunctionRegistry = rf.registry(sqlContext)
- VersionShims.registerExpression(registry, "rf_explode_tiles", ExplodeTiles.apply(1.0, None, _))
- VersionShims.registerExpression(registry, "rf_cell_type", ub(GetCellType.apply))
- VersionShims.registerExpression(registry, "rf_convert_cell_type", bb(SetCellType.apply))
- VersionShims.registerExpression(registry, "rf_tile_dimensions", ub(GetDimensions.apply))
- VersionShims.registerExpression(registry, "rf_bounds_geometry", ub(BoundsToGeometry.apply))
+
+ registry.registerExpression[Add]("rf_local_add")
+ registry.registerExpression[Subtract]("rf_local_subtract")
+ registry.registerExpression[ExplodeTiles]("rf_explode_tiles")
+ registry.registerExpression[GetCellType]("rf_cell_type")
+ registry.registerExpression[SetCellType]("rf_convert_cell_type")
+ registry.registerExpression[GetDimensions]("rf_tile_dimensions")
+ registry.registerExpression[BoundsToGeometry]("rf_bounds_geometry")
+ registry.registerExpression[Multiply]("rf_local_multiply")
+ registry.registerExpression[Divide]("rf_local_divide")
+ registry.registerExpression[NormalizedDifference]("rf_normalized_difference")
+ registry.registerExpression[Less]("rf_local_less")
+ registry.registerExpression[Greater]("rf_local_greater")
+ registry.registerExpression[LessEqual]("rf_local_less_equal")
+ registry.registerExpression[GreaterEqual]("rf_local_greater_equal")
+ registry.registerExpression[Equal]("rf_local_equal")
+ registry.registerExpression[Unequal]("rf_local_unequal")
+ registry.registerExpression[Sum]("rf_tile_sum")
+ registry.registerExpression[Round]("rf_round")
+ registry.registerExpression[Log]("rf_log")
+ registry.registerExpression[Log10]("rf_log10")
+ registry.registerExpression[Log2]("rf_log2")
+ registry.registerExpression[Log1p]("rf_log1p")
+ registry.registerExpression[Exp]("rf_exp")
+ registry.registerExpression[Exp10]("rf_exp10")
+ registry.registerExpression[Exp2]("rf_exp2")
+ registry.registerExpression[ExpM1]("rf_expm1")
+ registry.registerExpression[Resample]("rf_resample")
+ registry.registerExpression[TileToArrayDouble]("rf_tile_to_array_double")
+ registry.registerExpression[TileToArrayInt]("rf_tile_to_array_int")
+ registry.registerExpression[DataCells]("rf_data_cells")
+ registry.registerExpression[NoDataCells]("rf_no_data_cells")
+ registry.registerExpression[IsNoDataTile]("rf_is_no_data_tile")
+ registry.registerExpression[TileMin]("rf_tile_min")
+ registry.registerExpression[TileMax]("rf_tile_max")
+ registry.registerExpression[TileMean]("rf_tile_mean")
+ registry.registerExpression[TileStats]("rf_tile_stats")
+ registry.registerExpression[TileHistogram]("rf_tile_histogram")
+ registry.registerExpression[CellCountAggregate.DataCells]("rf_agg_data_cells")
+ registry.registerExpression[CellCountAggregate.NoDataCells]("rf_agg_no_data_cells")
+ registry.registerExpression[CellStatsAggregate.CellStatsAggregateUDAF]("rf_agg_stats")
+ registry.registerExpression[HistogramAggregate.HistogramAggregateUDAF]("rf_agg_approx_histogram")
+ registry.registerExpression[LocalStatsAggregate.LocalStatsAggregateUDAF]("rf_agg_local_stats")
+ registry.registerExpression[LocalTileOpAggregate.LocalMinUDAF]("rf_agg_local_min")
+ registry.registerExpression[LocalTileOpAggregate.LocalMaxUDAF]("rf_agg_local_max")
+ registry.registerExpression[LocalCountAggregate.LocalDataCellsUDAF]("rf_agg_local_data_cells")
+ registry.registerExpression[LocalCountAggregate.LocalNoDataCellsUDAF]("rf_agg_local_no_data_cells")
+ registry.registerExpression[LocalMeanAggregate]("rf_agg_local_mean")
+
+ registry.registerExpression[Mask.MaskByDefined]("rf_mask")
+ registry.registerExpression[Mask.MaskByValue]("rf_mask_by_value")
+ registry.registerExpression[Mask.InverseMaskByDefined]("rf_inverse_mask")
+
+ registry.registerExpression[DebugRender.RenderAscii]("rf_render_ascii")
+ registry.registerExpression[DebugRender.RenderMatrix]("rf_render_matrix")
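+
+    // Once registered, these functions are callable from SQL, e.g. (sketch, assuming a
+    // temp view `tiles` with a tile column):
+    //   spark.sql("SELECT rf_tile_mean(tile) FROM tiles")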
}
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/DataCells.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/DataCells.scala
new file mode 100644
index 000000000..a7d49c4ae
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/DataCells.scala
@@ -0,0 +1,62 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.tilestats
+import astraea.spark.rasterframes.expressions.{UnaryRasterOp, NullToValue}
+import astraea.spark.rasterframes.model.TileContext
+import geotrellis.raster._
+import org.apache.spark.sql.{Column, TypedColumn}
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.types.{DataType, LongType}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Counts the number of non-no-data cells in a tile",
+ arguments = """
+ Arguments:
+ * tile - tile column to analyze""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ 357"""
+)
+case class DataCells(child: Expression) extends UnaryRasterOp
+ with CodegenFallback with NullToValue {
+ override def nodeName: String = "data_cells"
+ override def dataType: DataType = LongType
+ override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = DataCells.op(tile)
+ override def na: Any = 0L
+}
+object DataCells {
+ import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.longEnc
+ def apply(tile: Column): TypedColumn[Any, Long] =
+ new Column(DataCells(tile.expr)).as[Long]
+
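+  /** Count cells holding data; `dualForeach` dispatches to the integer or
+    * floating-point visitor according to the tile's cell type. */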
+ val op = (tile: Tile) => {
+ var count: Long = 0
+ tile.dualForeach(
+ z ⇒ if(isData(z)) count = count + 1
+ ) (
+ z ⇒ if(isData(z)) count = count + 1
+ )
+ count
+ }
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/IsNoDataTile.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/IsNoDataTile.scala
new file mode 100644
index 000000000..7b360a07c
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/IsNoDataTile.scala
@@ -0,0 +1,52 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.tilestats
+import astraea.spark.rasterframes.expressions.{NullToValue, UnaryRasterOp}
+import astraea.spark.rasterframes.model.TileContext
+import geotrellis.raster._
+import org.apache.spark.sql.{Column, TypedColumn}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.types.{BooleanType, DataType}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Produces `true` if all the cells in a given tile are no-data",
+ arguments = """
+ Arguments:
+ * tile - tile column to analyze""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ false"""
+)
+case class IsNoDataTile(child: Expression) extends UnaryRasterOp
+ with CodegenFallback with NullToValue {
+ override def nodeName: String = "is_no_data_tile"
+ override def na: Any = true
+ override def dataType: DataType = BooleanType
+ override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = tile.isNoDataTile
+}
+object IsNoDataTile {
+ import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.boolEnc
+ def apply(tile: Column): TypedColumn[Any, Boolean] =
+ new Column(IsNoDataTile(tile.expr)).as[Boolean]
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/NoDataCells.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/NoDataCells.scala
new file mode 100644
index 000000000..89c2ae10b
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/NoDataCells.scala
@@ -0,0 +1,63 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.tilestats
+
+import astraea.spark.rasterframes.expressions.{UnaryRasterOp, NullToValue}
+import astraea.spark.rasterframes.model.TileContext
+import geotrellis.raster._
+import org.apache.spark.sql.{Column, TypedColumn}
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.types.{DataType, LongType}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Counts the number of no-data cells in a tile",
+ arguments = """
+ Arguments:
+ * tile - tile column to analyze""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ 12"""
+)
+case class NoDataCells(child: Expression) extends UnaryRasterOp
+ with CodegenFallback with NullToValue {
+ override def nodeName: String = "no_data_cells"
+ override def dataType: DataType = LongType
+ override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = NoDataCells.op(tile)
+ override def na: Any = 0L
+}
+object NoDataCells {
+ import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.longEnc
+ def apply(tile: Column): TypedColumn[Any, Long] =
+ new Column(NoDataCells(tile.expr)).as[Long]
+
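+  /** Count NODATA cells, traversing via the integer or floating-point path per cell type. */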
+ val op = (tile: Tile) => {
+ var count: Long = 0
+ tile.dualForeach(
+ z ⇒ if(isNoData(z)) count = count + 1
+ ) (
+ z ⇒ if(isNoData(z)) count = count + 1
+ )
+ count
+ }
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/Sum.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/Sum.scala
new file mode 100644
index 000000000..cfa10666b
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/Sum.scala
@@ -0,0 +1,57 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.tilestats
+import astraea.spark.rasterframes.expressions.UnaryRasterOp
+import astraea.spark.rasterframes.model.TileContext
+import geotrellis.raster._
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.types.{DataType, DoubleType}
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Computes the sum of all the cells in a tile..",
+ arguments = """
+ Arguments:
+ * tile - tile to sum up""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile5);
+ 2135.34"""
+)
+case class Sum(child: Expression) extends UnaryRasterOp with CodegenFallback {
+ override def nodeName: String = "tile_sum"
+ override def dataType: DataType = DoubleType
+ override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = Sum.op(tile)
+}
+
+object Sum {
+ import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc
+ def apply(tile: Column): TypedColumn[Any, Double] =
+ new Column(Sum(tile.expr)).as[Double]
+
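+  /** Sum of the data cells, ignoring NODATA; an all-NODATA tile yields 0.0. */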
+  val op = (tile: Tile) => {
+ var sum: Double = 0.0
+ tile.foreachDouble(z ⇒ if(isData(z)) sum = sum + z)
+ sum
+ }
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileHistogram.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileHistogram.scala
new file mode 100644
index 000000000..d7fe7d0c1
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileHistogram.scala
@@ -0,0 +1,60 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.tilestats
+
+import astraea.spark.rasterframes.expressions.UnaryRasterOp
+import astraea.spark.rasterframes.model.TileContext
+import astraea.spark.rasterframes.stats.CellHistogram
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.CatalystTypeConverters
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.types.DataType
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Computes per-tile histogram.",
+ arguments = """
+ Arguments:
+ * tile - tile column to analyze""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ ..."""
+)
+case class TileHistogram(child: Expression) extends UnaryRasterOp
+ with CodegenFallback {
+ override def nodeName: String = "tile_histogram"
+ override protected def eval(tile: Tile, ctx: Option[TileContext]): Any =
+ TileHistogram.converter(TileHistogram.op(tile))
+ override def dataType: DataType = CellHistogram.schema
+}
+
+object TileHistogram {
+ def apply(tile: Column): TypedColumn[Any, CellHistogram] =
+ new Column(TileHistogram(tile.expr)).as[CellHistogram]
+
+ private lazy val converter = CatalystTypeConverters.createToCatalystConverter(CellHistogram.schema)
+
+ /** Single tile histogram. */
+ val op = (t: Tile) ⇒ CellHistogram(t)
+}
\ No newline at end of file
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMax.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMax.scala
new file mode 100644
index 000000000..0e2595b2a
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMax.scala
@@ -0,0 +1,62 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.tilestats
+
+import astraea.spark.rasterframes.expressions.{NullToValue, UnaryRasterOp}
+import astraea.spark.rasterframes.model.TileContext
+import geotrellis.raster.{Tile, isData}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.types.{DataType, DoubleType}
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Determines the maximum cell value.",
+ arguments = """
+ Arguments:
+ * tile - tile column to analyze""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ 1"""
+)
+case class TileMax(child: Expression) extends UnaryRasterOp
+ with NullToValue with CodegenFallback {
+ override def nodeName: String = "tile_max"
+ override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = TileMax.op(tile)
+ override def dataType: DataType = DoubleType
+ override def na: Any = Double.MinValue
+}
+object TileMax {
+ import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc
+
+ def apply(tile: Column): TypedColumn[Any, Double] =
+ new Column(TileMax(tile.expr)).as[Double]
+
+ /** Find the maximum cell value. */
+ val op = (tile: Tile) ⇒ {
+ var max: Double = Double.MinValue
+ tile.foreachDouble(z ⇒ if(isData(z)) max = math.max(max, z))
+ if (max == Double.MinValue) Double.NaN
+ else max
+ }
+}
\ No newline at end of file
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMean.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMean.scala
new file mode 100644
index 000000000..e23e68c08
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMean.scala
@@ -0,0 +1,67 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.tilestats
+
+import astraea.spark.rasterframes.expressions.{NullToValue, UnaryRasterOp}
+import astraea.spark.rasterframes.model.TileContext
+import geotrellis.raster.{Tile, isData}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.types.{DataType, DoubleType}
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Computes the mean cell value of a tile.",
+ arguments = """
+ Arguments:
+ * tile - tile column to analyze""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ -1"""
+)
+case class TileMean(child: Expression) extends UnaryRasterOp
+ with NullToValue with CodegenFallback {
+ override def nodeName: String = "tile_mean"
+ override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = TileMean.op(tile)
+ override def dataType: DataType = DoubleType
+ override def na: Any = Double.NaN
+}
+object TileMean {
+ import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc
+
+ def apply(tile: Column): TypedColumn[Any, Double] =
+ new Column(TileMean(tile.expr)).as[Double]
+
+ /** Single tile mean. */
+ val op = (t: Tile) ⇒ {
+ var sum: Double = 0.0
+ var count: Long = 0
+ t.dualForeach(
+ z ⇒ if(isData(z)) { count = count + 1; sum = sum + z }
+ ) (
+ z ⇒ if(isData(z)) { count = count + 1; sum = sum + z }
+ )
+ sum/count
+ }
+}
\ No newline at end of file
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMin.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMin.scala
new file mode 100644
index 000000000..4d2edc9b3
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMin.scala
@@ -0,0 +1,62 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.tilestats
+
+import astraea.spark.rasterframes.expressions.{NullToValue, UnaryRasterOp}
+import astraea.spark.rasterframes.model.TileContext
+import geotrellis.raster.{Tile, isData}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.types.{DataType, DoubleType}
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Determines the minimum cell value.",
+ arguments = """
+ Arguments:
+ * tile - tile column to analyze""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ -1"""
+)
+case class TileMin(child: Expression) extends UnaryRasterOp
+ with NullToValue with CodegenFallback {
+ override def nodeName: String = "tile_min"
+ override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = TileMin.op(tile)
+ override def dataType: DataType = DoubleType
+ override def na: Any = Double.MaxValue
+}
+object TileMin {
+ import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc
+
+ def apply(tile: Column): TypedColumn[Any, Double] =
+ new Column(TileMin(tile.expr)).as[Double]
+
+ /** Find the minimum cell value. */
+ val op = (tile: Tile) ⇒ {
+ var min: Double = Double.MaxValue
+ tile.foreachDouble(z ⇒ if(isData(z)) min = math.min(min, z))
+ if (min == Double.MaxValue) Double.NaN
+ else min
+ }
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileStats.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileStats.scala
new file mode 100644
index 000000000..015f048e8
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileStats.scala
@@ -0,0 +1,59 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.tilestats
+
+import astraea.spark.rasterframes.expressions.UnaryRasterOp
+import astraea.spark.rasterframes.model.TileContext
+import astraea.spark.rasterframes.stats.CellStatistics
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.CatalystTypeConverters
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.types.DataType
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Computes per-tile descriptive statistics.",
+ arguments = """
+ Arguments:
+ * tile - tile column to analyze""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ ..."""
+)
+case class TileStats(child: Expression) extends UnaryRasterOp
+ with CodegenFallback {
+ override def nodeName: String = "tile_stats"
+ override protected def eval(tile: Tile, ctx: Option[TileContext]): Any =
+ TileStats.converter(TileStats.op(tile).orNull)
+ override def dataType: DataType = CellStatistics.schema
+}
+object TileStats {
+ def apply(tile: Column): TypedColumn[Any, CellStatistics] =
+ new Column(TileStats(tile.expr)).as[CellStatistics]
+
+ private lazy val converter = CatalystTypeConverters.createToCatalystConverter(CellStatistics.schema)
+
+ /** Single tile statistics. */
+ val op = (t: Tile) ⇒ CellStatistics(t)
+}
\ No newline at end of file
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/BoundsToGeometry.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/BoundsToGeometry.scala
similarity index 94%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/BoundsToGeometry.scala
rename to core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/BoundsToGeometry.scala
index 0f07549c2..9d6a8c652 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/BoundsToGeometry.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/BoundsToGeometry.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea, Inc.
+ * Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -19,10 +19,11 @@
*
*/
-package astraea.spark.rasterframes.expressions
+package astraea.spark.rasterframes.expressions.transformers
import astraea.spark.rasterframes.encoders.CatalystSerializer
import astraea.spark.rasterframes.encoders.CatalystSerializer._
+import astraea.spark.rasterframes.expressions.row
import com.vividsolutions.jts.geom.{Envelope, Geometry}
import geotrellis.vector.Extent
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/DebugRender.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/DebugRender.scala
new file mode 100644
index 000000000..c26cc6b51
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/DebugRender.scala
@@ -0,0 +1,77 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.transformers
+import astraea.spark.rasterframes.expressions.UnaryRasterOp
+import astraea.spark.rasterframes.model.TileContext
+import astraea.spark.rasterframes.util.TileAsMatrix
+import geotrellis.raster.Tile
+import geotrellis.raster.render.ascii.AsciiArtEncoder
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.types.{DataType, StringType}
+import org.apache.spark.sql.{Column, TypedColumn}
+import org.apache.spark.unsafe.types.UTF8String
+
+abstract class DebugRender(asciiArt: Boolean) extends UnaryRasterOp
+ with CodegenFallback with Serializable {
+ override def dataType: DataType = StringType
+
+ override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = {
+ UTF8String.fromString(if (asciiArt)
+ s"\n${tile.renderAscii(AsciiArtEncoder.Palette.NARROW)}\n"
+ else
+ s"\n${tile.renderMatrix(6)}\n"
+ )
+ }
+}
+
+object DebugRender {
+ import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.stringEnc
+
+ @ExpressionDescription(
+ usage = "_FUNC_(tile) - Coverts the contents of the given tile an ASCII art string rendering",
+ arguments = """
+ Arguments:
+ * tile - tile to render"""
+ )
+ case class RenderAscii(child: Expression) extends DebugRender(true) {
+ override def nodeName: String = "render_ascii"
+ }
+ object RenderAscii {
+ def apply(tile: Column): TypedColumn[Any, String] =
+ new Column(RenderAscii(tile.expr)).as[String]
+ }
+
+ @ExpressionDescription(
+ usage = "_FUNC_(tile) - Coverts the contents of the given tile to a 2-d array of numberic values",
+ arguments = """
+ Arguments:
+ * tile - tile to render"""
+ )
+ case class RenderMatrix(child: Expression) extends DebugRender(false) {
+ override def nodeName: String = "render_matrix"
+ }
+ object RenderMatrix {
+ def apply(tile: Column): TypedColumn[Any, String] =
+ new Column(RenderMatrix(tile.expr)).as[String]
+ }
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/GeometryToBounds.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/GeometryToBounds.scala
similarity index 96%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/GeometryToBounds.scala
rename to core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/GeometryToBounds.scala
index bee66a7a9..4e08ad9ea 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/GeometryToBounds.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/GeometryToBounds.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea, Inc.
+ * Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -19,8 +19,7 @@
*
*/
-package astraea.spark.rasterframes.expressions
-
+package astraea.spark.rasterframes.expressions.transformers
import astraea.spark.rasterframes.encoders.CatalystSerializer
import astraea.spark.rasterframes.encoders.CatalystSerializer._
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/Mask.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/Mask.scala
new file mode 100644
index 000000000..03e81efc2
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/Mask.scala
@@ -0,0 +1,145 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.transformers
+import astraea.spark.rasterframes.encoders.CatalystSerializer._
+import astraea.spark.rasterframes.expressions.DynamicExtractors._
+import astraea.spark.rasterframes.expressions.row
+import com.typesafe.scalalogging.LazyLogging
+import geotrellis.raster
+import geotrellis.raster.Tile
+import geotrellis.raster.mapalgebra.local.{Defined, InverseMask => gtInverseMask, Mask => gtMask}
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription, Literal, TernaryExpression}
+import org.apache.spark.sql.rf.TileUDT
+import org.apache.spark.sql.types.DataType
+import org.apache.spark.sql.{Column, TypedColumn}
+
+abstract class Mask(val left: Expression, val middle: Expression, val right: Expression, inverse: Boolean)
+ extends TernaryExpression with CodegenFallback with Serializable with LazyLogging {
+
+ override def children: Seq[Expression] = Seq(left, middle, right)
+
+ override def checkInputDataTypes(): TypeCheckResult = {
+ if (!tileExtractor.isDefinedAt(left.dataType)) {
+ TypeCheckFailure(s"Input type '${left.dataType}' does not conform to a raster type.")
+ } else if (!tileExtractor.isDefinedAt(middle.dataType)) {
+ TypeCheckFailure(s"Input type '${middle.dataType}' does not conform to a raster type.")
+ } else if (!intArgExtractor.isDefinedAt(right.dataType)) {
+ TypeCheckFailure(s"Input type '${right.dataType}' isn't an integral type.")
+ } else TypeCheckSuccess
+ }
+ override def dataType: DataType = left.dataType
+
+ override protected def nullSafeEval(leftInput: Any, middleInput: Any, rightInput: Any): Any = {
+ implicit val tileSer = TileUDT.tileSerializer
+ val (leftTile, leftCtx) = tileExtractor(left.dataType)(row(leftInput))
+ val (rightTile, rightCtx) = tileExtractor(middle.dataType)(row(middleInput))
+
+ if (leftCtx.isEmpty && rightCtx.isDefined)
+ logger.warn(
+ s"Right-hand parameter '${middle}' provided an extent and CRS, but the left-hand parameter " +
+ s"'${left}' didn't have any. Because the left-hand side defines output type, the right-hand context will be lost.")
+
+ if (leftCtx.isDefined && rightCtx.isDefined && leftCtx != rightCtx)
+ logger.warn(s"Both '${left}' and '${middle}' provided an extent and CRS, but they are different. Left-hand side will be used.")
+
+ val maskValue = intArgExtractor(right.dataType)(rightInput)
+
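+    // A mask value of 0 means "mask by presence": Defined maps the mask tile to
+    // 1 (data) / 0 (NODATA), so matching 0 targets cells where the mask is NODATA.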
+ val masking = if (maskValue.value == 0) Defined(rightTile)
+ else rightTile
+
+ val result = if (inverse)
+ gtInverseMask(leftTile, masking, maskValue.value, raster.NODATA)
+ else
+ gtMask(leftTile, masking, maskValue.value, raster.NODATA)
+
+ leftCtx match {
+ case Some(ctx) => ctx.toProjectRasterTile(result).toInternalRow
+ case None => result.toInternalRow
+ }
+ }
+}
+object Mask {
+ import astraea.spark.rasterframes.encoders.StandardEncoders.singlebandTileEncoder
+
+ @ExpressionDescription(
+ usage = "_FUNC_(target, mask) - Generate a tile with the values from the data tile, but where cells in the masking tile contain NODATA, replace the data value with NODATA.",
+ arguments = """
+ Arguments:
+ * target - tile to mask
+ * mask - masking definition""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(target, mask);
+ ..."""
+ )
+ case class MaskByDefined(target: Expression, mask: Expression)
+ extends Mask(target, mask, Literal(0), false) {
+ override def nodeName: String = "mask"
+ }
+ object MaskByDefined {
+ def apply(targetTile: Column, maskTile: Column): TypedColumn[Any, Tile] =
+ new Column(MaskByDefined(targetTile.expr, maskTile.expr)).as[Tile]
+ }
+
+ @ExpressionDescription(
+ usage = "_FUNC_(target, mask) - Generate a tile with the values from the data tile, but where cells in the masking tile DO NOT contain NODATA, replace the data value with NODATA",
+ arguments = """
+ Arguments:
+ * target - tile to mask
+ * mask - masking definition""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(target, mask);
+ ..."""
+ )
+ case class InverseMaskByDefined(leftTile: Expression, rightTile: Expression)
+ extends Mask(leftTile, rightTile, Literal(0), true) {
+ override def nodeName: String = "inverse_mask"
+ }
+ object InverseMaskByDefined {
+ def apply(srcTile: Column, maskingTile: Column): TypedColumn[Any, Tile] =
+ new Column(InverseMaskByDefined(srcTile.expr, maskingTile.expr)).as[Tile]
+ }
+
+ @ExpressionDescription(
+ usage = "_FUNC_(target, mask, maskValue) - Generate a tile with the values from the data tile, but where cells in the masking tile contain the masking value, replace the data value with NODATA.",
+ arguments = """
+ Arguments:
+ * target - tile to mask
+ * mask - masking definition""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(target, mask, maskValue);
+ ..."""
+ )
+ case class MaskByValue(leftTile: Expression, rightTile: Expression, maskValue: Expression)
+ extends Mask(leftTile, rightTile, maskValue, false) {
+ override def nodeName: String = "mask_by_value"
+ }
+ object MaskByValue {
+ def apply(srcTile: Column, maskingTile: Column, maskValue: Column): TypedColumn[Any, Tile] =
+ new Column(MaskByValue(srcTile.expr, maskingTile.expr, maskValue.expr)).as[Tile]
+ }
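+
+  // Usage sketch (hypothetical columns): MaskByValue($"data", $"quality", lit(3)) sets a
+  // data cell to NODATA wherever the corresponding `quality` cell equals 3.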
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/RasterRefToTile.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterRefToTile.scala
similarity index 91%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/RasterRefToTile.scala
rename to core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterRefToTile.scala
index a2a9a961b..c3aa3f337 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/RasterRefToTile.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterRefToTile.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea, Inc.
+ * Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -19,18 +19,19 @@
*
*/
-package astraea.spark.rasterframes.expressions
+package astraea.spark.rasterframes.expressions.transformers
import astraea.spark.rasterframes.encoders.CatalystSerializer
import astraea.spark.rasterframes.encoders.CatalystSerializer._
+import astraea.spark.rasterframes.expressions.row
import astraea.spark.rasterframes.ref.RasterRef
import com.typesafe.scalalogging.LazyLogging
import geotrellis.raster.Tile
-import org.apache.spark.sql.{Column, TypedColumn}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, UnaryExpression}
import org.apache.spark.sql.rf._
import org.apache.spark.sql.types.DataType
+import org.apache.spark.sql.{Column, TypedColumn}
/**
* Realizes a RasterRef into a Tile.
@@ -47,6 +48,7 @@ case class RasterRefToTile(child: Expression) extends UnaryExpression
override def dataType: DataType = new TileUDT
override protected def nullSafeEval(input: Any): Any = {
+ implicit val ser = TileUDT.tileSerializer
val ref = row(input).to[RasterRef]
(ref.tile: Tile).toInternalRow
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/RasterSourceToRasterRefs.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterSourceToRasterRefs.scala
similarity index 89%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/RasterSourceToRasterRefs.scala
rename to core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterSourceToRasterRefs.scala
index f754b8401..2581f8be5 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/RasterSourceToRasterRefs.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterSourceToRasterRefs.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea, Inc.
+ * Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -19,19 +19,19 @@
*
*/
-package astraea.spark.rasterframes.expressions
+package astraea.spark.rasterframes.expressions.transformers
import astraea.spark.rasterframes.encoders.CatalystSerializer
import astraea.spark.rasterframes.encoders.CatalystSerializer._
import astraea.spark.rasterframes.ref.RasterRef
import astraea.spark.rasterframes.util._
import com.typesafe.scalalogging.LazyLogging
-import org.apache.spark.sql.{Column, TypedColumn}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.rf._
import org.apache.spark.sql.types.{DataType, StructField, StructType}
+import org.apache.spark.sql.{Column, TypedColumn}
import scala.util.control.NonFatal
@@ -44,20 +44,20 @@ import scala.util.control.NonFatal
case class RasterSourceToRasterRefs(children: Seq[Expression], applyTiling: Boolean) extends Expression
with Generator with CodegenFallback with ExpectsInputTypes with LazyLogging {
- private val rasterSourceType = new RasterSourceUDT()
+ private val RasterSourceType = new RasterSourceUDT()
private val rasterRefSchema = CatalystSerializer[RasterRef].schema
- override def inputTypes: Seq[DataType] = Seq.fill(children.size)(rasterSourceType)
+ override def inputTypes: Seq[DataType] = Seq.fill(children.size)(RasterSourceType)
override def nodeName: String = "raster_source_to_raster_ref"
override def elementSchema: StructType = StructType(
- children.map(e ⇒ StructField(e.name, rasterRefSchema, true))
+ children.map(e ⇒ StructField(e.name, rasterRefSchema, false))
)
override def eval(input: InternalRow): TraversableOnce[InternalRow] = {
try {
val refs = children.map { child ⇒
- val src = rasterSourceType.deserialize(child.eval(input))
+ val src = RasterSourceType.deserialize(child.eval(input))
if (applyTiling) src.nativeTiling.map(e ⇒ RasterRef(src, Some(e))) else Seq(RasterRef(src))
}
refs.transpose.map(ts ⇒ InternalRow(ts.map(_.toInternalRow): _*))
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/RasterSourceToTiles.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterSourceToTiles.scala
similarity index 95%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/RasterSourceToTiles.scala
rename to core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterSourceToTiles.scala
index 224c70823..2b1caa3ba 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/RasterSourceToTiles.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterSourceToTiles.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea, Inc.
+ * Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -19,7 +19,7 @@
*
*/
-package astraea.spark.rasterframes.expressions
+package astraea.spark.rasterframes.expressions.transformers
import astraea.spark.rasterframes.encoders.CatalystSerializer._
import astraea.spark.rasterframes.util._
@@ -53,6 +53,8 @@ case class RasterSourceToTiles(children: Seq[Expression], applyTiling: Boolean)
)
override def eval(input: InternalRow): TraversableOnce[InternalRow] = {
+ implicit val ser = TileUDT.tileSerializer
+
try {
val refs = children.map { child ⇒
val src = RasterSourceType.deserialize(child.eval(input))
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/ReprojectGeometry.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/ReprojectGeometry.scala
similarity index 98%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/ReprojectGeometry.scala
rename to core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/ReprojectGeometry.scala
index e10cd323a..7e78c5942 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/ReprojectGeometry.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/ReprojectGeometry.scala
@@ -19,11 +19,11 @@
*
*/
-package astraea.spark.rasterframes.expressions
+package astraea.spark.rasterframes.expressions.transformers
import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.encoders.{CatalystSerializer, serialized_literal}
import astraea.spark.rasterframes.encoders.CatalystSerializer._
+import astraea.spark.rasterframes.encoders.{CatalystSerializer, serialized_literal}
import astraea.spark.rasterframes.jts.ReprojectionTransformer
import com.vividsolutions.jts.geom.Geometry
import geotrellis.proj4.CRS
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/SetCellType.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/SetCellType.scala
similarity index 64%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/SetCellType.scala
rename to core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/SetCellType.scala
index d0bc0d3af..96fcd4288 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/SetCellType.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/SetCellType.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea, Inc.
+ * Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -19,11 +19,13 @@
*
*/
-package astraea.spark.rasterframes.expressions
+package astraea.spark.rasterframes.expressions.transformers
import astraea.spark.rasterframes.encoders.CatalystSerializer
import astraea.spark.rasterframes.encoders.CatalystSerializer._
import astraea.spark.rasterframes.encoders.StandardEncoders._
+import astraea.spark.rasterframes.expressions.DynamicExtractors.tileExtractor
+import astraea.spark.rasterframes.expressions.row
import geotrellis.raster.{CellType, Tile}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
@@ -41,50 +43,54 @@ import org.apache.spark.unsafe.types.UTF8String
*
* @since 9/11/18
*/
-case class SetCellType(tile: Expression, cellType: Expression) extends BinaryExpression with CodegenFallback {
+case class SetCellType(tile: Expression, cellType: Expression)
+ extends BinaryExpression with CodegenFallback {
def left = tile
def right = cellType
override def nodeName: String = "set_cell_type"
- override def dataType: DataType = new TileUDT()
+ override def dataType: DataType = left.dataType
private val ctSchema = CatalystSerializer[CellType].schema
override def checkInputDataTypes(): TypeCheckResult = {
- RequiresTile.check(tile) match {
- case TypeCheckSuccess ⇒
- right.dataType match {
- case StringType ⇒ TypeCheckSuccess
- case st: StructType if st == ctSchema ⇒ TypeCheckSuccess
- case _ ⇒ TypeCheckFailure(
- s"Expected CellType but received '${right.dataType.simpleString}'"
- )
- }
- case o ⇒ o
- }
+ if (!tileExtractor.isDefinedAt(left.dataType))
+ TypeCheckFailure(s"Input type '${left.dataType}' does not conform to a raster type.")
+ else
+ right.dataType match {
+ case StringType => TypeCheckSuccess
+ case t if t.conformsTo(ctSchema) => TypeCheckSuccess
+ case _ =>
+ TypeCheckFailure(s"Expected CellType but received '${right.dataType.simpleString}'")
+ }
}
private def toCellType(datum: Any): CellType = {
right.dataType match {
- case StringType ⇒
+ case StringType =>
val text = datum.asInstanceOf[UTF8String].toString
CellType.fromName(text)
- case st: StructType if st == ctSchema ⇒
+ case st if st.conformsTo(ctSchema) =>
row(datum).to[CellType]
}
}
- override protected def nullSafeEval(left: Any, right: Any): InternalRow = {
- val t = row(left).to[Tile]
- val ct = toCellType(right)
- t.convert(ct).toInternalRow
+ override protected def nullSafeEval(tileInput: Any, ctInput: Any): InternalRow = {
+ implicit val tileSer = TileUDT.tileSerializer
+
+ val (tile, ctx) = tileExtractor(left.dataType)(row(tileInput))
+ val ct = toCellType(ctInput)
+ val result = tile.convert(ct)
+
+ ctx match {
+ case Some(c) => c.toProjectRasterTile(result).toInternalRow
+ case None => result.toInternalRow
+ }
}
}
object SetCellType {
-
def apply(tile: Column, cellType: CellType): TypedColumn[Any, Tile] =
new Column(new SetCellType(tile.expr, lit(cellType.name).expr)).as[Tile]
def apply(tile: Column, cellType: String): TypedColumn[Any, Tile] =
new Column(new SetCellType(tile.expr, lit(cellType).expr)).as[Tile]
-
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/TileToArrayDouble.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/TileToArrayDouble.scala
new file mode 100644
index 000000000..02a4bc4e8
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/TileToArrayDouble.scala
@@ -0,0 +1,49 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.transformers
+import astraea.spark.rasterframes.expressions.UnaryRasterOp
+import astraea.spark.rasterframes.model.TileContext
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.util.ArrayData
+import org.apache.spark.sql.types.{DataType, DataTypes, DoubleType}
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Coverts the contents of the given tile to an array of double floating-point values",
+ arguments = """
+ Arguments:
+ * tile - tile to convert"""
+)
+case class TileToArrayDouble(child: Expression) extends UnaryRasterOp with CodegenFallback {
+ override def nodeName: String = "tile_to_array_double"
+ override def dataType: DataType = DataTypes.createArrayType(DoubleType, false)
+ override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = {
+ ArrayData.toArrayData(tile.toArrayDouble())
+ }
+}
+object TileToArrayDouble {
+ import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.arrayEnc
+ def apply(tile: Column): TypedColumn[Any, Array[Double]] =
+ new Column(TileToArrayDouble(tile.expr)).as[Array[Double]]
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/TileToArrayInt.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/TileToArrayInt.scala
new file mode 100644
index 000000000..31ad81516
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/TileToArrayInt.scala
@@ -0,0 +1,50 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.expressions.transformers
+
+import astraea.spark.rasterframes.expressions.UnaryRasterOp
+import astraea.spark.rasterframes.model.TileContext
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.catalyst.util.ArrayData
+import org.apache.spark.sql.types.{DataType, DataTypes, IntegerType}
+import org.apache.spark.sql.{Column, TypedColumn}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Coverts the contents of the given tile to an array of integer values",
+ arguments = """
+ Arguments:
+ * tile - tile to convert"""
+)
+case class TileToArrayInt(child: Expression) extends UnaryRasterOp with CodegenFallback {
+ override def nodeName: String = "tile_to_array_int"
+ override def dataType: DataType = DataTypes.createArrayType(IntegerType, false)
+ override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = {
+ ArrayData.toArrayData(tile.toArray())
+ }
+}
+object TileToArrayInt {
+ import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.arrayEnc
+ def apply(tile: Column): TypedColumn[Any, Array[Int]] =
+ new Column(TileToArrayInt(tile.expr)).as[Array[Int]]
+}
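
A quick sketch of how the two array transformers might be used from the DataFrame API (assumes a DataFrame df with a tile column named "tile"):

    import astraea.spark.rasterframes.expressions.transformers.{TileToArrayDouble, TileToArrayInt}
    import org.apache.spark.sql.functions.col

    val ints = df.select(TileToArrayInt(col("tile")))      // one Array[Int] per row
    val dbls = df.select(TileToArrayDouble(col("tile")))   // one Array[Double] per row
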
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/URIToRasterSource.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/URIToRasterSource.scala
similarity index 93%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/URIToRasterSource.scala
rename to core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/URIToRasterSource.scala
index a857324eb..0821e43db 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/URIToRasterSource.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/URIToRasterSource.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea, Inc.
+ * Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -19,11 +19,10 @@
*
*/
-package astraea.spark.rasterframes.expressions
+package astraea.spark.rasterframes.expressions.transformers
import java.net.URI
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
import astraea.spark.rasterframes.ref.RasterSource.ReadCallback
import astraea.spark.rasterframes.ref.{RasterRef, RasterSource}
import com.typesafe.scalalogging.LazyLogging
@@ -54,7 +53,7 @@ case class URIToRasterSource(override val child: Expression, accumulator: Option
val uriString = input.asInstanceOf[UTF8String].toString
val uri = URI.create(uriString)
val ref = RasterSource(uri, accumulator)
- ref.toInternalRow
+ RasterSourceUDT.serialize(ref)
}
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/DataFrameMethods.scala b/core/src/main/scala/astraea/spark/rasterframes/extensions/DataFrameMethods.scala
index 8f8080c13..ca38322ac 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/extensions/DataFrameMethods.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/extensions/DataFrameMethods.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql.rf.TileUDT
import org.apache.spark.sql.types.{MetadataBuilder, StructField}
import org.apache.spark.sql.{Column, DataFrame, TypedColumn}
import spray.json.JsonFormat
-
+import astraea.spark.rasterframes.encoders.StandardEncoders._
import scala.util.Try
/**
diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/RasterFrameMethods.scala b/core/src/main/scala/astraea/spark/rasterframes/extensions/RasterFrameMethods.scala
index fd658b596..e83e55fd3 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/extensions/RasterFrameMethods.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/extensions/RasterFrameMethods.scala
@@ -33,7 +33,8 @@ import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.{Metadata, TimestampType}
import spray.json._
-
+import astraea.spark.rasterframes.encoders.StandardEncoders._
+import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders._
import scala.reflect.runtime.universe._
/**
@@ -44,10 +45,6 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame]
with RFSpatialColumnMethods with MetadataKeys with LazyLogging {
import Implicits.{WithDataFrameMethods, WithRasterFrameMethods}
- private val _stableDF = self
- import _stableDF.sqlContext.implicits._
-
-
/**
* A convenience over `DataFrame.withColumnRenamed` whereby the `RasterFrame` type is maintained.
*/
@@ -190,6 +187,7 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame]
def updateBounds[T: SpatialComponent: Boundable: JsonFormat: TypeTag](tlm: TileLayerMetadata[T],
keys: Dataset[T]): DataFrame = {
+ implicit val enc = Encoders.product[KeyBounds[T]]
val keyBounds = keys
.map(k ⇒ KeyBounds(k, k))
.reduce(_ combine _)
@@ -238,6 +236,7 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame]
def toMultibandTileLayerRDD(tileCols: Column*): Either[MultibandTileLayerRDD[SpatialKey], MultibandTileLayerRDD[SpaceTimeKey]] =
tileLayerMetadata.fold(
tlm ⇒ {
+ implicit val genEnc = expressionEncoder[(SpatialKey, Array[Tile])]
val rdd = self
.select(self.spatialKeyColumn, array(tileCols: _*)).as[(SpatialKey, Array[Tile])]
.rdd
@@ -247,6 +246,7 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame]
Left(ContextRDD(rdd, tlm))
},
tlm ⇒ {
+ implicit val genEnc = expressionEncoder[(SpatialKey, TemporalKey, Array[Tile])]
val rdd = self
.select(self.spatialKeyColumn, self.temporalKeyColumn.get, array(tileCols: _*)).as[(SpatialKey, TemporalKey, Array[Tile])]
.rdd
@@ -259,48 +259,6 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame]
private[rasterframes] def extract[M: JsonFormat](metadataKey: String)(md: Metadata) =
md.getMetadata(metadataKey).json.parseJson.convertTo[M]
- // TODO: Take care of DRY below
-// private def rasterize[T <: CellGrid: TypeTag](
-// tileCols: Seq[Column],
-// rasterCols: Int,
-// rasterRows: Int,
-// resampler: ResampleMethod): ProjectedRaster[T] = {
-//
-// val clipped = clipLayerExtent
-//
-// val md = clipped.tileLayerMetadata.widen
-// val newLayout = LayoutDefinition(md.extent, TileLayout(1, 1, rasterCols, rasterRows))
-//
-// val trans = md.mapTransform
-//
-// //val cell_type = rdd.first()._2.cell_type
-// val keyBounds = Bounds(SpatialKey(0, 0), SpatialKey(0, 0))
-// val newLayerMetadata =
-// md.copy(layout = newLayout, bounds = keyBounds)
-//
-//
-// val newLayer = typeOf[T] match {
-// case tpe if tpe <:< typeOf[Tile] ⇒
-// val r = clipped.toTileLayerRDD(tileCols.head)
-// .fold(identity, _.map { case (stk, t) ⇒ (stk.spatialKey, t) }) // <-- Drops the temporal key outright
-// .map { case (key, tile) ⇒ (ProjectedExtent(trans(key), md.crs), tile) }
-// ContextRDD(r, md)
-// .tileToLayout(newLayerMetadata, Tiler.Options(resampler))
-// case tpe if tpe <:< typeOf[MultibandTile] ⇒
-// val r = clipped.toMultibandTileLayerRDD(tileCols: _*)
-// .fold(identity, _.map { case (stk, t) ⇒ (stk.spatialKey, t) }) // <-- Drops the temporal key outright
-// .map { case (key, tile) ⇒ (ProjectedExtent(trans(key), md.crs), tile) }
-// ContextRDD(r, md)
-// .tileToLayout(newLayerMetadata, Tiler.Options(resampler))
-// }
-//
-// val stitchedTile = newLayer.stitch()
-//
-// val croppedTile = stitchedTile.crop(rasterCols, rasterRows)
-//
-// ProjectedRaster(croppedTile, md.extent, md.crs)
-// }
-
 /** Convert the tiles in the RasterFrame into a single raster. For RasterFrames keyed with temporal keys, they
 * will be merged nondeterministically. */
def toRaster(tileCol: Column,
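
The recurring pattern in the hunks above: with the stable-DataFrame implicits import removed, each as/map call now declares a locally scoped Encoder instead. A stripped-down sketch of the same idea (Bounds is a hypothetical stand-in for KeyBounds[T]):

    import org.apache.spark.sql.{Dataset, Encoders}

    case class Bounds(min: Int, max: Int)

    def combine(ds: Dataset[Int]): Bounds = {
      // Dataset.map requires an implicit Encoder; declare it where it is used
      implicit val enc = Encoders.product[Bounds]
      ds.map(k => Bounds(k, k))
        .reduce((a, b) => Bounds(math.min(a.min, b.min), math.max(a.max, b.max)))
    }
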
diff --git a/core/src/main/scala/astraea/spark/rasterframes/functions/CellCountAggregate.scala b/core/src/main/scala/astraea/spark/rasterframes/functions/CellCountAggregate.scala
deleted file mode 100644
index 856b367c6..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/functions/CellCountAggregate.scala
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2017 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- */
-
-package astraea.spark.rasterframes.functions
-
-import org.apache.spark.sql.{Column, TypedColumn}
-import org.apache.spark.sql.catalyst.dsl.expressions._
-import org.apache.spark.sql.catalyst.expressions.aggregate.DeclarativeAggregate
-import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, _}
-import org.apache.spark.sql.functions._
-import org.apache.spark.sql.rf.{TileUDT, _}
-import org.apache.spark.sql.types.{LongType, Metadata}
-
-/**
- * Cell count (data or NoData) aggregate function.
- *
- * @since 10/5/17
- * @param isData true if count should be of non-NoData cells, false if count should be of NoData cells.
- */
-case class CellCountAggregate(isData: Boolean, child: Expression) extends DeclarativeAggregate {
-
- override def prettyName: String =
- if (isData) "agg_data_cells"
- else "agg_no_data_cells"
-
- private lazy val count =
- AttributeReference("count", LongType, false, Metadata.empty)()
-
- override lazy val aggBufferAttributes = count :: Nil
-
- val initialValues = Seq(
- Literal(0L)
- )
-
- private val cellTest =
- if (isData) udf(dataCells)
- else udf(noDataCells)
-
- val updateExpressions = Seq(
- If(IsNull(child), count, Add(count, cellTest(new Column(child)).expr))
- )
-
- val mergeExpressions = Seq(
- count.left + count.right
- )
-
- val evaluateExpression = count
-
- def inputTypes = Seq(TileUDT)
-
- def nullable = true
-
- def dataType = LongType
-
- def children = Seq(child)
-}
-
-object CellCountAggregate {
- import astraea.spark.rasterframes.encoders.SparkDefaultEncoders._
- def apply(isData: Boolean, tile: Column): TypedColumn[Any, Long] =
- new Column(new CellCountAggregate(isData, tile.expr).toAggregateExpression()).as[Long]
-}
-
-
-
diff --git a/core/src/main/scala/astraea/spark/rasterframes/functions/CellStatsAggregate.scala b/core/src/main/scala/astraea/spark/rasterframes/functions/CellStatsAggregate.scala
deleted file mode 100644
index f45e7e0cb..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/functions/CellStatsAggregate.scala
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright 2017 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package astraea.spark.rasterframes.functions
-
-import geotrellis.raster.{Tile, _}
-import org.apache.spark.sql.Row
-import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
-import org.apache.spark.sql.rf.TileUDT
-import org.apache.spark.sql.types.{DataType, _}
-
-/**
- * Statistics aggregation function for a full column of tiles.
- *
- * @since 4/17/17
- */
-case class CellStatsAggregate() extends UserDefinedAggregateFunction {
- import CellStatsAggregate.C
-
- private val TileType = new TileUDT()
-
- override def inputSchema: StructType = StructType(StructField("value", TileType) :: Nil)
-
- override def dataType: DataType =
- StructType(
- Seq(
- StructField("dataCells", LongType),
- StructField("noDataCells", LongType),
- StructField("min", DoubleType),
- StructField("max", DoubleType),
- StructField("mean", DoubleType),
- StructField("variance", DoubleType)
- )
- )
-
- override def bufferSchema: StructType =
- StructType(
- Seq(
- StructField("dataCells", LongType),
- StructField("noDataCells", LongType),
- StructField("min", DoubleType),
- StructField("max", DoubleType),
- StructField("sum", DoubleType),
- StructField("sumSqr", DoubleType)
- )
- )
-
- override def deterministic: Boolean = true
-
- override def initialize(buffer: MutableAggregationBuffer): Unit = {
- buffer(C.COUNT) = 0L
- buffer(C.NODATA) = 0L
- buffer(C.MIN) = Double.MaxValue
- buffer(C.MAX) = Double.MinValue
- buffer(C.SUM) = 0.0
- buffer(C.SUM_SQRS) = 0.0
- }
-
- override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
- if(!input.isNullAt(0)) {
- val tile = input.getAs[Tile](0)
- var count = buffer.getLong(C.COUNT)
- var nodata = buffer.getLong(C.NODATA)
- var min = buffer.getDouble(C.MIN)
- var max = buffer.getDouble(C.MAX)
- var sum = buffer.getDouble(C.SUM)
- var sumSqr = buffer.getDouble(C.SUM_SQRS)
-
- tile.foreachDouble(c ⇒ if (isData(c)) {
- count += 1
- min = math.min(min, c)
- max = math.max(max, c)
- sum = sum + c
- sumSqr = sumSqr + c * c
- }
- else nodata += 1
- )
-
- buffer(C.COUNT) = count
- buffer(C.NODATA) = nodata
- buffer(C.MIN) = min
- buffer(C.MAX) = max
- buffer(C.SUM) = sum
- buffer(C.SUM_SQRS) = sumSqr
- }
- }
-
- override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
- buffer1(C.COUNT) = buffer1.getLong(C.COUNT) + buffer2.getLong(C.COUNT)
- buffer1(C.NODATA) = buffer1.getLong(C.NODATA) + buffer2.getLong(C.NODATA)
- buffer1(C.MIN) = math.min(buffer1.getDouble(C.MIN), buffer2.getDouble(C.MIN))
- buffer1(C.MAX) = math.max(buffer1.getDouble(C.MAX), buffer2.getDouble(C.MAX))
- buffer1(C.SUM) = buffer1.getDouble(C.SUM) + buffer2.getDouble(C.SUM)
- buffer1(C.SUM_SQRS) = buffer1.getDouble(C.SUM_SQRS) + buffer2.getDouble(C.SUM_SQRS)
- }
-
- override def evaluate(buffer: Row): Any = {
- val count = buffer.getLong(C.COUNT)
- val sum = buffer.getDouble(C.SUM)
- val sumSqr = buffer.getDouble(C.SUM_SQRS)
- val mean = sum / count
- val variance = sumSqr / count - mean * mean
- Row(count, buffer(C.NODATA), buffer(C.MIN), buffer(C.MAX), mean, variance)
- }
-}
-
-object CellStatsAggregate {
- /** Column index values. */
- private object C {
- final val COUNT = 0
- final val NODATA = 1
- final val MIN = 2
- final val MAX = 3
- final val SUM = 4
- final val SUM_SQRS = 5
- }
-}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/functions/HistogramAggregate.scala b/core/src/main/scala/astraea/spark/rasterframes/functions/HistogramAggregate.scala
deleted file mode 100644
index 972ae2b73..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/functions/HistogramAggregate.scala
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright 2017 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package astraea.spark.rasterframes.functions
-
-import java.nio.ByteBuffer
-
-import astraea.spark.rasterframes.encoders.StandardEncoders
-import astraea.spark.rasterframes.stats.CellHistogram
-import geotrellis.raster.Tile
-import geotrellis.raster.histogram.{Histogram, StreamingHistogram}
-import geotrellis.spark.util.KryoSerializer
-import org.apache.spark.sql.Row
-import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
-import org.apache.spark.sql.rf.TileUDT
-import org.apache.spark.sql.types._
-
-/**
- * Histogram aggregation function for a full column of tiles.
- *
- * @since 4/24/17
- */
-case class HistogramAggregate(numBuckets: Int) extends UserDefinedAggregateFunction {
- def this() = this(StreamingHistogram.DEFAULT_NUM_BUCKETS)
-
- private val TileType = new TileUDT()
-
- override def inputSchema: StructType = StructType(StructField("value", TileType) :: Nil)
-
- override def bufferSchema: StructType = StructType(StructField("buffer", BinaryType) :: Nil)
-
- override def dataType: DataType = StandardEncoders.histEncoder.schema
-
- override def deterministic: Boolean = true
-
- @transient
- private lazy val ser = KryoSerializer.ser.newInstance()
-
- @inline
- private def marshall(hist: Histogram[Double]): Array[Byte] = ser.serialize(hist).array()
-
- @inline
- private def unmarshall(blob: Array[Byte]): Histogram[Double] = ser.deserialize(ByteBuffer.wrap(blob))
-
- override def initialize(buffer: MutableAggregationBuffer): Unit =
- buffer(0) = marshall(StreamingHistogram(numBuckets))
-
- private val safeMerge = safeEval((h1: Histogram[Double], h2: Histogram[Double]) ⇒ h1 merge h2)
-
- override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
- val tile = input.getAs[Tile](0)
- val hist1 = unmarshall(buffer.getAs[Array[Byte]](0))
- val hist2 = safeEval(StreamingHistogram.fromTile(_: Tile, numBuckets))(tile)
- val updatedHist = safeMerge(hist1, hist2)
- buffer(0) = marshall(updatedHist)
- }
-
- override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
- val hist1 = unmarshall(buffer1.getAs[Array[Byte]](0))
- val hist2 = unmarshall(buffer2.getAs[Array[Byte]](0))
- val updatedHist = safeMerge(hist1, hist2)
- buffer1(0) = marshall(updatedHist)
- }
-
- override def evaluate(buffer: Row): Any = {
- val hist = unmarshall(buffer.getAs[Array[Byte]](0))
- CellHistogram(hist)
- }
-}
-
-object HistogramAggregate {
- def apply() = new HistogramAggregate(StreamingHistogram.DEFAULT_NUM_BUCKETS)
-}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/functions/LocalCountAggregate.scala b/core/src/main/scala/astraea/spark/rasterframes/functions/LocalCountAggregate.scala
deleted file mode 100644
index 4f06b69c1..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/functions/LocalCountAggregate.scala
+++ /dev/null
@@ -1,56 +0,0 @@
-package astraea.spark.rasterframes.functions
-
-import geotrellis.raster.mapalgebra.local.{Add, Defined, Undefined}
-import geotrellis.raster.{IntConstantNoDataCellType, Tile}
-import org.apache.spark.sql.Row
-import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
-import org.apache.spark.sql.rf.TileUDT
-import org.apache.spark.sql.types.{DataType, StructField, StructType}
-
-/**
- * Catalyst aggregate function that counts `NoData` values in a cell-wise fashion.
- *
- * @param isData true if count should be of non-NoData values, false for NoData values.
- * @since 8/11/17
- */
-class LocalCountAggregate(isData: Boolean) extends UserDefinedAggregateFunction {
-
- private val incCount =
- if (isData) safeBinaryOp((t1: Tile, t2: Tile) ⇒ Add(t1, Defined(t2)))
- else safeBinaryOp((t1: Tile, t2: Tile) ⇒ Add(t1, Undefined(t2)))
-
- private val add = safeBinaryOp(Add.apply(_: Tile, _: Tile))
-
- private val TileType = new TileUDT()
-
- override def dataType: DataType = TileType
-
- override def inputSchema: StructType = StructType(StructField("value", TileType) :: Nil)
-
- override def bufferSchema: StructType = inputSchema
-
- override def deterministic: Boolean = true
-
- override def initialize(buffer: MutableAggregationBuffer): Unit =
- buffer(0) = null
-
- override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
- val right = input.getAs[Tile](0)
- if (right != null) {
- if (buffer(0) == null) {
- buffer(0) = (
- if (isData) Defined(right) else Undefined(right)
- ).convert(IntConstantNoDataCellType)
- } else {
- val left = buffer.getAs[Tile](0)
- buffer(0) = incCount(left, right)
- }
- }
- }
-
- override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
- buffer1(0) = add(buffer1.getAs[Tile](0), buffer2.getAs[Tile](0))
- }
-
- override def evaluate(buffer: Row): Tile = buffer.getAs[Tile](0)
-}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/functions/LocalMeanAggregate.scala b/core/src/main/scala/astraea/spark/rasterframes/functions/LocalMeanAggregate.scala
deleted file mode 100644
index 60d51457c..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/functions/LocalMeanAggregate.scala
+++ /dev/null
@@ -1,20 +0,0 @@
-package astraea.spark.rasterframes.functions
-
-import org.apache.spark.sql.Row
-import org.apache.spark.sql.rf.TileUDT
-import org.apache.spark.sql.types.DataType
-
-/**
- * Aggregation function that only returns the average. Depends on
- * [[LocalStatsAggregate]] for computation and just
- * selects the mean result tile.
- *
- * @since 8/11/17
- */
-class LocalMeanAggregate extends LocalStatsAggregate {
- override def dataType: DataType = new TileUDT()
- override def evaluate(buffer: Row): Any = {
- val superRow = super.evaluate(buffer).asInstanceOf[Row]
- if (superRow != null) superRow.get(3) else null
- }
-}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/functions/LocalTileOpAggregate.scala b/core/src/main/scala/astraea/spark/rasterframes/functions/LocalTileOpAggregate.scala
deleted file mode 100644
index 8ae5eadad..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/functions/LocalTileOpAggregate.scala
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright 2017 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package astraea.spark.rasterframes.functions
-
-import geotrellis.raster.Tile
-import geotrellis.raster.mapalgebra.local.LocalTileBinaryOp
-import org.apache.spark.sql.Row
-import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
-import org.apache.spark.sql.rf.TileUDT
-import org.apache.spark.sql.types._
-
-/**
- * Aggregation function for applying a [[LocalTileBinaryOp]] pairwise across all tiles. Assumes Monoid algebra.
- *
- * @since 4/17/17
- */
-class LocalTileOpAggregate(op: LocalTileBinaryOp) extends UserDefinedAggregateFunction {
-
- private val safeOp = safeBinaryOp(op.apply(_: Tile, _: Tile))
-
- private val TileType = new TileUDT()
-
- override def inputSchema: StructType = StructType(StructField("value", TileType) :: Nil)
-
- override def bufferSchema: StructType = inputSchema
-
- override def dataType: DataType = TileType
-
- override def deterministic: Boolean = true
-
- override def initialize(buffer: MutableAggregationBuffer): Unit =
- buffer(0) = null
-
- override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
- if (buffer(0) == null) {
- buffer(0) = input(0)
- } else {
- val t1 = buffer.getAs[Tile](0)
- val t2 = input.getAs[Tile](0)
- buffer(0) = safeOp(t1, t2)
- }
- }
-
- override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = update(buffer1, buffer2)
-
- override def evaluate(buffer: Row): Tile = buffer.getAs[Tile](0)
-}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/functions/package.scala b/core/src/main/scala/astraea/spark/rasterframes/functions/package.scala
index e33570d64..060b08fa3 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/functions/package.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/functions/package.scala
@@ -15,19 +15,15 @@
*/
package astraea.spark.rasterframes
+import astraea.spark.rasterframes.expressions.aggstats._
import astraea.spark.rasterframes.jts.ReprojectionTransformer
-import astraea.spark.rasterframes.stats.{CellHistogram, CellStatistics}
import astraea.spark.rasterframes.util.CRSParser
import com.vividsolutions.jts.geom.Geometry
-import geotrellis.proj4.CRS
import geotrellis.raster.mapalgebra.local._
-import geotrellis.raster.render.ascii.AsciiArtEncoder
import geotrellis.raster.{Tile, _}
import geotrellis.vector.Extent
import org.apache.spark.sql.SQLContext
-import scala.reflect.runtime.universe._
-
/**
* Module utils.
*
@@ -55,80 +51,6 @@ package object functions {
private[rasterframes] def safeEval[P1, P2, R](f: (P1, P2) ⇒ R): (P1, P2) ⇒ R =
(p1, p2) ⇒ if (p1 == null || p2 == null) null.asInstanceOf[R] else f(p1, p2)
-
- /** Count tile cells that have a data value. */
- private[rasterframes] val dataCells: (Tile) ⇒ Long = safeEval((t: Tile) ⇒ {
- var count: Long = 0
- t.dualForeach(
- z ⇒ if(isData(z)) count = count + 1
- ) (
- z ⇒ if(isData(z)) count = count + 1
- )
- count
- })
-
- /** Count tile cells that have a no-data value. */
- private[rasterframes] val noDataCells: (Tile) ⇒ Long = safeEval((t: Tile) ⇒ {
- var count: Long = 0
- t.dualForeach(
- z ⇒ if(isNoData(z)) count = count + 1
- )(
- z ⇒ if(isNoData(z)) count = count + 1
- )
- count
- })
-
- private[rasterframes] val isNoDataTile: (Tile) ⇒ Boolean = (t: Tile) ⇒ {
- if(t == null) true
- else t.isNoDataTile
- }
-
- /** Flattens tile into an array. */
- private[rasterframes] def tileToArray[T: HasCellType: TypeTag]: (Tile) ⇒ Array[T] = {
- def convert(tile: Tile) = {
- typeOf[T] match {
- case t if t =:= typeOf[Int] ⇒ tile.toArray()
- case t if t =:= typeOf[Double] ⇒ tile.toArrayDouble()
- case t if t =:= typeOf[Byte] ⇒ tile.toArray().map(_.toByte) // TODO: Check NoData handling. probably need to use dualForeach
- case t if t =:= typeOf[Short] ⇒ tile.toArray().map(_.toShort)
- case t if t =:= typeOf[Float] ⇒ tile.toArrayDouble().map(_.toFloat)
- }
- }
-
- safeEval[Tile, Array[T]] { t ⇒
- val tile = t match {
- case c: ConstantTile ⇒ c.toArrayTile()
- case o ⇒ o
- }
- val asArray: Array[_] = tile match {
- case t: IntArrayTile ⇒
- if (typeOf[T] =:= typeOf[Int]) t.array
- else convert(t)
- case t: DoubleArrayTile ⇒
- if (typeOf[T] =:= typeOf[Double]) t.array
- else convert(t)
- case t: ByteArrayTile ⇒
- if (typeOf[T] =:= typeOf[Byte]) t.array
- else convert(t)
- case t: UByteArrayTile ⇒
- if (typeOf[T] =:= typeOf[Byte]) t.array
- else convert(t)
- case t: ShortArrayTile ⇒
- if (typeOf[T] =:= typeOf[Short]) t.array
- else convert(t)
- case t: UShortArrayTile ⇒
- if (typeOf[T] =:= typeOf[Short]) t.array
- else convert(t)
- case t: FloatArrayTile ⇒
- if (typeOf[T] =:= typeOf[Float]) t.array
- else convert(t)
- case _: Tile ⇒
- throw new IllegalArgumentException("Unsupported tile type: " + tile.getClass)
- }
- asArray.asInstanceOf[Array[T]]
- }
- }
-
/** Converts an array into a tile. */
private[rasterframes] def arrayToTile(cols: Int, rows: Int) = {
safeEval[AnyRef, Tile]{
@@ -144,143 +66,9 @@ package object functions {
}
}
- /** Computes the column aggregate histogram */
- private[rasterframes] val aggHistogram = HistogramAggregate()
-
- /** Computes the column aggregate statistics */
- private[rasterframes] val aggStats = CellStatsAggregate()
-
/** Set the tile's no-data value. */
private[rasterframes] def withNoData(nodata: Double) = safeEval[Tile, Tile](_.withNoData(Some(nodata)))
- /** Single tile histogram. */
- private[rasterframes] val tileHistogram = safeEval[Tile, CellHistogram](t ⇒ CellHistogram(t.histogramDouble))
-
- /** Single tile statistics. Convenience for `tile_histogram.statistics`. */
- private[rasterframes] val tileStats = safeEval[Tile, CellStatistics]((t: Tile) ⇒
- if (t.cellType.isFloatingPoint) t.statisticsDouble.map(CellStatistics.apply).orNull
- else t.statistics.map(CellStatistics.apply).orNull
- )
-
- /** Add up all the cell values. */
- private[rasterframes] val tileSum: (Tile) ⇒ Double = safeEval((t: Tile) ⇒ {
- var sum: Double = 0.0
- t.foreachDouble(z ⇒ if(isData(z)) sum = sum + z)
- sum
- })
-
- /** Find the minimum cell value. */
- private[rasterframes] val tileMin: (Tile) ⇒ Double = safeEval((t: Tile) ⇒ {
- var min: Double = Double.MaxValue
- t.foreachDouble(z ⇒ if(isData(z)) min = math.min(min, z))
- if (min == Double.MaxValue) Double.NaN
- else min
- })
-
- /** Find the maximum cell value. */
- private[rasterframes] val tileMax: (Tile) ⇒ Double = safeEval((t: Tile) ⇒ {
- var max: Double = Double.MinValue
- t.foreachDouble(z ⇒ if(isData(z)) max = math.max(max, z))
- if (max == Double.MinValue) Double.NaN
- else max
- })
-
- /** Single tile mean. Convenience for `tile_histogram.statistics.mean`. */
- private[rasterframes] val tileMean: (Tile) ⇒ Double = safeEval((t: Tile) ⇒ {
- var sum: Double = 0.0
- var count: Long = 0
- t.dualForeach(
- z ⇒ if(isData(z)) { count = count + 1; sum = sum + z }
- ) (
- z ⇒ if(isData(z)) { count = count + 1; sum = sum + z }
- )
- sum/count
- })
-
- /** Compute summary cell-wise statistics across tiles. */
- private[rasterframes] val localAggStats = new LocalStatsAggregate()
-
- /** Compute the cell-wise max across tiles. */
- private[rasterframes] val localAggMax = new LocalTileOpAggregate(Max)
-
- /** Compute the cell-wise min across tiles. */
- private[rasterframes] val localAggMin = new LocalTileOpAggregate(Min)
-
- /** Compute the cell-wise main across tiles. */
- private[rasterframes] val localAggMean = new LocalMeanAggregate()
-
- /** Compute the cell-wise count of non-NA across tiles. */
- private[rasterframes] val localAggCount = new LocalCountAggregate(true)
-
- /** Compute the cell-wise count of non-NA across tiles. */
- private[rasterframes] val localAggNodataCount = new LocalCountAggregate(false)
-
- /** Convert the tile to a floating point type as needed for scalar operations. */
- @inline
- private def floatingPointTile(t: Tile) = if (t.cellType.isFloatingPoint) t else t.convert(DoubleConstantNoDataCellType)
-
- /** Cell-wise addition between tiles. */
- private[rasterframes] val localAdd: (Tile, Tile) ⇒ Tile = safeEval(Add.apply)
-
- /** Cell-wise addition of a scalar to a tile. */
- private[rasterframes] val localAddScalarInt: (Tile, Int) ⇒ Tile = safeEval((t: Tile, scalar:Int) => {
- t.localAdd(scalar)
- })
-
- /** Cell-wise addition of a scalar to a tile. */
- private[rasterframes] val localAddScalar: (Tile, Double) ⇒ Tile = safeEval((t: Tile, scalar:Double) => {
- floatingPointTile(t).localAdd(scalar)
- })
-
- /** Cell-wise subtraction between tiles. */
- private[rasterframes] val localSubtract: (Tile, Tile) ⇒ Tile = safeEval(Subtract.apply)
-
- /** Cell-wise subtraction of a scalar from a tile. */
- private[rasterframes] val localSubtractScalarInt: (Tile, Int) ⇒ Tile = safeEval((t: Tile, scalar:Int) => {
- t.localSubtract(scalar)
- })
-
- /** Cell-wise subtraction of a scalar from a tile. */
- private[rasterframes] val localSubtractScalar: (Tile, Double) ⇒ Tile = safeEval((t: Tile, scalar:Double) => {
- floatingPointTile(t).localSubtract(scalar)
- })
-
- /** Cell-wise multiplication between tiles. */
- private[rasterframes] val localMultiply: (Tile, Tile) ⇒ Tile = safeEval(Multiply.apply)
-
- /** Cell-wise multiplication of a tile by a scalar. */
- private[rasterframes] val localMultiplyScalarInt: (Tile, Int) ⇒ Tile = safeEval((t: Tile, scalar:Int) => {
- t.localMultiply(scalar)
- })
-
- /** Cell-wise multiplication of a tile by a scalar. */
- private[rasterframes] val localMultiplyScalar: (Tile, Double) ⇒ Tile = safeEval((t: Tile, scalar:Double) => {
- floatingPointTile(t).localMultiply(scalar)
- })
-
- /** Cell-wise division between tiles. */
- private[rasterframes] val localDivide: (Tile, Tile) ⇒ Tile = safeEval(Divide.apply)
-
- /** Cell-wise division of a tile by a scalar. */
- private[rasterframes] val localDivideScalarInt: (Tile, Int) ⇒ Tile = safeEval((t: Tile, scalar:Int) => {
- t.localDivide(scalar)
- })
-
- /** Cell-wise division of a tile by a scalar. */
- private[rasterframes] val localDivideScalar: (Tile, Double) ⇒ Tile = safeEval((t: Tile, scalar:Double) => {
- floatingPointTile(t).localDivide(scalar)
- })
-
- /** Cell-wise normalized difference of tiles. */
- private[rasterframes] val normalizedDifference: (Tile, Tile) ⇒ Tile = safeEval((t1: Tile, t2:Tile) => {
- val diff = floatingPointTile(Subtract(t1, t2))
- val sum = floatingPointTile(Add(t1, t2))
- Divide(diff, sum)
- })
-
- /** Render tile as ASCII string. */
- private[rasterframes] val renderAscii: (Tile) ⇒ String = safeEval(_.renderAscii(AsciiArtEncoder.Palette.NARROW))
-
/** Constructor for constant tiles */
private[rasterframes] val makeConstantTile: (Number, Int, Int, String) ⇒ Tile = (value, cols, rows, cellTypeName) ⇒ {
val cellType = CellType.fromName(cellTypeName)
@@ -324,33 +112,11 @@ package object functions {
DoubleConstantNoDataCellType
).map(_.toString).distinct
- /**
- * Generate a tile with the values from the data tile, but where cells in the
- * masking tile contain NODATA, replace the data value with NODATA.
- */
- private[rasterframes] val mask: (Tile, Tile) ⇒ Tile =
- (dataTile, maskingTile) ⇒ Mask(dataTile, Defined(maskingTile), 0, NODATA)
-
- /**
- * Generate a tile with the values from the data tile, but where cells in the
- * masking tile contain the masking value, replace the data value with NODATA.
- */
- private[rasterframes] val maskByValue: (Tile, Tile, Int) ⇒ Tile =
- (dataTile, maskingTile, maskingValue) ⇒
- Mask(dataTile, maskingTile, maskingValue, NODATA)
-
- /**
- * Generate a tile with the values from the data tile, but where cells in the
- * masking tile DO NOT contain NODATA, replace the data value with NODATA.
- */
- private[rasterframes] val inverseMask: (Tile, Tile) ⇒ Tile =
- (dataTile, maskingTile) ⇒ InverseMask(dataTile, Defined(maskingTile), 0, NODATA)
-
/**
* Rasterize geometry into tiles.
*/
private[rasterframes] val rasterize: (Geometry, Geometry, Int, Int, Int) ⇒ Tile = {
- import geotrellis.vector.{Geometry ⇒ GTGeometry}
+ import geotrellis.vector.{Geometry => GTGeometry}
(geom, bounds, value, cols, rows) ⇒ {
// We have to do this because (as of spark 2.2.x) Encoder-only types
// can't be used as UDF inputs. Only Spark-native types and UDTs.
@@ -359,91 +125,6 @@ package object functions {
}
}
- /** Cellwise less than value comparison between two tiles. */
- private[rasterframes] val localLess: (Tile, Tile) ⇒ Tile = safeEval(Less.apply)
-
- /** Cellwise less than value comparison between a tile and a scalar. */
- private[rasterframes] val localLessScalarInt: (Tile, Int) ⇒ Tile = safeEval((t: Tile, scalar: Int) ⇒ {
- t.localLess(scalar)
- })
-
- /** Cellwise less than value comparison between a tile and a scalar. */
- private[rasterframes] val localLessScalar: (Tile, Double) ⇒ Tile = safeEval((t: Tile, scalar: Double) ⇒ {
- floatingPointTile(t).localLess(scalar)
- })
-
- /** Cellwise less than or equal to value comparison between two tiles. */
- private[rasterframes] val localLessEqual: (Tile, Tile) ⇒ Tile = safeEval(LessOrEqual.apply)
-
- /** Cellwise less than or equal to value comparison between a tile and a scalar. */
- private[rasterframes] val localLessEqualScalarInt: (Tile, Int) ⇒ Tile = safeEval((t: Tile, scalar: Int) ⇒ {
- t.localLessOrEqual(scalar)
- })
-
- /** Cellwise less than or equal to value comparison between a tile and a scalar. */
- private[rasterframes] val localLessEqualScalar: (Tile, Double) ⇒ Tile = safeEval((t: Tile, scalar: Double) ⇒ {
- floatingPointTile(t).localLessOrEqual(scalar)
- })
-
- /** Cellwise greater than value comparison between two tiles. */
- private[rasterframes] val localGreater: (Tile, Tile) ⇒ Tile = safeEval(Less.apply)
-
- /** Cellwise greater than value comparison between a tile and a scalar. */
- private[rasterframes] val localGreaterScalarInt: (Tile, Int) ⇒ Tile = safeEval((t: Tile, scalar: Int) ⇒ {
- t.localGreater(scalar)
- })
-
- /** Cellwise greater than value comparison between a tile and a scalar. */
- private[rasterframes] val localGreaterScalar: (Tile, Double) ⇒ Tile = safeEval((t: Tile, scalar: Double) ⇒ {
- floatingPointTile(t).localGreater(scalar)
- })
-
- /** Cellwise greater than or equal to value comparison between two tiles. */
- private[rasterframes] val localGreaterEqual: (Tile, Tile) ⇒ Tile = safeEval(LessOrEqual.apply)
-
- /** Cellwise greater than or equal to value comparison between a tile and a scalar. */
- private[rasterframes] val localGreaterEqualScalarInt: (Tile, Int) ⇒ Tile = safeEval((t: Tile, scalar: Int) ⇒ {
- t.localGreaterOrEqual(scalar)
- })
-
- /** Cellwise greater than or equal to value comparison between a tile and a scalar. */
- private[rasterframes] val localGreaterEqualScalar: (Tile, Double) ⇒ Tile = safeEval((t: Tile, scalar: Double) ⇒ {
- floatingPointTile(t).localGreaterOrEqual(scalar)
- })
-
- /** Cellwise equal to value comparison between two tiles. */
- private[rasterframes] val localEqual: (Tile, Tile) ⇒ Tile = safeEval(Equal.apply)
-
- /** Cellwise equal to value comparison between a tile and a scalar. */
- private[rasterframes] val localEqualScalarInt: (Tile, Int) ⇒ Tile = safeEval((t: Tile, scalar: Int) ⇒ {
- t.localEqual(scalar)
- })
-
- /** Cellwise equal to value comparison between a tile and a scalar. */
- private[rasterframes] val localEqualScalar: (Tile, Double) ⇒ Tile = safeEval((t: Tile, scalar: Double) ⇒ {
- floatingPointTile(t).localEqual(scalar)
- })
-
- /** Cellwise inequality value comparison between two tiles. */
- private[rasterframes] val localUnequal: (Tile, Tile) ⇒ Tile = safeEval(Unequal.apply)
-
- /** Cellwise inequality value comparison between a tile and a scalar. */
- private[rasterframes] val localUnequalScalarInt: (Tile, Int) ⇒ Tile = safeEval((t: Tile, scalar: Int) ⇒ {
- t.localUnequal(scalar)
- })
-
- /** Cellwise inequality value comparison between a tile and a scalar. */
- private[rasterframes] val localUnequalScalar: (Tile, Double) ⇒ Tile = safeEval((t: Tile, scalar: Double) ⇒ {
- floatingPointTile(t).localUnequal(scalar)
- })
-
- /** Reporjects a geometry column from one CRS to another. */
- private[rasterframes] val reprojectGeometry: (Geometry, CRS, CRS) ⇒ Geometry =
- (sourceGeom, src, dst) ⇒ {
- val trans = new ReprojectionTransformer(src, dst)
- trans.transform(sourceGeom)
- }
-
 /** Reprojects a geometry column from one CRS to another, where the CRSs are defined in Proj4 format. */
private[rasterframes] val reprojectGeometryCRSName: (Geometry, String, String) ⇒ Geometry =
(sourceGeom, srcName, dstName) ⇒ {
@@ -454,64 +135,14 @@ package object functions {
}
def register(sqlContext: SQLContext): Unit = {
- sqlContext.udf.register("rf_mask", mask)
- sqlContext.udf.register("rf_mask_by_value", maskByValue)
- sqlContext.udf.register("rf_inverse_mask", inverseMask)
+
sqlContext.udf.register("rf_make_constant_tile", makeConstantTile)
sqlContext.udf.register("rf_tile_zeros", tileZeros)
sqlContext.udf.register("rf_tile_ones", tileOnes)
- sqlContext.udf.register("rf_tile_to_array_int", tileToArray[Int])
- sqlContext.udf.register("rf_tile_to_array_double", tileToArray[Double])
- sqlContext.udf.register("rf_agg_histogram", aggHistogram)
- sqlContext.udf.register("rf_agg_stats", aggStats)
- sqlContext.udf.register("rf_tile_min", tileMin)
- sqlContext.udf.register("rf_tile_max", tileMax)
- sqlContext.udf.register("rf_tile_mean", tileMean)
- sqlContext.udf.register("rf_tile_sum", tileSum)
- sqlContext.udf.register("rf_tile_histogram", tileHistogram)
- sqlContext.udf.register("rf_tile_stats", tileStats)
- sqlContext.udf.register("rf_data_cells", dataCells)
- sqlContext.udf.register("rf_no_data_cells", noDataCells)
- sqlContext.udf.register("rf_is_no_data_tile", isNoDataTile)
- sqlContext.udf.register("rf_local_agg_stats", localAggStats)
- sqlContext.udf.register("rf_local_agg_max", localAggMax)
- sqlContext.udf.register("rf_local_agg_min", localAggMin)
- sqlContext.udf.register("rf_local_agg_mean", localAggMean)
- sqlContext.udf.register("rf_local_agg_count", localAggCount)
- sqlContext.udf.register("rf_local_add", localAdd)
- sqlContext.udf.register("rf_local_add_scalar", localAddScalar)
- sqlContext.udf.register("rf_local_add_scalar_int", localAddScalarInt)
- sqlContext.udf.register("rf_local_subtract", localSubtract)
- sqlContext.udf.register("rf_local_subtract_scalar", localSubtractScalar)
- sqlContext.udf.register("rf_local_subtract_scalar_int", localSubtractScalarInt)
- sqlContext.udf.register("rf_local_multiply", localMultiply)
- sqlContext.udf.register("rf_local_multiply_scalar", localMultiplyScalar)
- sqlContext.udf.register("rf_local_multiply_scalar_int", localMultiplyScalarInt)
- sqlContext.udf.register("rf_local_divide", localDivide)
- sqlContext.udf.register("rf_local_divide_scalar", localDivideScalar)
- sqlContext.udf.register("rf_local_divide_scalar_int", localDivideScalarInt)
- sqlContext.udf.register("rf_normalized_difference", normalizedDifference)
+
sqlContext.udf.register("rf_cell_types", cellTypes)
- sqlContext.udf.register("rf_render_ascii", renderAscii)
sqlContext.udf.register("rf_rasterize", rasterize)
- sqlContext.udf.register("rf_less", localLess)
- sqlContext.udf.register("rf_less_scalar", localLessScalar)
- sqlContext.udf.register("rf_less_scalar_int", localLessScalarInt)
- sqlContext.udf.register("rf_less_equal", localLessEqual)
- sqlContext.udf.register("rf_less_equal_scalar", localLessEqualScalar)
- sqlContext.udf.register("rf_less_equal_scalar_int", localLessEqualScalarInt)
- sqlContext.udf.register("rf_greater", localGreater)
- sqlContext.udf.register("rf_greater_scalar", localGreaterScalar)
- sqlContext.udf.register("rf_greaterscalar_int", localGreaterScalarInt)
- sqlContext.udf.register("rf_greater_equal", localGreaterEqual)
- sqlContext.udf.register("rf_greater_equal_scalar", localGreaterEqualScalar)
- sqlContext.udf.register("rf_greater_equal_scalar_int", localGreaterEqualScalarInt)
- sqlContext.udf.register("rf_equal", localEqual)
- sqlContext.udf.register("rf_equal_scalar", localEqualScalar)
- sqlContext.udf.register("rf_equal_scalar_int", localEqualScalarInt)
- sqlContext.udf.register("rf_unequal", localUnequal)
- sqlContext.udf.register("rf_unequal_scalar", localUnequalScalar)
- sqlContext.udf.register("rf_unequal_scalar_int", localUnequalScalarInt)
+
sqlContext.udf.register("rf_reproject_geometry", reprojectGeometryCRSName)
}
}
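
Most of the removed registrations correspond to UDFs that have been ported to Catalyst expressions (see the aggstats import above); the few that remain are still callable from SQL once registration has run. A sketch, assuming rasterframes initialization has invoked register(spark.sqlContext):

    // 8x8 tile of unsigned 8-bit ones, built via one of the surviving UDFs
    spark.sql("SELECT rf_make_constant_tile(1, 8, 8, 'uint8') AS tile")
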
diff --git a/core/src/main/scala/astraea/spark/rasterframes/jts/Implicits.scala b/core/src/main/scala/astraea/spark/rasterframes/jts/Implicits.scala
index 03d74eed7..e257ebfa5 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/jts/Implicits.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/jts/Implicits.scala
@@ -29,14 +29,13 @@ import geotrellis.vector.{Point ⇒ gtPoint}
import org.apache.spark.sql.{Column, TypedColumn}
import org.apache.spark.sql.functions._
import org.locationtech.geomesa.spark.jts.DataFrameFunctions.SpatialConstructors
+import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders._
/**
* Extension methods on typed columns allowing for DSL-like queries over JTS types.
* @since 1/10/18
*/
trait Implicits extends SpatialConstructors {
- import astraea.spark.rasterframes.encoders.SparkDefaultEncoders._
-
implicit class ExtentColumnMethods[T <: Geometry](val self: TypedColumn[Any, T])
extends MethodExtensions[TypedColumn[Any, T]] {
diff --git a/core/src/main/scala/astraea/spark/rasterframes/model/CellContext.scala b/core/src/main/scala/astraea/spark/rasterframes/model/CellContext.scala
new file mode 100644
index 000000000..cac2903dd
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/model/CellContext.scala
@@ -0,0 +1,50 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.model
+import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
+import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
+import org.apache.spark.sql.types.{ShortType, StructField, StructType}
+
+case class CellContext(tile_context: TileContext, tile_data_context: TileDataContext, col_index: Short, row_index: Short)
+object CellContext {
+ implicit val serializer: CatalystSerializer[CellContext] = new CatalystSerializer[CellContext] {
+ override def schema: StructType = StructType(Seq(
+ StructField("tile_context", CatalystSerializer[TileContext].schema, false),
+ StructField("tile_data_context", CatalystSerializer[TileDataContext].schema, false),
+ StructField("col_index", ShortType, false),
+ StructField("row_index", ShortType, false)
+ ))
+ override protected def to[R](t: CellContext, io: CatalystSerializer.CatalystIO[R]): R = io.create(
+ io.to(t.tile_context),
+ io.to(t.tile_data_context),
+ t.col_index,
+ t.row_index
+ )
+ override protected def from[R](t: R, io: CatalystSerializer.CatalystIO[R]): CellContext = CellContext(
+ io.get[TileContext](t, 0),
+ io.get[TileDataContext](t, 1),
+ io.getShort(t, 2),
+ io.getShort(t, 3)
+ )
+ }
+ implicit def encoder: ExpressionEncoder[CellContext] = CatalystSerializerEncoder[CellContext]()
+}
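
The model types introduced in this package all follow the same recipe: a CatalystSerializer describing the row layout, plus a CatalystSerializerEncoder-backed ExpressionEncoder for Dataset use. A round-trip sketch using the Spark 2.x encoder API (assumes a value cellContext: CellContext in scope):

    val enc  = CellContext.encoder.resolveAndBind()
    val row  = enc.toRow(cellContext)   // InternalRow laid out per serializer.schema
    val back = enc.fromRow(row)         // decoded back to a CellContext
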
diff --git a/core/src/main/scala/astraea/spark/rasterframes/model/Cells.scala b/core/src/main/scala/astraea/spark/rasterframes/model/Cells.scala
new file mode 100644
index 000000000..acf847e45
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/model/Cells.scala
@@ -0,0 +1,72 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.model
+import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
+import astraea.spark.rasterframes.ref.RasterRef
+import astraea.spark.rasterframes.ref.RasterRef.RasterRefTile
+import geotrellis.raster.{ArrayTile, Tile}
+import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
+import org.apache.spark.sql.types.{BinaryType, StructField, StructType}
+
+/** Represents either the binary cell data itself or a reference to it. */
+case class Cells(data: Either[Array[Byte], RasterRef]) {
+ def isRef: Boolean = data.isRight
+ /** Convert cells into either a RasterRefTile or an ArrayTile. */
+ def toTile(ctx: TileDataContext): Tile = {
+ data.fold(
+ bytes => ArrayTile.fromBytes(bytes, ctx.cell_type, ctx.dimensions.cols, ctx.dimensions.rows),
+ ref => RasterRefTile(ref)
+ )
+ }
+}
+
+object Cells {
+ /** Extracts the Cells from a Tile. */
+ def apply(t: Tile): Cells = {
+ t match {
+ case ref: RasterRefTile =>
+ Cells(Right(ref.rr))
+ case o =>
+ Cells(Left(o.toBytes))
+ }
+ }
+
+ implicit def cellsSerializer: CatalystSerializer[Cells] = new CatalystSerializer[Cells] {
+ override def schema: StructType = StructType(Seq(
+ StructField("cells", BinaryType, true),
+ StructField("ref", CatalystSerializer[RasterRef].schema, true)
+ ))
+ override protected def to[R](t: Cells, io: CatalystSerializer.CatalystIO[R]): R = io.create(
+ t.data.left.getOrElse(null),
+ t.data.right.map(rr => io.to(rr)).right.getOrElse(null)
+ )
+ override protected def from[R](t: R, io: CatalystSerializer.CatalystIO[R]): Cells = {
+ if (!io.isNullAt(t, 0))
+ Cells(Left(io.getByteArray(t, 0)))
+ else if (!io.isNullAt(t, 1))
+ Cells(Right(io.get[RasterRef](t, 1)))
+ else throw new IllegalArgumentException("must contain either cell data or a ref; both were null")
+ }
+ }
+
+ implicit def encoder: ExpressionEncoder[Cells] = CatalystSerializerEncoder[Cells]()
+}
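
A minimal sketch of the in-memory round trip this type enables (an 8x8 constant tile here; RasterRefTile covers the deferred-read case on the Right side):

    import astraea.spark.rasterframes.model.{Cells, TileDataContext}
    import geotrellis.raster.ArrayTile

    val tile  = ArrayTile(Array.fill(64)(1), 8, 8)  // 8x8 int tile
    val cells = Cells(tile)                         // Left(bytes): data captured inline
    val ctx   = TileDataContext(tile)               // cell type + dimensions travel separately
    val back  = cells.toTile(ctx)                   // reconstituted as an ArrayTile
    assert(!cells.isRef)
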
diff --git a/core/src/main/scala/astraea/spark/rasterframes/model/TileContext.scala b/core/src/main/scala/astraea/spark/rasterframes/model/TileContext.scala
new file mode 100644
index 000000000..f5d49524c
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/model/TileContext.scala
@@ -0,0 +1,55 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.model
+import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
+import astraea.spark.rasterframes.tiles.ProjectedRasterTile
+import geotrellis.proj4.CRS
+import geotrellis.raster.Tile
+import geotrellis.vector.Extent
+import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
+import org.apache.spark.sql.types.{StructField, StructType}
+
+case class TileContext(extent: Extent, crs: CRS) {
+ def toProjectRasterTile(t: Tile): ProjectedRasterTile = ProjectedRasterTile(t, extent, crs)
+}
+object TileContext {
+ def apply(prt: ProjectedRasterTile): TileContext = new TileContext(prt.extent, prt.crs)
+ def unapply(tile: Tile): Option[(Extent, CRS)] = tile match {
+ case prt: ProjectedRasterTile => Some((prt.extent, prt.crs))
+ case _ => None
+ }
+ implicit val serializer: CatalystSerializer[TileContext] = new CatalystSerializer[TileContext] {
+ override def schema: StructType = StructType(Seq(
+ StructField("extent", CatalystSerializer[Extent].schema, false),
+ StructField("crs", CatalystSerializer[CRS].schema, false)
+ ))
+ override protected def to[R](t: TileContext, io: CatalystSerializer.CatalystIO[R]): R = io.create(
+ io.to(t.extent),
+ io.to(t.crs)
+ )
+ override protected def from[R](t: R, io: CatalystSerializer.CatalystIO[R]): TileContext = TileContext(
+ io.get[Extent](t, 0),
+ io.get[CRS](t, 1)
+ )
+ }
+ implicit def encoder: ExpressionEncoder[TileContext] = CatalystSerializerEncoder[TileContext]()
+}
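
As a usage sketch, the unapply extractor lets code recover spatial context from a plain Tile reference when one happens to carry it (describe is a hypothetical helper):

    import astraea.spark.rasterframes.model.TileContext
    import geotrellis.raster.Tile

    def describe(t: Tile): String = t match {
      case TileContext(extent, crs) => s"projected tile in ${crs.toProj4String}, extent $extent"
      case _                        => "tile without spatial context"
    }
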
diff --git a/core/src/main/scala/astraea/spark/rasterframes/model/TileDataContext.scala b/core/src/main/scala/astraea/spark/rasterframes/model/TileDataContext.scala
new file mode 100644
index 000000000..121f8b845
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/model/TileDataContext.scala
@@ -0,0 +1,59 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.model
+import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
+import astraea.spark.rasterframes.encoders.CatalystSerializer._
+import geotrellis.raster.{CellType, Tile}
+import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
+import org.apache.spark.sql.types.{StructField, StructType}
+
+/** Encapsulates all information about a tile aside from actual cell values. */
+case class TileDataContext(cell_type: CellType, dimensions: TileDimensions)
+object TileDataContext {
+
+ /** Extracts the TileDataContext from a Tile. */
+ def apply(t: Tile): TileDataContext = {
+ require(t.cols <= Short.MaxValue, s"RasterFrames doesn't support tiles with more than ${Short.MaxValue} columns (got ${t.cols})")
+ require(t.rows <= Short.MaxValue, s"RasterFrames doesn't support tiles with more than ${Short.MaxValue} rows (got ${t.rows})")
+ TileDataContext(
+ t.cellType, TileDimensions(t.dimensions)
+ )
+ }
+
+ implicit val serializer: CatalystSerializer[TileDataContext] = new CatalystSerializer[TileDataContext] {
+ override def schema: StructType = StructType(Seq(
+ StructField("cell_type", CatalystSerializer[CellType].schema, false),
+ StructField("dimensions", CatalystSerializer[TileDimensions].schema, false)
+ ))
+
+ override protected def to[R](t: TileDataContext, io: CatalystIO[R]): R = io.create(
+ io.to(t.cell_type),
+ io.to(t.dimensions)
+ )
+ override protected def from[R](t: R, io: CatalystIO[R]): TileDataContext = TileDataContext(
+ io.get[CellType](t, 0),
+ io.get[TileDimensions](t, 1)
+ )
+ }
+
+ implicit def encoder: ExpressionEncoder[TileDataContext] = CatalystSerializerEncoder[TileDataContext]()
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/TileDimensions.scala b/core/src/main/scala/astraea/spark/rasterframes/model/TileDimensions.scala
similarity index 78%
rename from core/src/main/scala/astraea/spark/rasterframes/TileDimensions.scala
rename to core/src/main/scala/astraea/spark/rasterframes/model/TileDimensions.scala
index d850cabb4..2f7f579ba 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/TileDimensions.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/model/TileDimensions.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea, Inc.
+ * Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -19,11 +19,12 @@
*
*/
-package astraea.spark.rasterframes
+package astraea.spark.rasterframes.model
-import astraea.spark.rasterframes.encoders.CatalystSerializer
import astraea.spark.rasterframes.encoders.CatalystSerializer.CatalystIO
+import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
import geotrellis.raster.Grid
+import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.types.{ShortType, StructField, StructType}
/**
@@ -34,6 +35,8 @@ import org.apache.spark.sql.types.{ShortType, StructField, StructType}
case class TileDimensions(cols: Int, rows: Int) extends Grid
object TileDimensions {
+ def apply(colsRows: (Int, Int)): TileDimensions = new TileDimensions(colsRows._1, colsRows._2)
+
implicit val serializer: CatalystSerializer[TileDimensions] = new CatalystSerializer[TileDimensions] {
override def schema: StructType = StructType(Seq(
StructField("cols", ShortType, false),
@@ -50,4 +53,6 @@ object TileDimensions {
io.getShort(t, 1)
)
}
+
+ implicit def encoder: ExpressionEncoder[TileDimensions] = ExpressionEncoder[TileDimensions]()
}
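
The tuple overload added above mirrors GeoTrellis's Tile.dimensions, which yields a (cols, rows) pair, so the two compose directly. A sketch:

    import astraea.spark.rasterframes.model.TileDimensions

    val dims = TileDimensions((256, 256)) // equivalent to TileDimensions(256, 256)
    // Cols and rows persist as ShortType per the serializer schema, which is why
    // callers such as TileDataContext enforce the Short.MaxValue bound.
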
diff --git a/core/src/main/scala/astraea/spark/rasterframes/package.scala b/core/src/main/scala/astraea/spark/rasterframes/package.scala
index 54611eede..7b360ed25 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/package.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/package.scala
@@ -18,12 +18,11 @@ package astraea.spark
import astraea.spark.rasterframes.encoders.StandardEncoders
import astraea.spark.rasterframes.util.ZeroSevenCompatibilityKit
-import com.typesafe.config.{Config, ConfigFactory}
+import com.typesafe.config.ConfigFactory
import com.typesafe.scalalogging.LazyLogging
import geotrellis.raster.{Tile, TileFeature}
import geotrellis.spark.{ContextRDD, Metadata, SpaceTimeKey, SpatialKey, TileLayerMetadata}
import org.apache.spark.rdd.RDD
-import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.sql._
import org.locationtech.geomesa.spark.jts.DataFrameFunctions
import shapeless.tag.@@
@@ -61,12 +60,12 @@ package object rasterframes extends StandardColumns
val config = sqlContext.sparkSession.conf
if(config.getOption("spark.serializer").isEmpty) {
logger.warn("No serializer has been registered with Spark. Default Java serialization will be used, which is slow. " +
- "Consider the following settings:" +
+ "Consider using the following settings:" +
"""
| SparkSession
| .builder()
- | .master("local[*]")
- | .appName(getClass.getName)
+ | .master("...")
+ | .appName("...")
| .withKryoSerialization // <--- RasterFrames extension method
""".stripMargin
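
Applied literally, the suggested configuration looks like the sketch below (master and appName values are placeholders; withKryoSerialization is the RasterFrames extension method named in the warning):

    import astraea.spark.rasterframes._
    import org.apache.spark.sql.SparkSession

    val spark = SparkSession
      .builder()
      .master("local[*]")
      .appName("rasterframes-app")
      .withKryoSerialization // registers Kryo with the RasterFrames serializers
      .getOrCreate()
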
diff --git a/core/src/main/scala/astraea/spark/rasterframes/ref/RasterRef.scala b/core/src/main/scala/astraea/spark/rasterframes/ref/RasterRef.scala
index 52a8e0b2f..ff176765c 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/ref/RasterRef.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/ref/RasterRef.scala
@@ -21,12 +21,17 @@
package astraea.spark.rasterframes.ref
+import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
+import astraea.spark.rasterframes.encoders.CatalystSerializer.CatalystIO
import astraea.spark.rasterframes.tiles.ProjectedRasterTile
import com.typesafe.scalalogging.LazyLogging
import geotrellis.proj4.CRS
import geotrellis.raster.{CellType, GridBounds, Tile, TileLayout}
import geotrellis.spark.tiling.LayoutDefinition
import geotrellis.vector.{Extent, ProjectedExtent}
+import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
+import org.apache.spark.sql.rf.RasterSourceUDT
+import org.apache.spark.sql.types.{StructField, StructType}
/**
* A delayed-read projected raster implementation.
@@ -64,6 +69,8 @@ case class RasterRef(source: RasterSource, subextent: Option[Extent])
object RasterRef extends LazyLogging {
private val log = logger
+
+
+ /** Constructor for when the data extent covers the whole raster. */
def apply(source: RasterSource): RasterRef = RasterRef(source, None)
@@ -86,4 +93,25 @@ object RasterRef extends LazyLogging {
override def convert(ct: CellType): ProjectedRasterTile =
ProjectedRasterTile(rr.realizedTile.convert(ct), extent, crs)
}
+
+ implicit val rasterRefSerializer: CatalystSerializer[RasterRef] = new CatalystSerializer[RasterRef] {
+ val rsType = new RasterSourceUDT()
+ override def schema: StructType = StructType(Seq(
+ StructField("source", rsType, false),
+ StructField("subextent", CatalystSerializer[Extent].schema, true)
+ ))
+
+ override def to[R](t: RasterRef, io: CatalystIO[R]): R = io.create(
+ io.to(t.source)(RasterSourceUDT.rasterSourceSerializer),
+ t.subextent.map(io.to[Extent]).orNull
+ )
+
+ override def from[R](row: R, io: CatalystIO[R]): RasterRef = RasterRef(
+ io.get[RasterSource](row, 0)(RasterSourceUDT.rasterSourceSerializer),
+ if (io.isNullAt(row, 1)) None
+ else Option(io.get[Extent](row, 1))
+ )
+ }
+
+ implicit def rrEncoder: ExpressionEncoder[RasterRef] = CatalystSerializerEncoder[RasterRef](true)
}
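
The nullable subextent field above is what distinguishes a whole-raster reference from a windowed one: None encodes as a null struct, a defined extent as a nested row. A sketch, with src standing in for any concrete RasterSource:

    import astraea.spark.rasterframes.ref.{RasterRef, RasterSource}
    import geotrellis.vector.Extent

    val src: RasterSource = ??? // construction elided; any concrete source works
    val whole  = RasterRef(src)                                   // subextent = None
    val window = RasterRef(src, Some(Extent(0.0, 0.0, 1.0, 1.0))) // windowed read
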
diff --git a/core/src/main/scala/astraea/spark/rasterframes/ref/RasterSource.scala b/core/src/main/scala/astraea/spark/rasterframes/ref/RasterSource.scala
index 060c9f2e0..9dc9bd55e 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/ref/RasterSource.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/ref/RasterSource.scala
@@ -24,15 +24,16 @@ import java.net.URI
import java.time.ZonedDateTime
import java.time.format.DateTimeFormatter
+import astraea.spark.rasterframes.NOMINAL_TILE_SIZE
+import astraea.spark.rasterframes.model.TileContext
import astraea.spark.rasterframes.ref.RasterRef.RasterRefTile
import astraea.spark.rasterframes.tiles.ProjectedRasterTile
import astraea.spark.rasterframes.util.GeoTiffInfoSupport
-import astraea.spark.rasterframes.NOMINAL_TILE_SIZE
import com.typesafe.scalalogging.LazyLogging
import geotrellis.proj4.CRS
+import geotrellis.raster._
import geotrellis.raster.io.geotiff.reader.GeoTiffReader
import geotrellis.raster.io.geotiff.{GeoTiffSegmentLayout, MultibandGeoTiff, SinglebandGeoTiff, Tags}
-import geotrellis.raster._
import geotrellis.raster.split.Split
import geotrellis.spark.io.hadoop.HdfsRangeReader
import geotrellis.spark.io.s3.S3Client
@@ -42,6 +43,9 @@ import geotrellis.util.{FileRangeReader, RangeReader}
import geotrellis.vector.Extent
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
+import org.apache.spark.annotation.Experimental
+import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
+import org.apache.spark.sql.rf.RasterSourceUDT
import scala.util.Try
@@ -50,6 +54,7 @@ import scala.util.Try
*
* @since 8/21/18
*/
+@Experimental
sealed trait RasterSource extends ProjectedRasterLike with Serializable {
def crs: CRS
@@ -61,9 +66,21 @@ sealed trait RasterSource extends ProjectedRasterLike with Serializable {
def bandCount: Int
+ def tags: Option[Tags]
+
def read(extent: Extent): Either[Raster[Tile], Raster[MultibandTile]]
+ /** Reads the given extent as a single multiband raster. */
+ def readMultiband(extent: Extent): Raster[MultibandTile] =
+ read(extent).fold(r => {
+ r.copy(tile = MultibandTile(r.tile))
+ }, identity)
+
def readAll(): Either[Seq[Raster[Tile]], Seq[Raster[MultibandTile]]]
+ def readAllMultiband(): Seq[Raster[MultibandTile]] =
+ readAll().fold(_.map(r => {
+ r.copy(tile = MultibandTile(r.tile))
+ }), identity)
def readAllLazy(): Either[Seq[Raster[Tile]], Seq[Raster[MultibandTile]]] = {
val extents = nativeTiling
@@ -94,6 +111,8 @@ sealed trait RasterSource extends ProjectedRasterLike with Serializable {
def gridExtent = GridExtent(extent, cellSize)
+ def tileContext: TileContext = TileContext(extent, crs)
+
def nativeTiling: Seq[Extent] = {
nativeLayout.map { tileLayout ⇒
val layout = LayoutDefinition(extent, tileLayout)
@@ -108,7 +127,10 @@ sealed trait RasterSource extends ProjectedRasterLike with Serializable {
}
object RasterSource extends LazyLogging {
-
+ implicit def rsEncoder: ExpressionEncoder[RasterSource] = {
+ RasterSourceUDT // Referencing the companion object ensures the UDT is registered first
+ ExpressionEncoder()
+ }
private def _logger = logger
@@ -168,6 +190,8 @@ object RasterSource extends LazyLogging {
override def bandCount: Int = 1
+ override def tags: Option[Tags] = None
+
override def read(extent: Extent): Either[Raster[Tile], Raster[MultibandTile]] = Left(
Raster(tile.crop(rasterExtent.gridBoundsFor(extent, false)), extent)
)
@@ -207,6 +231,8 @@ object RasterSource extends LazyLogging {
def bandCount: Int = tiffInfo.bandCount
+ override def tags: Option[Tags] = Option(tiffInfo.tags)
+
def nativeLayout: Option[TileLayout] = {
if (tiffInfo.segmentLayout.isTiled)
Some(tiffInfo.segmentLayout.tileLayout)
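
The new readMultiband/readAllMultiband helpers collapse the Either returned by read/readAll, lifting single-band results into MultibandTile so callers indifferent to band count can work with one type. A sketch:

    import astraea.spark.rasterframes.ref.RasterSource

    val rs: RasterSource = ??? // construction elided
    val raster = rs.readMultiband(rs.extent) // always a Raster[MultibandTile]
    val all    = rs.readAllMultiband()       // Seq[Raster[MultibandTile]]
    val tags   = rs.tags                     // e.g. Some(Tags) for GeoTIFF-backed sources
    val ctx    = rs.tileContext              // TileContext(extent, crs)
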
diff --git a/core/src/main/scala/astraea/spark/rasterframes/stats/CellHistogram.scala b/core/src/main/scala/astraea/spark/rasterframes/stats/CellHistogram.scala
index 82644d974..efc4908db 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/stats/CellHistogram.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/stats/CellHistogram.scala
@@ -19,7 +19,11 @@
*/
package astraea.spark.rasterframes.stats
-import geotrellis.raster.histogram.{StreamingHistogram, Histogram ⇒ GTHistogram}
+import astraea.spark.rasterframes.encoders.StandardEncoders
+import geotrellis.raster.Tile
+import geotrellis.raster.histogram.{Histogram => GTHistogram}
+import org.apache.spark.sql.types._
+
import scala.collection.mutable.{ListBuffer => MutableListBuffer}
/**
@@ -27,12 +31,9 @@ import scala.collection.mutable.{ListBuffer => MutableListBuffer}
*
* @since 4/3/18
*/
-case class CellHistogram(stats: CellStatistics, bins: Seq[CellHistogram.Bin]) {
-
- def labels = bins.map(_.value)
- def mean = stats.mean
- def totalCount = stats.dataCells
- def asciiStats = stats.asciiStats
+case class CellHistogram(bins: Seq[CellHistogram.Bin]) {
+ lazy val labels: Seq[Double] = bins.map(_.value)
+ lazy val totalCount: Long = bins.foldLeft(0L)(_ + _.count)
def asciiHistogram(width: Int = 80)= {
val counts = bins.map(_.count)
val maxCount = counts.max.toFloat
@@ -68,7 +69,7 @@ case class CellHistogram(stats: CellStatistics, bins: Seq[CellHistogram.Bin]) {
}
}
- private def cdfIntervals(): Iterator[((Double, Double), (Double, Double))] = {
+ private def cdfIntervals: Iterator[((Double, Double), (Double, Double))] = {
if(bins.size < 2) {
Iterator.empty
} else {
@@ -151,15 +152,25 @@ case class CellHistogram(stats: CellStatistics, bins: Seq[CellHistogram.Bin]) {
object CellHistogram {
case class Bin(value: Double, count: Long)
+
+ def apply(tile: Tile): CellHistogram = {
+ val bins = if (tile.cellType.isFloatingPoint) {
+ val h = tile.histogramDouble
+ h.binCounts().map(p ⇒ Bin(p._1, p._2))
+ }
+ else {
+ val h = tile.histogram
+ h.binCounts().map(p ⇒ Bin(p._1, p._2))
+ }
+ CellHistogram(bins)
+ }
+
def apply(hist: GTHistogram[Int]): CellHistogram = {
- val stats = CellStatistics(hist.statistics().get)
- CellHistogram(stats, hist.binCounts().map(p ⇒ Bin(p._1.toDouble, p._2)))
+ CellHistogram(hist.binCounts().map(p ⇒ Bin(p._1, p._2)))
}
def apply(hist: GTHistogram[Double])(implicit ev: DummyImplicit): CellHistogram = {
- val stats = hist.statistics().map(CellStatistics.apply).getOrElse(CellStatistics.empty)
- // Code should be this, but can't due to geotrellis#2664:
- // val bins = hist.binCounts().map(p ⇒ Bin(p._1, p._2))
- val bins = hist.asInstanceOf[StreamingHistogram].buckets().map(b ⇒ Bin(b.label, b.count))
- CellHistogram(stats, bins)
+ CellHistogram(hist.binCounts().map(p ⇒ Bin(p._1, p._2)))
}
+
+ lazy val schema: StructType = StandardEncoders.cellHistEncoder.schema
}
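
With the stats field removed, totalCount is now derived from the bins themselves, and the new Tile overload selects the int or double histogram by cell type. A sketch:

    import astraea.spark.rasterframes.stats.CellHistogram
    import geotrellis.raster.ArrayTile

    val tile = ArrayTile(Array(1, 1, 2, 3), 2, 2) // integral cells -> tile.histogram branch
    val hist = CellHistogram(tile)
    // hist.totalCount -- 4, the sum of bin counts
    // hist.labels     -- the distinct cell values, e.g. Seq(1.0, 2.0, 3.0)
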
diff --git a/core/src/main/scala/astraea/spark/rasterframes/stats/CellStatistics.scala b/core/src/main/scala/astraea/spark/rasterframes/stats/CellStatistics.scala
index ae9729a22..e1ba03b60 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/stats/CellStatistics.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/stats/CellStatistics.scala
@@ -19,17 +19,20 @@
*/
package astraea.spark.rasterframes.stats
+import astraea.spark.rasterframes.encoders.StandardEncoders
+import geotrellis.raster.Tile
+import org.apache.spark.sql.types.StructType
/**
* Container for computed statistics over cells.
*
* @since 4/3/18
*/
-case class CellStatistics(dataCells: Long, noDataCells: Long, min: Double, max: Double, mean: Double, variance: Double) {
+case class CellStatistics(data_cells: Long, no_data_cells: Long, min: Double, max: Double, mean: Double, variance: Double) {
def stddev: Double = math.sqrt(variance)
def asciiStats = Seq(
- "data_cells: " + dataCells,
- "no_data_cells: " + noDataCells,
+ "data_cells: " + data_cells,
+ "no_data_cells: " + no_data_cells,
"min: " + min,
"max: " + max,
"mean: " + mean,
@@ -47,11 +50,21 @@ case class CellStatistics(dataCells: Long, noDataCells: Long, min: Double, max:
}
object CellStatistics {
// Convert GeoTrellis stats object into our simplified one.
+ private[stats]
def apply(stats: geotrellis.raster.summary.Statistics[Double]) =
new CellStatistics(stats.dataCells, -1, stats.zmin, stats.zmax, stats.mean, stats.stddev * stats.stddev)
+ private[stats]
def apply(stats: geotrellis.raster.summary.Statistics[Int])(implicit d: DummyImplicit) =
new CellStatistics(stats.dataCells, -1, stats.zmin.toDouble, stats.zmax.toDouble, stats.mean, stats.stddev * stats.stddev)
+ def apply(tile: Tile): Option[CellStatistics] = {
+ val base = if (tile.cellType.isFloatingPoint) tile.statisticsDouble.map(CellStatistics.apply)
+ else tile.statistics.map(CellStatistics.apply)
+ base.map(s => s.copy(no_data_cells = tile.size - s.data_cells))
+ }
+
def empty = new CellStatistics(0, 0, Double.NaN, Double.NaN, Double.NaN, Double.NaN)
+
+ lazy val schema: StructType = StandardEncoders.cellStatsEncoder.schema
}
\ No newline at end of file
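
The -1 placeholder in the GeoTrellis-based constructors above exists because geotrellis.raster.summary.Statistics carries no NoData count; the new Tile overload back-fills it from tile.size. A sketch:

    import astraea.spark.rasterframes.stats.CellStatistics
    import geotrellis.raster.ArrayTile

    val t = ArrayTile(Array(1, 2, Int.MinValue, 4), 2, 2) // Int.MinValue is NoData for int cells
    val stats = CellStatistics(t) // Option[CellStatistics]
    // stats.map(_.data_cells)    -- Some(3)
    // stats.map(_.no_data_cells) -- Some(1), i.e. tile.size - data_cells
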
diff --git a/core/src/main/scala/astraea/spark/rasterframes/stats/LocalCellStatistics.scala b/core/src/main/scala/astraea/spark/rasterframes/stats/LocalCellStatistics.scala
new file mode 100644
index 000000000..685722f62
--- /dev/null
+++ b/core/src/main/scala/astraea/spark/rasterframes/stats/LocalCellStatistics.scala
@@ -0,0 +1,25 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes.stats
+import geotrellis.raster.Tile
+
+case class LocalCellStatistics(count: Tile, min: Tile, max: Tile, mean: Tile, variance: Tile)
diff --git a/core/src/main/scala/astraea/spark/rasterframes/tiles/InternalRowTile.scala b/core/src/main/scala/astraea/spark/rasterframes/tiles/InternalRowTile.scala
index e871da0c7..021f0946a 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/tiles/InternalRowTile.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/tiles/InternalRowTile.scala
@@ -23,6 +23,8 @@ package astraea.spark.rasterframes.tiles
import java.nio.ByteBuffer
+import astraea.spark.rasterframes.encoders.CatalystSerializer.CatalystIO
+import astraea.spark.rasterframes.model.{Cells, TileDataContext}
import geotrellis.raster._
import org.apache.spark.sql.catalyst.InternalRow
@@ -37,7 +39,6 @@ import org.apache.spark.sql.catalyst.InternalRow
* @since 11/29/17
*/
class InternalRowTile(val mem: InternalRow) extends DelegatingTile {
- import org.apache.spark.sql.rf.TileUDT.C
import InternalRowTile._
/** @group COPIES */
@@ -58,18 +59,30 @@ class InternalRowTile(val mem: InternalRow) extends DelegatingTile {
/** @group COPIES */
protected override def delegate: Tile = realizedTile
+ private lazy val cellContext: TileDataContext =
+ CatalystIO[InternalRow].get[TileDataContext](mem, 0)
+
+
/** Retrieve the cell type from the internal encoding. */
- override lazy val cellType: CellType =
- CellType.fromName(mem.getString(C.CELL_TYPE))
+ override def cellType: CellType = cellContext.cell_type
/** Retrieve the number of columns from the internal encoding. */
- override val cols: Int = mem.getShort(C.COLS)
+ override def cols: Int = cellContext.dimensions.cols
/** Retrieve the number of rows from the internal encoding. */
- override val rows: Int = mem.getShort(C.ROWS)
+ override def rows: Int = cellContext.dimensions.rows
/** Get the internally encoded tile data cells. */
- override lazy val toBytes: Array[Byte] = mem.getBinary(C.CELLS)
+ override lazy val toBytes: Array[Byte] = {
+ val cellData = CatalystIO[InternalRow]
+ .get[Cells](mem, 1)
+ .data
+
+ cellData.left
+ .getOrElse(throw new IllegalStateException(
+ "Expected tile cell bytes, but received RasterRef instead: " + cellData.right.get)
+ )
+ }
private lazy val toByteBuffer: ByteBuffer = {
val data = toBytes
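
After this restructuring, metadata lookups and cell access read different parts of the row: ordinal 0 holds the TileDataContext struct, ordinal 1 the Cells payload. A sketch of the access pattern (row is a hypothetical InternalRow in this layout):

    import astraea.spark.rasterframes.tiles.InternalRowTile
    import org.apache.spark.sql.catalyst.InternalRow

    val row: InternalRow = ??? // a tile row in the layout above
    val tile = new InternalRowTile(row)
    tile.cellType // decodes only the context struct at ordinal 0
    tile.cols     // likewise; cell bytes are never touched
    tile.toBytes  // decodes Cells at ordinal 1; throws for RasterRef payloads
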
diff --git a/core/src/main/scala/astraea/spark/rasterframes/tiles/ProjectedRasterTile.scala b/core/src/main/scala/astraea/spark/rasterframes/tiles/ProjectedRasterTile.scala
index e8a18b432..a9551dd13 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/tiles/ProjectedRasterTile.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/tiles/ProjectedRasterTile.scala
@@ -21,10 +21,19 @@
package astraea.spark.rasterframes.tiles
+import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
+import astraea.spark.rasterframes.encoders.CatalystSerializer.CatalystIO
+import astraea.spark.rasterframes.model.TileContext
import astraea.spark.rasterframes.ref.ProjectedRasterLike
+import astraea.spark.rasterframes.ref.RasterRef.RasterRefTile
import geotrellis.proj4.CRS
-import geotrellis.raster.{ProjectedRaster, Tile}
+import geotrellis.raster.io.geotiff.SinglebandGeoTiff
+import geotrellis.raster.{CellType, ProjectedRaster, Tile}
import geotrellis.vector.{Extent, ProjectedExtent}
+import org.apache.spark.sql.Encoder
+import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
+import org.apache.spark.sql.rf.TileUDT
+import org.apache.spark.sql.types.{StructField, StructType}
/**
* A Tile that's also like a ProjectedRaster, with delayed evaluation support.
@@ -39,10 +48,46 @@ trait ProjectedRasterTile extends DelegatingTile with ProjectedRasterLike {
}
object ProjectedRasterTile {
- def apply(t: Tile, extent: Extent, crs: CRS): ProjectedRasterTile = ConcreteProjectedRasterTile(t, extent, crs)
- def apply(pr: ProjectedRaster[Tile]): ProjectedRasterTile = ConcreteProjectedRasterTile(pr.tile, pr.extent, pr.crs)
+ def apply(t: Tile, extent: Extent, crs: CRS): ProjectedRasterTile =
+ ConcreteProjectedRasterTile(t, extent, crs)
+ def apply(pr: ProjectedRaster[Tile]): ProjectedRasterTile =
+ ConcreteProjectedRasterTile(pr.tile, pr.extent, pr.crs)
+ def apply(tiff: SinglebandGeoTiff): ProjectedRasterTile =
+ ConcreteProjectedRasterTile(tiff.tile, tiff.extent, tiff.crs)
- case class ConcreteProjectedRasterTile(t: Tile, extent: Extent, crs: CRS) extends ProjectedRasterTile {
+ case class ConcreteProjectedRasterTile(t: Tile, extent: Extent, crs: CRS)
+ extends ProjectedRasterTile {
def delegate: Tile = t
+ override def convert(cellType: CellType): Tile =
+ ConcreteProjectedRasterTile(t.convert(cellType), extent, crs)
}
+
+ implicit val serializer: CatalystSerializer[ProjectedRasterTile] = new CatalystSerializer[ProjectedRasterTile] {
+ val TileType = new TileUDT()
+ override def schema: StructType = StructType(Seq(
+ StructField("tile_context", CatalystSerializer[TileContext].schema, false),
+ StructField("tile", TileType, false))
+ )
+
+ override protected def to[R](t: ProjectedRasterTile, io: CatalystIO[R]): R = io.create(
+ io.to(TileContext(t.extent, t.crs)),
+ io.to[Tile](t)(TileUDT.tileSerializer)
+ )
+
+ override protected def from[R](t: R, io: CatalystIO[R]): ProjectedRasterTile = {
+ val tile = io.get[Tile](t, 1)(TileUDT.tileSerializer)
+ tile match {
+ case r: RasterRefTile => r
+ case _ =>
+ val ctx = io.get[TileContext](t, 0)
+ val resolved = tile match {
+ case i: InternalRowTile => i.toArrayTile()
+ case o => o
+ }
+ ProjectedRasterTile(resolved, ctx.extent, ctx.crs)
+ }
+ }
+ }
+
+ implicit val prtEncoder: ExpressionEncoder[ProjectedRasterTile] = CatalystSerializerEncoder[ProjectedRasterTile](true)
}
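
Two behaviors of the serializer above are worth noting: a RasterRefTile round-trips unrealized (preserving delayed reads), while an InternalRowTile is materialized to an ArrayTile on decode. The convert override likewise keeps results projected. A sketch:

    import astraea.spark.rasterframes.tiles.ProjectedRasterTile
    import geotrellis.proj4.LatLng
    import geotrellis.raster.{ArrayTile, DoubleConstantNoDataCellType}
    import geotrellis.vector.Extent

    val prt = ProjectedRasterTile(ArrayTile(Array(1, 2, 3, 4), 2, 2), Extent(0, 0, 1, 1), LatLng)
    val asDouble = prt.convert(DoubleConstantNoDataCellType) // still a ProjectedRasterTile underneath
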
diff --git a/core/src/main/scala/astraea/spark/rasterframes/functions/DataBiasedOp.scala b/core/src/main/scala/astraea/spark/rasterframes/util/DataBiasedOp.scala
similarity index 94%
rename from core/src/main/scala/astraea/spark/rasterframes/functions/DataBiasedOp.scala
rename to core/src/main/scala/astraea/spark/rasterframes/util/DataBiasedOp.scala
index ee384041b..c2e2578a3 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/functions/DataBiasedOp.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/util/DataBiasedOp.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2017 Astraea, Inc.
+ * Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,9 +15,11 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.functions
+package astraea.spark.rasterframes.util
import geotrellis.raster
import geotrellis.raster.isNoData
diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/ZeroSevenCompatibilityKit.scala b/core/src/main/scala/astraea/spark/rasterframes/util/ZeroSevenCompatibilityKit.scala
index 0f324bdd8..bbb23a282 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/util/ZeroSevenCompatibilityKit.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/util/ZeroSevenCompatibilityKit.scala
@@ -20,10 +20,15 @@
*/
package astraea.spark.rasterframes.util
-import astraea.spark.rasterframes.encoders.SparkDefaultEncoders
-import astraea.spark.rasterframes.functions.{CellCountAggregate, CellMeanAggregate}
+import astraea.spark.rasterframes.expressions.TileAssembler
+import astraea.spark.rasterframes.expressions.accessors._
+import astraea.spark.rasterframes.expressions.aggstats._
+import astraea.spark.rasterframes.expressions.generators._
+import astraea.spark.rasterframes.expressions.localops._
+import astraea.spark.rasterframes.expressions.tilestats._
+import astraea.spark.rasterframes.expressions.transformers._
import astraea.spark.rasterframes.stats.{CellHistogram, CellStatistics}
-import astraea.spark.rasterframes.{HasCellType, util}
+import astraea.spark.rasterframes.{functions => F}
import com.vividsolutions.jts.geom.Geometry
import geotrellis.proj4.CRS
import geotrellis.raster.mapalgebra.local.LocalTileBinaryOp
@@ -31,12 +36,8 @@ import geotrellis.raster.{CellType, Tile}
import org.apache.spark.annotation.Experimental
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.functions.{lit, udf}
-import org.apache.spark.sql.rf.VersionShims
+import org.apache.spark.sql.rf.VersionShims._
import org.apache.spark.sql.{Column, SQLContext, TypedColumn, rf}
-import astraea.spark.rasterframes.{expressions ⇒ E, functions ⇒ F}
-
-import scala.reflect.runtime.universe._
-
/**
* UDFs for working with Tiles in Spark DataFrames.
@@ -44,514 +45,326 @@ import scala.reflect.runtime.universe._
* @since 4/3/17
*/
object ZeroSevenCompatibilityKit {
- import SparkDefaultEncoders._
- //import util.NamedColumn
+ import astraea.spark.rasterframes.encoders.StandardEncoders._
trait RasterFunctions {
+ private val delegate = new astraea.spark.rasterframes.RasterFunctions {}
// format: off
- /** Create a row for each cell in Tile. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def explodeTiles(cols: Column*): Column = explodeTilesSample(1.0, None, cols: _*)
-
- /** Create a row for each cell in Tile with random sampling and optional seed. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def explodeTilesSample(sampleFraction: Double, seed: Option[Long], cols: Column*): Column =
- E.ExplodeTiles(sampleFraction, seed, cols)
-
- /** Create a row for each cell in Tile with random sampling (no seed). */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def explodeTilesSample(sampleFraction: Double, cols: Column*): Column =
- E.ExplodeTiles(sampleFraction, None, cols)
-
- /** Query the number of (cols, rows) in a Tile. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def tileDimensions(col: Column): Column = E.GetDimensions(col)
-
- /** Flattens Tile into an array. A numeric type parameter is required. */
- @Experimental
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def tileToArray[T: HasCellType: TypeTag](col: Column): TypedColumn[Any, Array[T]] = withAlias("tileToArray", col)(
- udf[Array[T], Tile](F.tileToArray).apply(col)
- ).as[Array[T]]
-
- @Experimental
- /** Convert array in `arrayCol` into a Tile of dimensions `cols` and `rows`*/
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def arrayToTile(arrayCol: Column, cols: Int, rows: Int) = withAlias("array_to_tile", arrayCol)(
- udf[Tile, AnyRef](F.arrayToTile(cols, rows)).apply(arrayCol)
- )
-
- /** Create a Tile from a column of cell data with location indexes and preform cell conversion. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def assembleTile(columnIndex: Column, rowIndex: Column, cellData: Column, tileCols: Int, tileRows: Int, ct: CellType): TypedColumn[Any, Tile] =
- convertCellType(F.TileAssembler(columnIndex, rowIndex, cellData, lit(tileCols), lit(tileRows)), ct).as(cellData.columnName).as[Tile]
-
- /** Create a Tile from a column of cell data with location indexes. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def assembleTile(columnIndex: Column, rowIndex: Column, cellData: Column, tileCols: Column, tileRows: Column): TypedColumn[Any, Tile] =
- F.TileAssembler(columnIndex, rowIndex, cellData, tileCols, tileRows)
-
- /** Extract the Tile's cell type */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def cellType(col: Column): TypedColumn[Any, CellType] = E.GetCellType(col)
-
- /** Change the Tile's cell type */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def convertCellType(col: Column, cellType: CellType): TypedColumn[Any, Tile] =
- E.SetCellType(col, cellType)
-
- /** Change the Tile's cell type */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def convertCellType(col: Column, cellTypeName: String): TypedColumn[Any, Tile] =
- E.SetCellType(col, cellTypeName)
-
- /** Convert a bounding box structure to a Geometry type. Intented to support multiple schemas. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def boundsGeometry(bounds: Column): TypedColumn[Any, Geometry] = E.BoundsToGeometry(bounds)
-
- /** Assign a `NoData` value to the Tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def withNoData(col: Column, nodata: Double) = withAlias("withNoData", col)(
- udf[Tile, Tile](F.withNoData(nodata)).apply(col)
- ).as[Tile]
-
- /** Compute the full column aggregate floating point histogram. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def aggHistogram(col: Column): TypedColumn[Any, CellHistogram] =
- withAlias("histogram", col)(
- F.aggHistogram(col)
- ).as[CellHistogram]
-
- /** Compute the full column aggregate floating point statistics. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def aggStats(col: Column): TypedColumn[Any, CellStatistics] = withAlias("aggStats", col)(
- F.aggStats(col)
- ).as[CellStatistics]
-
- /** Computes the column aggregate mean. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def aggMean(col: Column) = CellMeanAggregate(col)
-
- /** Computes the number of non-NoData cells in a column. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def aggDataCells(col: Column) = CellCountAggregate(true, col)
-
- /** Computes the number of NoData cells in a column. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def aggNoDataCells(col: Column) = CellCountAggregate(false, col)
-
- /** Compute the Tile-wise mean */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def tileMean(col: Column): TypedColumn[Any, Double] =
- withAlias("tileMean", col)(
- udf[Double, Tile](F.tileMean).apply(col)
- ).as[Double]
-
- /** Compute the Tile-wise sum */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def tileSum(col: Column): TypedColumn[Any, Double] =
- withAlias("tileSum", col)(
- udf[Double, Tile](F.tileSum).apply(col)
- ).as[Double]
-
- /** Compute the minimum cell value in tile. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def tileMin(col: Column): TypedColumn[Any, Double] =
- withAlias("tileMin", col)(
- udf[Double, Tile](F.tileMin).apply(col)
- ).as[Double]
-
- /** Compute the maximum cell value in tile. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def tileMax(col: Column): TypedColumn[Any, Double] =
- withAlias("tileMax", col)(
- udf[Double, Tile](F.tileMax).apply(col)
- ).as[Double]
-
- /** Compute TileHistogram of Tile values. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def tileHistogram(col: Column): TypedColumn[Any, CellHistogram] =
- withAlias("tileHistogram", col)(
- udf[CellHistogram, Tile](F.tileHistogram).apply(col)
- ).as[CellHistogram]
-
- /** Compute statistics of Tile values. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def tileStats(col: Column): TypedColumn[Any, CellStatistics] =
- withAlias("tileStats", col)(
- udf[CellStatistics, Tile](F.tileStats).apply(col)
- ).as[CellStatistics]
-
- /** Counts the number of non-NoData cells per Tile. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def dataCells(tile: Column): TypedColumn[Any, Long] =
- withAlias("dataCells", tile)(
- udf(F.dataCells).apply(tile)
- ).as[Long]
-
- /** Counts the number of NoData cells per Tile. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def noDataCells(tile: Column): TypedColumn[Any, Long] =
- withAlias("noDataCells", tile)(
- udf(F.noDataCells).apply(tile)
- ).as[Long]
-
-
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def isNoDataTile(tile: Column): TypedColumn[Any, Boolean] =
- withAlias("isNoDataTile", tile)(
- udf(F.isNoDataTile).apply(tile)
- ).as[Boolean]
-
- /** Compute cell-local aggregate descriptive statistics for a column of Tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localAggStats(col: Column): Column =
- withAlias("localAggStats", col)(
- F.localAggStats(col)
- )
-
- /** Compute the cell-wise/local max operation between Tiles in a column. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localAggMax(col: Column): TypedColumn[Any, Tile] =
- withAlias("localAggMax", col)(
- F.localAggMax(col)
- ).as[Tile]
-
- /** Compute the cellwise/local min operation between Tiles in a column. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localAggMin(col: Column): TypedColumn[Any, Tile] =
- withAlias("localAggMin", col)(
- F.localAggMin(col)
- ).as[Tile]
-
- /** Compute the cellwise/local mean operation between Tiles in a column. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localAggMean(col: Column): TypedColumn[Any, Tile] =
- withAlias("localAggMean", col)(
- F.localAggMean(col)
- ).as[Tile]
-
- /** Compute the cellwise/local count of non-NoData cells for all Tiles in a column. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localAggDataCells(col: Column): TypedColumn[Any, Tile] =
- withAlias("localCount", col)(
- F.localAggCount(col)
- ).as[Tile]
-
- /** Compute the cellwise/local count of NoData cells for all Tiles in a column. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localAggNoDataCells(col: Column): TypedColumn[Any, Tile] =
- withAlias("localNodataCount", col)(
- F.localAggNodataCount(col)
- ).as[Tile]
-
- /** Cellwise addition between two Tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localAdd(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("local_add", left, right)(
- udf(F.localAdd).apply(left, right)
- ).as[Tile]
-
- /** Cellwise addition of a scalar to a tile. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localAddScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match {
- case i: Int => F.localAddScalarInt(_: Tile, i)
- case d: Double => F.localAddScalar(_: Tile, d)
- }
-
- udf(f).apply(tileCol).as(s"local_add_scalar($tileCol, $value)").as[Tile]
- }
-
- /** Cellwise subtraction between two Tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localSubtract(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("localSubtract", left, right)(
- udf(F.localSubtract).apply(left, right)
- ).as[Tile]
-
- /** Cellwise subtraction of a scalar from a tile. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localSubtractScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match {
- case i: Int => F.localSubtractScalarInt(_: Tile, i)
- case d: Double => F.localSubtractScalar(_: Tile, d)
- }
-
- udf(f).apply(tileCol).as(s"localSubtractScalar($tileCol, $value)").as[Tile]
- }
-
- /** Cellwise multiplication between two Tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localMultiply(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("localMultiply", left, right)(
- udf(F.localMultiply).apply(left, right)
- ).as[Tile]
-
- /** Cellwise multiplication of a tile by a scalar. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localMultiplyScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match {
- case i: Int => F.localMultiplyScalarInt(_: Tile, i)
- case d: Double => F.localMultiplyScalar(_: Tile, d)
- }
-
- udf(f).apply(tileCol).as(s"localMultiplyScalar($tileCol, $value)").as[Tile]
- }
-
- /** Cellwise division between two Tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localDivide(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("localDivide", left, right)(
- udf(F.localDivide).apply(left, right)
- ).as[Tile]
-
- /** Cellwise division of a tile by a scalar. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localDivideScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match {
- case i: Int => F.localDivideScalarInt(_: Tile, i)
- case d: Double => F.localDivideScalar(_: Tile, d)
- }
-
- udf(f).apply(tileCol).as(s"localDivideScalar($tileCol, $value)").as[Tile]
- }
-
- /** Perform an arbitrary GeoTrellis `LocalTileBinaryOp` between two Tile columns. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localAlgebra(op: LocalTileBinaryOp, left: Column, right: Column):
- TypedColumn[Any, Tile] =
- withAlias(opName(op), left, right)(
- udf[Tile, Tile, Tile](op.apply).apply(left, right)
- ).as[Tile]
-
- /** Compute the normalized difference of two tile columns */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def normalizedDifference(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("normalizedDifference", left, right)(
- udf(F.normalizedDifference).apply(left, right)
- ).as[Tile]
-
- /** Constructor for constant tile column */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def makeConstantTile(value: Number, cols: Int, rows: Int, cellType: String): TypedColumn[Any, Tile] =
- udf(() => F.makeConstantTile(value, cols, rows, cellType)).apply().as(s"constant_$cellType").as[Tile]
-
- /** Alias for column of constant tiles of zero */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def tileZeros(cols: Int, rows: Int, cellType: String = "float64"): TypedColumn[Any, Tile] =
- udf(() => F.tileZeros(cols, rows, cellType)).apply().as(s"zeros_$cellType").as[Tile]
-
- /** Alias for column of constant tiles of one */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def tileOnes(cols: Int, rows: Int, cellType: String = "float64"): TypedColumn[Any, Tile] =
- udf(() => F.tileOnes(cols, rows, cellType)).apply().as(s"ones_$cellType").as[Tile]
-
- /** Where the mask tile equals the mask value, replace values in the source tile with NODATA */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def maskByValue(sourceTile: Column, maskTile: Column, maskValue: Column): TypedColumn[Any, Tile] =
- withAlias("maskByValue", sourceTile, maskTile, maskValue)(
- udf(F.maskByValue).apply(sourceTile, maskTile, maskValue)
- ).as[Tile]
-
- /** Where the mask tile DOES NOT contain NODATA, replace values in the source tile with NODATA */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def inverseMask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] =
- withAlias("inverseMask", sourceTile, maskTile)(
- udf(F.inverseMask).apply(sourceTile, maskTile)
- ).as[Tile]
-
- /** Reproject a column of geometry from one CRS to another. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def reprojectGeometry(sourceGeom: Column, srcCRS: CRS, dstCRS: CRS): TypedColumn[Any, Geometry] =
- withAlias("reprojectGeometry", sourceGeom)(
- udf(F.reprojectGeometry(_: Geometry, srcCRS, dstCRS)).apply(sourceGeom)
- ).as[Geometry]
-
- /** Render Tile as ASCII string for debugging purposes. */
- @Experimental
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def renderAscii(col: Column): TypedColumn[Any, String] =
- withAlias("renderAscii", col)(
- udf[String, Tile](F.renderAscii).apply(col)
- ).as[String]
-
- /** Cellwise less than value comparison between two tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localLess(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("localLess", left, right)(
- udf(F.localLess).apply(left, right)
- ).as[Tile]
-
-
- /** Cellwise less than value comparison between a tile and a scalar. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localLessScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match{
- case i: Int => F.localLessScalarInt(_: Tile, i)
- case d: Double => F.localLessScalar(_: Tile, d)
- }
- udf(f).apply(tileCol).as(s"localLessScalar($tileCol, $value)").as[Tile]
- }
-
- /** Cellwise less than or equal to value comparison between a tile and a scalar. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localLessEqual(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("localLessEqual", left, right)(
- udf(F.localLess).apply(left, right)
+ /** Create a row for each cell in Tile. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def explodeTiles(cols: Column*): Column = delegate.explode_tiles(cols: _*)
+
+ /** Create a row for each cell in Tile with random sampling and optional seed. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def explodeTilesSample(sampleFraction: Double, seed: Option[Long], cols: Column*): Column =
+ ExplodeTiles(sampleFraction, seed, cols)
+
+ /** Create a row for each cell in Tile with random sampling (no seed). */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def explodeTilesSample(sampleFraction: Double, cols: Column*): Column =
+ ExplodeTiles(sampleFraction, None, cols)
+
+ /** Query the number of (cols, rows) in a Tile. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def tileDimensions(col: Column): Column = GetDimensions(col)
+
+ /** Convert array in `arrayCol` into a Tile of dimensions `cols` and `rows`. */
+ @Experimental
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def arrayToTile(arrayCol: Column, cols: Int, rows: Int) = withAlias("array_to_tile", arrayCol)(
+ udf[Tile, AnyRef](F.arrayToTile(cols, rows)).apply(arrayCol)
+ )
+
+ /** Create a Tile from a column of cell data with location indexes and perform cell conversion. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def assembleTile(columnIndex: Column, rowIndex: Column, cellData: Column, tileCols: Int, tileRows: Int, ct: CellType): TypedColumn[Any, Tile] =
+ convertCellType(TileAssembler(columnIndex, rowIndex, cellData, lit(tileCols), lit(tileRows)), ct).as(cellData.columnName).as[Tile]
+
+ /** Create a Tile from a column of cell data with location indexes. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def assembleTile(columnIndex: Column, rowIndex: Column, cellData: Column, tileCols: Column, tileRows: Column): TypedColumn[Any, Tile] =
+ TileAssembler(columnIndex, rowIndex, cellData, tileCols, tileRows)
+
+ /** Extract the Tile's cell type */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def cellType(col: Column): TypedColumn[Any, CellType] = GetCellType(col)
+
+ /** Change the Tile's cell type */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def convertCellType(col: Column, cellType: CellType): TypedColumn[Any, Tile] =
+ SetCellType(col, cellType)
+
+ /** Change the Tile's cell type */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def convertCellType(col: Column, cellTypeName: String): TypedColumn[Any, Tile] =
+ SetCellType(col, cellTypeName)
+
+ /** Convert a bounding box structure to a Geometry type. Intended to support multiple schemas. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def boundsGeometry(bounds: Column): TypedColumn[Any, Geometry] = BoundsToGeometry(bounds)
+
+ /** Assign a `NoData` value to the Tiles. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def withNoData(col: Column, nodata: Double) = withAlias("withNoData", col)(
+ udf[Tile, Tile](F.withNoData(nodata)).apply(col)
).as[Tile]
- /** Cellwise less than or equal to value comparison between a tile and a scalar. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localLessEqualScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match{
- case i: Int => F.localLessEqualScalarInt(_: Tile, i)
- case d: Double => F.localLessEqualScalar(_: Tile, d)
- }
- udf(f).apply(tileCol).as(s"localLessEqualScalar($tileCol, $value)").as[Tile]
- }
-
- /** Cellwise greater than value comparison between two tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localGreater(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("localGreater", left, right)(
- udf(F.localGreater).apply(left, right)
- ).as[Tile]
+ /** Compute the full column aggregate floating point histogram. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def aggHistogram(col: Column): TypedColumn[Any, CellHistogram] = delegate.agg_approx_histogram(col)
+ /** Compute the full column aggregate floating point statistics. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def aggStats(col: Column): TypedColumn[Any, CellStatistics] = delegate.agg_stats(col)
- /** Cellwise greater than value comparison between a tile and a scalar. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localGreaterScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match{
- case i: Int => F.localGreaterScalarInt(_: Tile, i)
- case d: Double => F.localGreaterScalar(_: Tile, d)
- }
- udf(f).apply(tileCol).as(s"localGreaterScalar($tileCol, $value)").as[Tile]
+ /** Computes the column aggregate mean. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def aggMean(col: Column) = CellMeanAggregate(col)
+
+ /** Computes the number of non-NoData cells in a column. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def aggDataCells(col: Column): TypedColumn[Any, Long] = delegate.agg_data_cells(col)
+
+ /** Computes the number of NoData cells in a column. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def aggNoDataCells(col: Column): TypedColumn[Any, Long] = delegate.agg_no_data_cells(col)
+
+ /** Compute the Tile-wise mean */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def tileMean(col: Column): TypedColumn[Any, Double] = delegate.tile_mean(col)
+
+ /** Compute the Tile-wise sum */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def tileSum(col: Column): TypedColumn[Any, Double] = delegate.tile_sum(col)
+
+ /** Compute the minimum cell value in tile. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def tileMin(col: Column): TypedColumn[Any, Double] = delegate.tile_min(col)
+
+ /** Compute the maximum cell value in tile. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def tileMax(col: Column): TypedColumn[Any, Double] = delegate.tile_max(col)
+
+ /** Compute TileHistogram of Tile values. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def tileHistogram(col: Column): TypedColumn[Any, CellHistogram] = delegate.tile_histogram(col)
+
+ /** Compute statistics of Tile values. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def tileStats(col: Column): TypedColumn[Any, CellStatistics] = delegate.tile_stats(col)
+
+ /** Counts the number of non-NoData cells per Tile. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def dataCells(tile: Column): TypedColumn[Any, Long] = delegate.data_cells(tile)
+
+ /** Counts the number of NoData cells per Tile. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def noDataCells(tile: Column): TypedColumn[Any, Long] = delegate.no_data_cells(tile)
+
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def isNoDataTile(tile: Column): TypedColumn[Any, Boolean] = delegate.is_no_data_tile(tile)
+
+ /** Compute cell-local aggregate descriptive statistics for a column of Tiles. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localAggStats(col: Column): Column = delegate.agg_local_stats(col)
+
+ /** Compute the cell-wise/local max operation between Tiles in a column. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localAggMax(col: Column): TypedColumn[Any, Tile] = delegate.agg_local_max(col)
+
+ /** Compute the cellwise/local min operation between Tiles in a column. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localAggMin(col: Column): TypedColumn[Any, Tile] = delegate.agg_local_min(col)
+
+ /** Compute the cellwise/local mean operation between Tiles in a column. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localAggMean(col: Column): TypedColumn[Any, Tile] = delegate.agg_local_mean(col)
+
+ /** Compute the cellwise/local count of non-NoData cells for all Tiles in a column. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localAggDataCells(col: Column): TypedColumn[Any, Tile] = delegate.agg_local_data_cells(col)
+
+ /** Compute the cellwise/local count of NoData cells for all Tiles in a column. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localAggNoDataCells(col: Column): TypedColumn[Any, Tile] = delegate.agg_local_no_data_cells(col)
+
+ /** Cellwise addition between two Tiles. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localAdd(left: Column, right: Column): Column = delegate.local_add(left, right)
+
+ /** Cellwise addition of a scalar to a tile. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localAddScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_add(tileCol, value)
+
+ /** Cellwise subtraction between two Tiles. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localSubtract(left: Column, right: Column): Column = delegate.local_subtract(left, right)
+
+ /** Cellwise subtraction of a scalar from a tile. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localSubtractScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_subtract(tileCol, value)
+ /** Cellwise multiplication between two Tiles. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localMultiply(left: Column, right: Column): Column = delegate.local_multiply(left, right)
+
+ /** Cellwise multiplication of a tile by a scalar. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localMultiplyScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_multiply(tileCol, value)
+
+ /** Cellwise division between two Tiles. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localDivide(left: Column, right: Column): Column = delegate.local_divide(left, right)
+
+ /** Cellwise division of a tile by a scalar. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localDivideScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_divide(tileCol, value)
+ /** Perform an arbitrary GeoTrellis `LocalTileBinaryOp` between two Tile columns. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localAlgebra(op: LocalTileBinaryOp, left: Column, right: Column):
+ TypedColumn[Any, Tile] =
+ withAlias(opName(op), left, right)(
+ udf[Tile, Tile, Tile](op.apply).apply(left, right)
+ ).as[Tile]
+
+ /** Compute the normalized difference of two tile columns */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def normalizedDifference(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.normalized_difference(left, right)
+
+ /** Constructor for constant tile column */
+ @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def makeConstantTile(value: Number, cols: Int, rows: Int, cellType: String): TypedColumn[Any, Tile] =
+ udf(() => F.makeConstantTile(value, cols, rows, cellType)).apply().as(s"constant_$cellType").as[Tile]
+
+ /** Alias for column of constant tiles of zero */
+ @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def tileZeros(cols: Int, rows: Int, cellType: String = "float64"): TypedColumn[Any, Tile] =
+ udf(() => F.tileZeros(cols, rows, cellType)).apply().as(s"zeros_$cellType").as[Tile]
+
+ /** Alias for column of constant tiles of one */
+ @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def tileOnes(cols: Int, rows: Int, cellType: String = "float64"): TypedColumn[Any, Tile] =
+ udf(() => F.tileOnes(cols, rows, cellType)).apply().as(s"ones_$cellType").as[Tile]
+
+ /** Where the mask tile equals the mask value, replace values in the source tile with NODATA */
+ @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def maskByValue(sourceTile: Column, maskTile: Column, maskValue: Column): TypedColumn[Any, Tile] =
+ delegate.mask_by_value(sourceTile, maskTile, maskValue)
+
+ /** Where the mask tile DOES NOT contain NODATA, replace values in the source tile with NODATA */
+ @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def inverseMask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] =
+ delegate.inverse_mask(sourceTile, maskTile)
+
+ /** Reproject a column of geometry from one CRS to another. */
+ @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def reprojectGeometry(sourceGeom: Column, srcCRS: CRS, dstCRS: CRS): TypedColumn[Any, Geometry] =
+ delegate.reproject_geometry(sourceGeom, srcCRS, dstCRS)
+
+ /** Render Tile as ASCII string for debugging purposes. */
+ @Experimental
+ @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def renderAscii(col: Column): TypedColumn[Any, String] = delegate.render_ascii(col)
+
+ /** Cellwise less than value comparison between two tiles. */
+ @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localLess(left: Column, right: Column): TypedColumn[Any, Tile] =
+ delegate.local_less(left, right)
+
+ /** Cellwise less than value comparison between a tile and a scalar. */
+ @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localLessScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_less(tileCol, value)
+
+ /** Cellwise less than or equal to value comparison between two tiles. */
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localLessEqual(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.local_less_equal(left, right)
+
+ /** Cellwise less than or equal to value comparison between a tile and a scalar. */
+ @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localLessEqualScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_less_equal(tileCol, value)
+
+ /** Cellwise greater than value comparison between two tiles. */
+ @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localGreater(left: Column, right: Column): TypedColumn[Any, Tile] =
+ delegate.local_greater(left, right)
+
+ /** Cellwise greater than value comparison between a tile and a scalar. */
+ @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localGreaterScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_greater(tileCol, value)
+
+ /** Cellwise greater than or equal to value comparison between two tiles. */
+ @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localGreaterEqual(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.local_greater_equal(left, right)
+
+ /** Cellwise greater than or equal to value comparison between a tile and a scalar. */
+ @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localGreaterEqualScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_greater_equal(tileCol, value)
+
+ /** Cellwise equal to value comparison between two tiles. */
+ @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localEqual(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.local_equal(left, right)
+
+ /** Cellwise equal to value comparison between a tile and a scalar. */
+ @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localEqualScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_equal(tileCol, value)
+
+ /** Cellwise inequality comparison between two tiles. */
+ @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localUnequal(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.local_unequal(left, right)
+
+ /** Cellwise inequality comparison between a tile and a scalar. */
+ @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localUnequalScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_unequal(tileCol, value)
}
- /** Cellwise greater than or equal to value comparison between two tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localGreaterEqual(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("localGreaterEqual", left, right)(
- udf(F.localGreaterEqual).apply(left, right)
- ).as[Tile]
-
- /** Cellwise greater than or equal to value comparison between a tile and a scalar. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localGreaterEqualScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match{
- case i: Int => F.localGreaterEqualScalarInt(_: Tile, i)
- case d: Double => F.localGreaterEqualScalar(_: Tile, d)
- }
- udf(f).apply(tileCol).as(s"localGreaterEqualScalar($tileCol, $value)").as[Tile]
- }
-
- /** Cellwise equal to value comparison between two tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localEqual(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("localEqual", left, right)(
- udf(F.localEqual).apply(left, right)
- ).as[Tile]
-
- /** Cellwise equal to value comparison between a tile and a scalar. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localEqualScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match{
- case i: Int => F.localEqualScalarInt(_: Tile, i)
- case d: Double => F.localEqualScalar(_: Tile, d)
- }
- udf(f).apply(tileCol).as(s"localEqualScalar($tileCol, $value)").as[Tile]
- }
- /** Cellwise inequality comparison between two tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localUnequal(left: Column, right: Column): TypedColumn[Any, Tile] =
- withAlias("localUnequal", left, right)(
- udf(F.localUnequal).apply(left, right)
- ).as[Tile]
-
- /** Cellwise inequality comparison between a tile and a scalar. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localUnequalScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = {
- val f = value match{
- case i: Int => F.localUnequalScalarInt(_: Tile, i)
- case d: Double => F.localUnequalScalar(_: Tile, d)
- }
- udf(f).apply(tileCol).as(s"localUnequalScalar($tileCol, $value)").as[Tile]
- }
-}
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x.", "0.8.0")
def register(sqlContext: SQLContext): Unit = {
/** Unary expression builder builder. */
- def ub[A, B](f: A ⇒ B)(a: Seq[A]): B = f(a.head)
+ def ub[A, B](f: A => B)(a: Seq[A]): B = f(a.head)
/** Binary expression builder builder. */
- def bb[A, B](f: (A, A) ⇒ B)(a: Seq[A]): B = f(a.head, a.last)
+ def bb[A, B](f: (A, A) => B)(a: Seq[A]): B = f(a.head, a.last)
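+ // `ub` and `bb` adapt unary and binary expression constructors to the
+ // Seq[Expression]-based FunctionBuilder shape; e.g. bb(Add.apply) applies the
+ // head and last elements of the argument Seq as the two operands.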
// Expression-oriented functions have a different registration scheme
// Currently have to register with the `builtin` registry due to Spark data hiding.
val registry: FunctionRegistry = rf.registry(sqlContext)
- VersionShims.registerExpression(registry, "rf_explodeTiles", E.ExplodeTiles.apply(1.0, None, _))
- VersionShims.registerExpression(registry, "rf_cellType", ub(E.GetCellType.apply))
- VersionShims.registerExpression(registry, "rf_convertCellType", bb(E.SetCellType.apply))
- VersionShims.registerExpression(registry, "rf_tileDimensions", ub(E.GetDimensions.apply))
- VersionShims.registerExpression(registry, "rf_boundsGeometry", ub(E.BoundsToGeometry.apply))
-
+ registry.registerFunc("rf_explodeTiles", ExplodeTiles.apply(1.0, None, _))
+ registry.registerFunc("rf_cellType", ub(GetCellType.apply))
+ registry.registerFunc("rf_convertCellType", bb(SetCellType.apply))
+ registry.registerFunc("rf_tileDimensions", ub(GetDimensions.apply))
+ registry.registerFunc("rf_boundsGeometry", ub(BoundsToGeometry.apply))
+ registry.registerFunc("rf_localAdd", bb(Add.apply))
+ registry.registerFunc("rf_localSubtract", bb(Subtract.apply))
+ registry.registerFunc("rf_localMultiply", bb(Multiply.apply))
+ registry.registerFunc("rf_localDivide", bb(Divide.apply))
+ registry.registerFunc("rf_normalizedDifference", bb(NormalizedDifference.apply))
+ registry.registerFunc("rf_localLess", bb(Less.apply))
+ registry.registerFunc("rf_localLessEqual", bb(LessEqual.apply))
+ registry.registerFunc("rf_localGreater", bb(Greater.apply))
+ registry.registerFunc("rf_localGreaterEqual", bb(GreaterEqual.apply))
+ registry.registerFunc("rf_localEqual", bb(Equal.apply))
+ registry.registerFunc("rf_localUnequal", bb(Unequal.apply))
+ registry.registerFunc("rf_tileSum", ub(Sum.apply))
+ registry.registerFunc("rf_dataCells", ub(DataCells.apply))
+ registry.registerFunc("rf_noDataCells", ub(NoDataCells.apply))
+ registry.registerFunc("rf_isNoDataTile", ub(IsNoDataTile.apply))
+ registry.registerFunc("rf_tileMin", ub(TileMin.apply))
+ registry.registerFunc("rf_tileMax", ub(TileMax.apply))
+ registry.registerFunc("rf_tileMean", ub(TileMean.apply))
+ registry.registerFunc("rf_tileStats", ub(TileStats.apply))
+ registry.registerFunc("rf_tileHistogram", ub(TileHistogram.apply))
+ registry.registerFunc("rf_aggStats", ub(CellStatsAggregate.CellStatsAggregateUDAF.apply))
+ registry.registerFunc("rf_aggHistogram", ub(HistogramAggregate.HistogramAggregateUDAF.apply))
+ registry.registerFunc("rf_localAggStats", ub(LocalStatsAggregate.LocalStatsAggregateUDAF.apply))
+ registry.registerFunc("rf_renderAscii", ub(DebugRender.RenderMatrix.apply))
+ registry.registerFunc("rf_localAggMax", ub(LocalTileOpAggregate.LocalMaxUDAF.apply))
+ registry.registerFunc("rf_localAggMin", ub(LocalTileOpAggregate.LocalMinUDAF.apply))
+ registry.registerFunc("rf_localAggCount", ub(LocalCountAggregate.LocalDataCellsUDAF.apply))
+ registry.registerFunc("rf_localAggMean", ub(LocalMeanAggregate.apply))
- sqlContext.udf.register("rf_maskByValue", F.maskByValue)
- sqlContext.udf.register("rf_inverseMask", F.inverseMask)
sqlContext.udf.register("rf_makeConstantTile", F.makeConstantTile)
sqlContext.udf.register("rf_tileZeros", F.tileZeros)
sqlContext.udf.register("rf_tileOnes", F.tileOnes)
- sqlContext.udf.register("rf_tileToArrayInt", F.tileToArray[Int])
- sqlContext.udf.register("rf_tileToArrayDouble", F.tileToArray[Double])
- sqlContext.udf.register("rf_aggHistogram", F.aggHistogram)
- sqlContext.udf.register("rf_aggStats", F.aggStats)
- sqlContext.udf.register("rf_tileMin", F.tileMin)
- sqlContext.udf.register("rf_tileMax", F.tileMax)
- sqlContext.udf.register("rf_tileMean", F.tileMean)
- sqlContext.udf.register("rf_tileSum", F.tileSum)
- sqlContext.udf.register("rf_tileHistogram", F.tileHistogram)
- sqlContext.udf.register("rf_tileStats", F.tileStats)
- sqlContext.udf.register("rf_dataCells", F.dataCells)
- sqlContext.udf.register("rf_noDataCells", F.noDataCells)
- sqlContext.udf.register("rf_isNoDataTile", F.isNoDataTile)
- sqlContext.udf.register("rf_localAggStats", F.localAggStats)
- sqlContext.udf.register("rf_localAggMax", F.localAggMax)
- sqlContext.udf.register("rf_localAggMin", F.localAggMin)
- sqlContext.udf.register("rf_localAggMean", F.localAggMean)
- sqlContext.udf.register("rf_localAggCount", F.localAggCount)
- sqlContext.udf.register("rf_localAdd", F.localAdd)
- sqlContext.udf.register("rf_localAddScalar", F.localAddScalar)
- sqlContext.udf.register("rf_localAddScalarInt", F.localAddScalarInt)
- sqlContext.udf.register("rf_localSubtract", F.localSubtract)
- sqlContext.udf.register("rf_localSubtractScalar", F.localSubtractScalar)
- sqlContext.udf.register("rf_localSubtractScalarInt", F.localSubtractScalarInt)
- sqlContext.udf.register("rf_localMultiply", F.localMultiply)
- sqlContext.udf.register("rf_localMultiplyScalar", F.localMultiplyScalar)
- sqlContext.udf.register("rf_localMultiplyScalarInt", F.localMultiplyScalarInt)
- sqlContext.udf.register("rf_localDivide", F.localDivide)
- sqlContext.udf.register("rf_localDivideScalar", F.localDivideScalar)
- sqlContext.udf.register("rf_localDivideScalarInt", F.localDivideScalarInt)
- sqlContext.udf.register("rf_normalizedDifference", F.normalizedDifference)
sqlContext.udf.register("rf_cellTypes", F.cellTypes)
- sqlContext.udf.register("rf_renderAscii", F.renderAscii)
- sqlContext.udf.register("rf_lessScalar", F.localLessScalar)
- sqlContext.udf.register("rf_lessScalarInt", F.localLessScalarInt)
- sqlContext.udf.register("rf_lessEqual", F.localLessEqual)
- sqlContext.udf.register("rf_lessEqualScalar", F.localLessEqualScalar)
- sqlContext.udf.register("rf_lessEqualScalarInt", F.localLessEqualScalarInt)
- sqlContext.udf.register("rf_greater", F.localGreater)
- sqlContext.udf.register("rf_greaterScalar", F.localGreaterScalar)
- sqlContext.udf.register("rf_greaterScalarInt", F.localGreaterScalarInt)
- sqlContext.udf.register("rf_greaterEqual", F.localGreaterEqual)
- sqlContext.udf.register("rf_greaterEqualScalar", F.localGreaterEqualScalar)
- sqlContext.udf.register("rf_greaterEqualScalarInt", F.localGreaterEqualScalarInt)
- sqlContext.udf.register("rf_equal", F.localEqual)
- sqlContext.udf.register("rf_equalScalar", F.localEqualScalar)
- sqlContext.udf.register("rf_equalScalarInt", F.localEqualScalarInt)
- sqlContext.udf.register("rf_unequal", F.localUnequal)
- sqlContext.udf.register("rf_unequalScalar", F.localUnequalScalar)
- sqlContext.udf.register("rf_unequalScalarInt", F.localUnequalScalarInt)
sqlContext.udf.register("rf_reprojectGeometry", F.reprojectGeometryCRSName)
}
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/package.scala b/core/src/main/scala/astraea/spark/rasterframes/util/package.scala
index 7713a0760..02a365cea 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/util/package.scala
+++ b/core/src/main/scala/astraea/spark/rasterframes/util/package.scala
@@ -20,7 +20,8 @@
package astraea.spark.rasterframes
import geotrellis.proj4.CRS
-import geotrellis.raster.CellGrid
+import geotrellis.raster
+import geotrellis.raster.{CellGrid, Tile, isNoData}
import geotrellis.raster.crop.TileCropMethods
import geotrellis.raster.io.geotiff.reader.GeoTiffReader
import geotrellis.raster.mapalgebra.local.LocalTileBinaryOp
@@ -36,6 +37,7 @@ import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.rf._
import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.{Column, DataFrame, SQLContext}
+import spire.syntax.cfor._
import scala.Boolean.box
@@ -89,9 +91,10 @@ package object util extends LazyLogging {
object CRSParser {
def apply(value: String): CRS = {
value match {
- case e if e.startsWith("EPSG") => CRS.fromName(e)
- case p if p.startsWith("+proj") => CRS.fromString(p)
- case w if w.startsWith("GEOGCS") => CRS.fromWKT(w)
+ case e if e.toUpperCase().startsWith("EPSG") => CRS.fromName(e) // not case-sensitive
+ case p if p.startsWith("+proj") => CRS.fromString(p) // case-sensitive
+ case w if w.toUpperCase().startsWith("GEOGCS") => CRS.fromWKT(w) // only case-sensitive inside double quotes
+ case _ => throw new IllegalArgumentException("CRS string must be an EPSG code, a +proj string, or OGC WKT")
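+ // Accepted forms (illustrative values): "EPSG:4326", "+proj=longlat +datum=WGS84",
+ // or WKT beginning with "GEOGCS"; anything else raises IllegalArgumentException.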
}
}
}
@@ -153,6 +156,31 @@ package object util extends LazyLogging {
analyzer(sqlContext).extendedResolutionRules
}
+ implicit class TileAsMatrix(val tile: Tile) extends AnyVal {
+ def renderMatrix(significantDigits: Int): String = {
+ val ND = s"%${significantDigits+5}s".format(Double.NaN)
+ val fmt = s"% ${significantDigits+5}.${significantDigits}g"
+ val buf = new StringBuilder("[")
+ cfor(0)(_ < tile.rows, _ + 1) { row =>
+ if(row > 0) buf.append(' ')
+ buf.append('[')
+ cfor(0)(_ < tile.cols, _ + 1) { col =>
+ val v = tile.getDouble(col, row)
+ if (isNoData(v)) buf.append(ND)
+ else buf.append(fmt.format(v))
+
+ if (col < tile.cols - 1)
+ buf.append(',')
+ }
+ buf.append(']')
+ if (row < tile.rows - 1)
+ buf.append(",\n")
+ }
+ buf.append("]")
+ buf.toString()
+ }
+ }
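+ // Example (illustrative): renderMatrix(3) on a 2x2 tile of ones produces a
+ // bracketed matrix, one comma-separated row per line, with NoData cells rendered as NaN.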
+
object Shims {
// GT 1.2.1 to 2.0.0
def toArrayTile[T <: CellGrid](tile: T): T =
diff --git a/core/src/main/scala/org/apache/spark/sql/rf/TileUDT.scala b/core/src/main/scala/org/apache/spark/sql/rf/TileUDT.scala
index 6b95d359e..75ac0f7cf 100644
--- a/core/src/main/scala/org/apache/spark/sql/rf/TileUDT.scala
+++ b/core/src/main/scala/org/apache/spark/sql/rf/TileUDT.scala
@@ -23,11 +23,9 @@ package org.apache.spark.sql.rf
import astraea.spark.rasterframes.encoders.CatalystSerializer
import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.ref.RasterRef
-import astraea.spark.rasterframes.ref.RasterRef.RasterRefTile
-import astraea.spark.rasterframes.tiles.{InternalRowTile, ProjectedRasterTile}
+import astraea.spark.rasterframes.model.{Cells, TileDataContext}
+import astraea.spark.rasterframes.tiles.InternalRowTile
import geotrellis.raster._
-import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.types.{DataType, _}
@@ -72,68 +70,30 @@ class TileUDT extends UserDefinedType[Tile] {
case object TileUDT {
UDTRegistration.register(classOf[Tile].getName, classOf[TileUDT].getName)
- UDTRegistration.register(classOf[ProjectedRasterTile].getName, classOf[TileUDT].getName)
final val typeName: String = "tile"
- // Column mapping which must match layout below
- object C {
- val CELL_TYPE = 0
- val COLS = 1
- val ROWS = 2
- val CELLS = 3
- val REF = 4
- }
-
implicit def tileSerializer: CatalystSerializer[Tile] = new CatalystSerializer[Tile] {
import scala.language.reflectiveCalls
override def schema: StructType = StructType(Seq(
- StructField("cell_type", StringType, false),
- StructField("cols", ShortType, false),
- StructField("rows", ShortType, false),
- StructField("cells", BinaryType, true),
- StructField("ref", CatalystSerializer[RasterRef].schema, true)
+ StructField("cell_context", CatalystSerializer[TileDataContext].schema, false),
+ StructField("cell_data", CatalystSerializer[Cells].schema, false)
))
- def isRef[R](row: R, io: CatalystIO[R]): Boolean = io.isNullAt(row, C.CELLS)
-
- override def to[R](t: Tile, io: CatalystIO[R]): R = {
- t match {
- case ref: RasterRefTile ⇒
- io.create(
- io.encode(ref.cellType.name),
- ref.cols.toShort,
- ref.rows.toShort,
- null,
- io.to(ref.rr)
- )
- case _ ⇒
- io.create(
- io.encode(t.cellType.name),
- t.cols.toShort,
- t.rows.toShort,
- t.toBytes,
- null
- )
- }
- }
+ override def to[R](t: Tile, io: CatalystIO[R]): R = io.create(
+ io.to(TileDataContext(t)),
+ io.to(Cells(t))
+ )
+
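+ // Layout note: tile metadata (cell type and dimensions) lives in `cell_context`,
+ // while `cell_data` holds either inline cell bytes or a raster reference,
+ // replacing the previous five-column encoding.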
override def from[R](row: R, io: CatalystIO[R]): Tile = {
+ val cells = io.get[Cells](row, 1)
+
row match {
- case ir: InternalRow if !isRef(row, io) ⇒ new InternalRowTile(ir)
+ case ir: InternalRow if !cells.isRef ⇒ new InternalRowTile(ir)
case _ ⇒
- if(isRef(row, io)) {
- val ref = io.get[RasterRef](row, C.REF)
- RasterRefTile(ref)
- }
- else {
- val ct = CellType.fromName(io.getString(row, C.CELL_TYPE))
- val cols = io.getShort(row, C.COLS)
- val rows = io.getShort(row, C.ROWS)
-
- val data = io.getByteArray(row, 3)
- ArrayTile.fromBytes(data, ct, cols, rows)
- }
+ val ctx = io.get[TileDataContext](row, 0)
+ cells.toTile(ctx)
}
}
}
diff --git a/core/src/main/scala/org/apache/spark/sql/rf/VersionShims.scala b/core/src/main/scala/org/apache/spark/sql/rf/VersionShims.scala
index 3a05af0a4..b9eb96981 100644
--- a/core/src/main/scala/org/apache/spark/sql/rf/VersionShims.scala
+++ b/core/src/main/scala/org/apache/spark/sql/rf/VersionShims.scala
@@ -2,16 +2,21 @@ package org.apache.spark.sql.rf
import java.lang.reflect.{Constructor, Method}
+import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
+import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.{FunctionBuilder, expressionInfo}
import org.apache.spark.sql.catalyst.catalog.CatalogTable
-import org.apache.spark.sql.{DataFrame, Dataset, SQLContext}
-import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, ScalaUDF}
+import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, SQLContext}
+import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BinaryExpression, Expression, ExpressionDescription, ExpressionInfo, RuntimeReplaceable, ScalaUDF}
import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, InvokeLike}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.sources.BaseRelation
import org.apache.spark.sql.types.DataType
+import scala.reflect._
+import scala.util.{Failure, Success, Try}
+
/**
* Collection of Spark version compatibility adapters.
*
@@ -102,24 +107,91 @@ object VersionShims {
}
}
- def registerExpression(registry: FunctionRegistry, name: String, builder: FunctionRegistry.FunctionBuilder): Unit = {
- // Spark 2.3 introduced a new way of specifying Functions
- val spark23FI = "org.apache.spark.sql.catalyst.FunctionIdentifier"
- registry.getClass.getDeclaredMethods
- .filter(m ⇒ m.getName == "registerFunction" && m.getParameterCount == 2)
- .foreach { m ⇒
- val firstParam = m.getParameterTypes()(0)
- if(firstParam == classOf[String])
- m.invoke(registry, name, builder)
- else if(firstParam.getName == spark23FI) {
- val fic = Class.forName(spark23FI)
- val ctor = fic.getConstructor(classOf[String], classOf[Option[_]])
- val fi = ctor.newInstance(name, None).asInstanceOf[Object]
- m.invoke(registry, fi, builder)
+ implicit class RichFunctionRegistry(registry: FunctionRegistry) {
+
+ def registerFunc(name: String, builder: FunctionRegistry.FunctionBuilder): Unit = {
+ // Spark 2.3 introduced a new way of specifying Functions
+ val spark23FI = "org.apache.spark.sql.catalyst.FunctionIdentifier"
+ registry.getClass.getDeclaredMethods
+ .filter(m ⇒ m.getName == "registerFunction" && m.getParameterCount == 2)
+ .foreach { m ⇒
+ val firstParam = m.getParameterTypes()(0)
+ if(firstParam == classOf[String])
+ m.invoke(registry, name, builder)
+ else if(firstParam.getName == spark23FI) {
+ val fic = Class.forName(spark23FI)
+ val ctor = fic.getConstructor(classOf[String], classOf[Option[_]])
+ val fi = ctor.newInstance(name, None).asInstanceOf[Object]
+ m.invoke(registry, fi, builder)
+ }
+ else {
+ throw new NotImplementedError("Unexpected FunctionRegistry API: " + m.toGenericString)
+ }
}
- else {
- throw new NotImplementedError("Unexpected FunctionRegistry API: " + m.toGenericString)
+ }
+
+ // Much of the code herein is copied from org.apache.spark.sql.catalyst.analysis.FunctionRegistry
+ def registerExpression[T <: Expression: ClassTag](name: String): Unit = {
+ val clazz = classTag[T].runtimeClass
+
+ def expressionInfo: ExpressionInfo = {
+ val df = clazz.getAnnotation(classOf[ExpressionDescription])
+ if (df != null) {
+ if (df.extended().isEmpty) {
+ new ExpressionInfo(clazz.getCanonicalName, null, name, df.usage(), df.arguments(), df.examples(), df.note(), df.since())
+ } else {
+ // This exists for the backward compatibility with old `ExpressionDescription`s defining
+ // the extended description in `extended()`.
+ new ExpressionInfo(clazz.getCanonicalName, null, name, df.usage(), df.extended())
+ }
+ } else {
+ new ExpressionInfo(clazz.getCanonicalName, name)
}
}
+ def findBuilder: FunctionBuilder = {
+ val constructors = clazz.getConstructors
+ // See if we can find a constructor that accepts Seq[Expression]
+ val varargCtor = constructors.find(_.getParameterTypes.toSeq == Seq(classOf[Seq[_]]))
+ val builder = (expressions: Seq[Expression]) => {
+ if (varargCtor.isDefined) {
+ // If there is an apply method that accepts Seq[Expression], use that one.
+ Try(varargCtor.get.newInstance(expressions).asInstanceOf[Expression]) match {
+ case Success(e) => e
+ case Failure(e) =>
+ // the exception is an invocation exception. To get a meaningful message, we need the
+ // cause.
+ throw new AnalysisException(e.getCause.getMessage)
+ }
+ } else {
+ // Otherwise, find a constructor method that matches the number of arguments, and use that.
+ val params = Seq.fill(expressions.size)(classOf[Expression])
+ val f = constructors.find(_.getParameterTypes.toSeq == params).getOrElse {
+ val validParametersCount = constructors
+ .filter(_.getParameterTypes.forall(_ == classOf[Expression]))
+ .map(_.getParameterCount).distinct.sorted
+ val expectedNumberOfParameters = if (validParametersCount.length == 1) {
+ validParametersCount.head.toString
+ } else {
+ validParametersCount.init.mkString("one of ", ", ", " and ") +
+ validParametersCount.last
+ }
+ throw new AnalysisException(s"Invalid number of arguments for function ${clazz.getSimpleName}. " +
+ s"Expected: $expectedNumberOfParameters; Found: ${params.length}")
+ }
+ Try(f.newInstance(expressions : _*).asInstanceOf[Expression]) match {
+ case Success(e) => e
+ case Failure(e) =>
+ // the exception is an invocation exception. To get a meaningful message, we need the
+ // cause.
+ throw new AnalysisException(e.getCause.getMessage)
+ }
+ }
+ }
+
+ builder
+ }
+
+ registry.registerFunction(FunctionIdentifier(name), expressionInfo, findBuilder)
+ }
}
}
diff --git a/core/src/test/resources/log4j.properties b/core/src/test/resources/log4j.properties
index 6d7d28723..378ae8e61 100644
--- a/core/src/test/resources/log4j.properties
+++ b/core/src/test/resources/log4j.properties
@@ -28,7 +28,7 @@ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}:
log4j.logger.org.apache.spark.repl.Main=WARN
-log4j.logger.org.apache=WARN
+log4j.logger.org.apache=ERROR
log4j.logger.com.amazonaws=WARN
log4j.logger.geotrellis=INFO
diff --git a/core/src/test/scala/astraea/spark/rasterframes/ExplodeSpec.scala b/core/src/test/scala/astraea/spark/rasterframes/ExplodeSpec.scala
index 2f50fc2f9..a06b6444b 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/ExplodeSpec.scala
+++ b/core/src/test/scala/astraea/spark/rasterframes/ExplodeSpec.scala
@@ -81,7 +81,7 @@ class ExplodeSpec extends TestEnvironment with TestData {
.select($"tile".as[Double])
.collect()
- assert(cells.count(_.isNaN) === 1)
+ cells.count(_.isNaN) should be(1)
}
it("should handle user-defined NoData values in tile sampler") {
@@ -90,7 +90,7 @@ class ExplodeSpec extends TestEnvironment with TestData {
.select(explode_tiles($"tile"))
.select($"tile".as[Double])
.collect()
- assert(cells.count(_.isNaN) === tiles.size)
+ cells.count(_.isNaN) should be(tiles.size)
}
it("should convert tile into array") {
@@ -99,18 +99,18 @@ class ExplodeSpec extends TestEnvironment with TestData {
| rf_make_constant_tile(1, 10, 10, 'int8raw')
|) as intArray
|""".stripMargin)
- assert(query.as[Array[Int]].first.sum === 100)
+ query.as[Array[Int]].first.sum should be (100)
val tile = FloatConstantTile(1.1f, 10, 10, FloatCellType)
val df = Seq[Tile](tile).toDF("tile")
- val arrayDF = df.select(tile_to_array[Float]($"tile").as[Array[Float]])
- assert(arrayDF.first().sum === 110.0f +- 0.0001f)
+ val arrayDF = df.select(tile_to_array_double($"tile").as[Array[Double]])
+ arrayDF.first().sum should be (110.0 +- 0.0001)
}
it("should convert an array into a tile") {
val tile = FloatConstantTile(1.1f, 10, 10, FloatCellType)
val df = Seq[Tile](tile, null).toDF("tile")
- val arrayDF = df.withColumn("tileArray", tile_to_array[Float]($"tile"))
+ val arrayDF = df.withColumn("tileArray", tile_to_array_double($"tile"))
val back = arrayDF.withColumn("backToTile", array_to_tile($"tileArray", 10, 10))
diff --git a/core/src/test/scala/astraea/spark/rasterframes/RasterFunctionsSpec.scala b/core/src/test/scala/astraea/spark/rasterframes/RasterFunctionsSpec.scala
new file mode 100644
index 000000000..da2ab9c56
--- /dev/null
+++ b/core/src/test/scala/astraea/spark/rasterframes/RasterFunctionsSpec.scala
@@ -0,0 +1,695 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package astraea.spark.rasterframes
+import astraea.spark.rasterframes.TestData.injectND
+import astraea.spark.rasterframes.expressions.accessors.ExtractTile
+import astraea.spark.rasterframes.stats.{CellHistogram, CellStatistics, LocalCellStatistics}
+import astraea.spark.rasterframes.tiles.ProjectedRasterTile
+import geotrellis.proj4.LatLng
+import geotrellis.raster
+import geotrellis.raster.testkit.RasterMatchers
+import geotrellis.raster.{ArrayTile, BitCellType, ByteUserDefinedNoDataCellType, DoubleConstantNoDataCellType, ShortConstantNoDataCellType, Tile, UByteConstantNoDataCellType}
+import geotrellis.vector.Extent
+import org.apache.spark.sql.{AnalysisException, Encoders}
+import org.apache.spark.sql.functions._
+import org.scalatest.{FunSpec, Matchers}
+
+class RasterFunctionsSpec extends FunSpec
+ with TestEnvironment with Matchers with RasterMatchers {
+ import spark.implicits._
+
+ val extent = Extent(10, 20, 30, 40)
+ val crs = LatLng
+ val ct = ByteUserDefinedNoDataCellType(-2)
+ val cols = 10
+ val rows = cols
+ val tileSize = cols * rows
+ val tileCount = 10
+ val numND = 4
+ lazy val zero = TestData.projectedRasterTile(cols, rows, 0, extent, crs, ct)
+ lazy val one = TestData.projectedRasterTile(cols, rows, 1, extent, crs, ct)
+ lazy val two = TestData.projectedRasterTile(cols, rows, 2, extent, crs, ct)
+ lazy val three = TestData.projectedRasterTile(cols, rows, 3, extent, crs, ct)
+ lazy val six = ProjectedRasterTile(three * two, three.extent, three.crs)
+ lazy val nd = TestData.projectedRasterTile(cols, rows, -2, extent, crs, ct)
+ lazy val randTile = TestData.projectedRasterTile(cols, rows, scala.util.Random.nextInt(), extent, crs, ct)
+ lazy val randNDTile = TestData.injectND(numND)(randTile)
+
+ lazy val randDoubleTile = TestData.projectedRasterTile(cols, rows, scala.util.Random.nextGaussian(), extent, crs, DoubleConstantNoDataCellType)
+ lazy val randDoubleNDTile = TestData.injectND(numND)(randDoubleTile)
+ lazy val randPositiveDoubleTile = TestData.projectedRasterTile(cols, rows, scala.util.Random.nextDouble() + 1e-6, extent, crs, DoubleConstantNoDataCellType)
+
+ val expectedRandNoData: Long = numND * tileCount
+ val expectedRandData: Long = cols * rows * tileCount - expectedRandNoData
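+ // randNDTilesWithNull: ten random tiles, each with `numND` injected NoData cells,
+ // plus a trailing null row to exercise null tile handling in aggregations.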
+ lazy val randNDTilesWithNull = Seq.fill[Tile](tileCount)(injectND(numND)(
+ TestData.randomTile(cols, rows, UByteConstantNoDataCellType)
+ )).map(ProjectedRasterTile(_, extent, crs)) :+ null
+
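+ // Explicit tuple encoders for ProjectedRasterTile pairs and triples; Spark's
+ // implicit product-encoder derivation does not appear to cover these custom encoders.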
+ implicit val pairEnc = Encoders.tuple(ProjectedRasterTile.prtEncoder, ProjectedRasterTile.prtEncoder)
+ implicit val tripEnc = Encoders.tuple(ProjectedRasterTile.prtEncoder, ProjectedRasterTile.prtEncoder, ProjectedRasterTile.prtEncoder)
+
+ describe("arithmetic tile operations") {
+ it("should local_add") {
+ val df = Seq((one, two)).toDF("one", "two")
+
+ val maybeThree = df.select(local_add($"one", $"two")).as[ProjectedRasterTile]
+ assertEqual(maybeThree.first(), three)
+
+ assertEqual(df.selectExpr("rf_local_add(one, two)").as[ProjectedRasterTile].first(), three)
+
+ val maybeThreeTile = df.select(local_add(ExtractTile($"one"), ExtractTile($"two"))).as[Tile]
+ assertEqual(maybeThreeTile.first(), three.toArrayTile())
+ checkDocs("rf_local_add")
+ }
+
+ it("should local_subtract") {
+ val df = Seq((three, two)).toDF("three", "two")
+ val maybeOne = df.select(local_subtract($"three", $"two")).as[ProjectedRasterTile]
+ assertEqual(maybeOne.first(), one)
+
+ assertEqual(df.selectExpr("rf_local_subtract(three, two)").as[ProjectedRasterTile].first(), one)
+
+ val maybeOneTile =
+ df.select(local_subtract(ExtractTile($"three"), ExtractTile($"two"))).as[Tile]
+ assertEqual(maybeOneTile.first(), one.toArrayTile())
+ checkDocs("rf_local_subtract")
+ }
+
+ it("should local_multiply") {
+ val df = Seq((three, two)).toDF("three", "two")
+
+ val maybeSix = df.select(local_multiply($"three", $"two")).as[ProjectedRasterTile]
+ assertEqual(maybeSix.first(), six)
+
+ assertEqual(df.selectExpr("rf_local_multiply(three, two)").as[ProjectedRasterTile].first(), six)
+
+ val maybeSixTile =
+ df.select(local_multiply(ExtractTile($"three"), ExtractTile($"two"))).as[Tile]
+ assertEqual(maybeSixTile.first(), six.toArrayTile())
+ checkDocs("rf_local_multiply")
+ }
+
+ it("should local_divide") {
+ val df = Seq((six, two)).toDF("six", "two")
+ val maybeThree = df.select(local_divide($"six", $"two")).as[ProjectedRasterTile]
+ assertEqual(maybeThree.first(), three)
+
+ assertEqual(df.selectExpr("rf_local_divide(six, two)").as[ProjectedRasterTile].first(), three)
+
+ assertEqual(df.selectExpr("rf_local_multiply(rf_local_divide(six, 2.0), two)")
+ .as[ProjectedRasterTile].first(), six)
+
+ val maybeThreeTile =
+ df.select(local_divide(ExtractTile($"six"), ExtractTile($"two"))).as[Tile]
+ assertEqual(maybeThreeTile.first(), three.toArrayTile())
+ checkDocs("rf_local_divide")
+ }
+ }
+
+ describe("scalar tile operations") {
+ it("should local_add") {
+ val df = Seq(one).toDF("one")
+ val maybeThree = df.select(local_add($"one", 2)).as[ProjectedRasterTile]
+ assertEqual(maybeThree.first(), three)
+
+ val maybeThreeD = df.select(local_add($"one", 2.1)).as[ProjectedRasterTile]
+ assertEqual(maybeThreeD.first(), three.convert(DoubleConstantNoDataCellType).localAdd(0.1))
+
+ val maybeThreeTile = df.select(local_add(ExtractTile($"one"), 2)).as[Tile]
+ assertEqual(maybeThreeTile.first(), three.toArrayTile())
+ }
+
+ it("should local_subtract") {
+ val df = Seq(three).toDF("three")
+
+ val maybeOne = df.select(local_subtract($"three", 2)).as[ProjectedRasterTile]
+ assertEqual(maybeOne.first(), one)
+
+ val maybeOneD = df.select(local_subtract($"three", 2.0)).as[ProjectedRasterTile]
+ assertEqual(maybeOneD.first(), one)
+
+ val maybeOneTile = df.select(local_subtract(ExtractTile($"three"), 2)).as[Tile]
+ assertEqual(maybeOneTile.first(), one.toArrayTile())
+ }
+
+ it("should local_multiply") {
+ val df = Seq(three).toDF("three")
+
+ val maybeSix = df.select(local_multiply($"three", 2)).as[ProjectedRasterTile]
+ assertEqual(maybeSix.first(), six)
+
+ val maybeSixD = df.select(local_multiply($"three", 2.0)).as[ProjectedRasterTile]
+ assertEqual(maybeSixD.first(), six)
+
+ val maybeSixTile = df.select(local_multiply(ExtractTile($"three"), 2)).as[Tile]
+ assertEqual(maybeSixTile.first(), six.toArrayTile())
+ }
+
+ it("should local_divide") {
+ val df = Seq(six).toDF("six")
+
+ val maybeThree = df.select(local_divide($"six", 2)).as[ProjectedRasterTile]
+ assertEqual(maybeThree.first(), three)
+
+ val maybeThreeD = df.select(local_divide($"six", 2.0)).as[ProjectedRasterTile]
+ assertEqual(maybeThreeD.first(), three)
+
+ val maybeThreeTile = df.select(local_divide(ExtractTile($"six"), 2)).as[Tile]
+ assertEqual(maybeThreeTile.first(), three.toArrayTile())
+ }
+ }
+
+ describe("tile comparison relations") {
+ it("should evaluate local_less") {
+ val df = Seq((two, three, six)).toDF("two", "three", "six")
+ df.select(tile_sum(local_less($"two", 6))).first() should be(100.0)
+ df.select(tile_sum(local_less($"two", 1.9))).first() should be(0.0)
+ df.select(tile_sum(local_less($"two", 2))).first() should be(0.0)
+ df.select(tile_sum(local_less($"three", $"two"))).first() should be(0.0)
+ df.select(tile_sum(local_less($"three", $"three"))).first() should be(0.0)
+ df.select(tile_sum(local_less($"three", $"six"))).first() should be(100.0)
+
+ df.selectExpr("rf_tile_sum(rf_local_less(two, 6))").as[Double].first() should be(100.0)
+ df.selectExpr("rf_tile_sum(rf_local_less(three, three))").as[Double].first() should be(0.0)
+ checkDocs("rf_local_less")
+ }
+
+ it("should evaluate local_less_equal") {
+ val df = Seq((two, three, six)).toDF("two", "three", "six")
+ df.select(tile_sum(local_less_equal($"two", 6))).first() should be(100.0)
+ df.select(tile_sum(local_less_equal($"two", 1.9))).first() should be(0.0)
+ df.select(tile_sum(local_less_equal($"two", 2))).first() should be(100.0)
+ df.select(tile_sum(local_less_equal($"three", $"two"))).first() should be(0.0)
+ df.select(tile_sum(local_less_equal($"three", $"three"))).first() should be(100.0)
+ df.select(tile_sum(local_less_equal($"three", $"six"))).first() should be(100.0)
+
+ df.selectExpr("rf_tile_sum(rf_local_less_equal(two, 6))").as[Double].first() should be(100.0)
+ df.selectExpr("rf_tile_sum(rf_local_less_equal(three, three))").as[Double].first() should be(100.0)
+ checkDocs("rf_local_less_equal")
+ }
+
+ it("should evaluate local_greater") {
+ val df = Seq((two, three, six)).toDF("two", "three", "six")
+ df.select(tile_sum(local_greater($"two", 6))).first() should be(0.0)
+ df.select(tile_sum(local_greater($"two", 1.9))).first() should be(100.0)
+ df.select(tile_sum(local_greater($"two", 2))).first() should be(0.0)
+ df.select(tile_sum(local_greater($"three", $"two"))).first() should be(100.0)
+ df.select(tile_sum(local_greater($"three", $"three"))).first() should be(0.0)
+ df.select(tile_sum(local_greater($"three", $"six"))).first() should be(0.0)
+
+ df.selectExpr("rf_tile_sum(rf_local_greater(two, 1.9))").as[Double].first() should be(100.0)
+ df.selectExpr("rf_tile_sum(rf_local_greater(three, three))").as[Double].first() should be(0.0)
+ checkDocs("rf_local_greater")
+ }
+
+ it("should evaluate local_greater_equal") {
+ val df = Seq((two, three, six)).toDF("two", "three", "six")
+ df.select(tile_sum(local_greater_equal($"two", 6))).first() should be(0.0)
+ df.select(tile_sum(local_greater_equal($"two", 1.9))).first() should be(100.0)
+ df.select(tile_sum(local_greater_equal($"two", 2))).first() should be(100.0)
+ df.select(tile_sum(local_greater_equal($"three", $"two"))).first() should be(100.0)
+ df.select(tile_sum(local_greater_equal($"three", $"three"))).first() should be(100.0)
+ df.select(tile_sum(local_greater_equal($"three", $"six"))).first() should be(0.0)
+ df.selectExpr("rf_tile_sum(rf_local_greater_equal(two, 1.9))").as[Double].first() should be(100.0)
+ df.selectExpr("rf_tile_sum(rf_local_greater_equal(three, three))").as[Double].first() should be(100.0)
+ checkDocs("rf_local_greater_equal")
+ }
+
+ it("should evaluate local_equal") {
+ val df = Seq((two, three, three)).toDF("two", "threeA", "threeB")
+ df.select(tile_sum(local_equal($"two", 2))).first() should be(100.0)
+ df.select(tile_sum(local_equal($"two", 2.1))).first() should be(0.0)
+ df.select(tile_sum(local_equal($"two", $"threeA"))).first() should be(0.0)
+ df.select(tile_sum(local_equal($"threeA", $"threeB"))).first() should be(100.0)
+ df.selectExpr("rf_tile_sum(rf_local_equal(two, 1.9))").as[Double].first() should be(0.0)
+ df.selectExpr("rf_tile_sum(rf_local_equal(threeA, threeB))").as[Double].first() should be(100.0)
+ checkDocs("rf_local_equal")
+ }
+
+ it("should evaluate local_unequal") {
+ val df = Seq((two, three, three)).toDF("two", "threeA", "threeB")
+ df.select(tile_sum(local_unequal($"two", 2))).first() should be(0.0)
+ df.select(tile_sum(local_unequal($"two", 2.1))).first() should be(100.0)
+ df.select(tile_sum(local_unequal($"two", $"threeA"))).first() should be(100.0)
+ df.select(tile_sum(local_unequal($"threeA", $"threeB"))).first() should be(0.0)
+ df.selectExpr("rf_tile_sum(rf_local_unequal(two, 1.9))").as[Double].first() should be(100.0)
+ df.selectExpr("rf_tile_sum(rf_local_unequal(threeA, threeB))").as[Double].first() should be(0.0)
+ checkDocs("rf_local_unequal")
+ }
+ }
+
+ describe("per-tile stats") {
+ it("should compute data cell counts") {
+ val df = Seq(TestData.injectND(numND)(two)).toDF("two")
+ df.select(data_cells($"two")).first() shouldBe (cols * rows - numND).toLong
+
+ val df2 = randNDTilesWithNull.toDF("tile")
+ df2.select(data_cells($"tile") as "cells")
+ .agg(sum("cells"))
+ .as[Long]
+ .first() should be (expectedRandData)
+
+ checkDocs("rf_data_cells")
+ }
+ it("should compute no-data cell counts") {
+ val df = Seq(TestData.injectND(numND)(two)).toDF("two")
+ df.select(no_data_cells($"two")).first() should be(numND)
+
+ val df2 = randNDTilesWithNull.toDF("tile")
+ df2.select(no_data_cells($"tile") as "cells")
+ .agg(sum("cells"))
+ .as[Long]
+ .first() should be (expectedRandNoData)
+
+ checkDocs("rf_no_data_cells")
+ }
+ it("should detect no-data tiles") {
+ val df = Seq(nd).toDF("nd")
+ df.select(is_no_data_tile($"nd")).first() should be(true)
+ val df2 = Seq(two).toDF("not_nd")
+ df2.select(is_no_data_tile($"not_nd")).first() should be(false)
+ checkDocs("rf_is_no_data_tile")
+ }
+ it("should find the minimum cell value") {
+ val min = randNDTile.toArray().filter(c => raster.isData(c)).min.toDouble
+ val df = Seq(randNDTile).toDF("rand")
+ df.select(tile_min($"rand")).first() should be(min)
+ df.selectExpr("rf_tile_min(rand)").as[Double].first() should be(min)
+ checkDocs("rf_tile_min")
+ }
+
+ it("should find the maximum cell value") {
+ val max = randNDTile.toArray().filter(c => raster.isData(c)).max.toDouble
+ val df = Seq(randNDTile).toDF("rand")
+ df.select(tile_max($"rand")).first() should be(max)
+ df.selectExpr("rf_tile_max(rand)").as[Double].first() should be(max)
+ checkDocs("rf_tile_max")
+ }
+ it("should compute the tile mean cell value") {
+ val values = randNDTile.toArray().filter(c => raster.isData(c))
+ val mean = values.sum.toDouble / values.length
+ val df = Seq(randNDTile).toDF("rand")
+ df.select(tile_mean($"rand")).first() should be(mean)
+ df.selectExpr("rf_tile_mean(rand)").as[Double].first() should be(mean)
+ checkDocs("rf_tile_mean")
+ }
+
+ it("should compute the tile summary statistics") {
+ val values = randNDTile.toArray().filter(c => raster.isData(c))
+ val mean = values.sum.toDouble / values.length
+ val df = Seq(randNDTile).toDF("rand")
+ val stats = df.select(tile_stats($"rand")).first()
+ stats.mean should be (mean +- 0.00001)
+
+ val stats2 = df.selectExpr("rf_tile_stats(rand) as stats")
+ .select($"stats".as[CellStatistics])
+ .first()
+ stats2 should be (stats)
+
+ df.select(tile_stats($"rand") as "stats")
+ .select($"stats.mean").as[Double]
+ .first() should be(mean +- 0.00001)
+ df.selectExpr("rf_tile_stats(rand) as stats")
+ .select($"stats.no_data_cells").as[Long]
+ .first() should be <= (cols * rows - numND).toLong
+
+ val df2 = randNDTilesWithNull.toDF("tile")
+ df2
+ .select(tile_stats($"tile")("data_cells") as "cells")
+ .agg(sum("cells"))
+ .as[Long]
+ .first() should be (expectedRandData)
+
+ checkDocs("rf_tile_stats")
+ }
+
+ it("should compute the tile histogram") {
+ val df = Seq(randNDTile).toDF("rand")
+ val h1 = df.select(tile_histogram($"rand")).first()
+
+ val h2 = df.selectExpr("rf_tile_histogram(rand) as hist")
+ .select($"hist".as[CellHistogram])
+ .first()
+
+ h1 should be (h2)
+
+ checkDocs("rf_tile_histogram")
+ }
+ }
+
+ describe("aggregate statistics") {
+ it("should count data cells") {
+ val df = randNDTilesWithNull.filter(_ != null).toDF("tile")
+ df.select(agg_data_cells($"tile")).first() should be (expectedRandData)
+ df.selectExpr("rf_agg_data_cells(tile)").as[Long].first() should be (expectedRandData)
+
+ checkDocs("rf_agg_data_cells")
+ }
+ it("should count no-data cells") {
+ val df = randNDTilesWithNull.toDF("tile")
+ df.select(agg_no_data_cells($"tile")).first() should be (expectedRandNoData)
+ df.selectExpr("rf_agg_no_data_cells(tile)").as[Long].first() should be (expectedRandNoData)
+ checkDocs("rf_agg_no_data_cells")
+ }
+
+ it("should compute aggregate statistics") {
+ val df = randNDTilesWithNull.toDF("tile")
+
+ df
+ .select(agg_stats($"tile") as "stats")
+ .select("stats.data_cells", "stats.no_data_cells")
+ .as[(Long, Long)]
+ .first() should be ((expectedRandData, expectedRandNoData))
+ df.selectExpr("rf_agg_stats(tile) as stats")
+ .select("stats.data_cells")
+ .as[Long]
+ .first() should be (expectedRandData)
+
+ checkDocs("rf_agg_stats")
+ }
+
+ it("should compute a aggregate histogram") {
+ val df = randNDTilesWithNull.toDF("tile")
+ val hist1 = df.select(agg_approx_histogram($"tile")).first()
+ val hist2 = df.selectExpr("rf_agg_approx_histogram(tile) as hist")
+ .select($"hist".as[CellHistogram])
+ .first()
+ hist1 should be (hist2)
+ checkDocs("rf_agg_approx_histogram")
+ }
+
+ it("should compute local statistics") {
+ val df = randNDTilesWithNull.toDF("tile")
+ val stats1 = df.select(agg_local_stats($"tile"))
+ .first()
+ val stats2 = df.selectExpr("rf_agg_local_stats(tile) as stats")
+ .select($"stats".as[LocalCellStatistics])
+ .first()
+
+ stats1 should be (stats2)
+ checkDocs("rf_agg_local_stats")
+ }
+
+ it("should compute local min") {
+ val df = Seq(two, three, one, six).toDF("tile")
+ df.select(agg_local_min($"tile")).first() should be(one.toArrayTile())
+ df.selectExpr("rf_agg_local_min(tile)").as[Tile].first() should be(one.toArrayTile())
+ checkDocs("rf_agg_local_min")
+ }
+
+ it("should compute local max") {
+ val df = Seq(two, three, one, six).toDF("tile")
+ df.select(agg_local_max($"tile")).first() should be(six.toArrayTile())
+ df.selectExpr("rf_agg_local_max(tile)").as[Tile].first() should be(six.toArrayTile())
+ checkDocs("rf_agg_local_max")
+ }
+
+ it("should compute local data cell counts") {
+ val df = Seq(two, randNDTile, nd).toDF("tile")
+ val t1 = df.select(agg_local_data_cells($"tile")).first()
+ val t2 = df.selectExpr("rf_agg_local_data_cells(tile) as cnt").select($"cnt".as[Tile]).first()
+ t1 should be (t2)
+ checkDocs("rf_agg_local_data_cells")
+ }
+
+ it("should compute local no-data cell counts") {
+ val df = Seq(two, randNDTile, nd).toDF("tile")
+ val t1 = df.select(agg_local_no_data_cells($"tile")).first()
+ val t2 = df.selectExpr("rf_agg_local_no_data_cells(tile) as cnt").select($"cnt".as[Tile]).first()
+ t1 should be (t2)
+ val t3 = df.select(local_add(agg_local_data_cells($"tile"), agg_local_no_data_cells($"tile"))).first()
+ t3 should be(three.toArrayTile())
+ checkDocs("rf_agg_local_no_data_cells")
+ }
+ }
+
+ describe("analytical transformations") {
+ it("should compute normalized_difference") {
+ val df = Seq((three, two)).toDF("three", "two")
+
+ df.select(tile_to_array_double(normalized_difference($"three", $"two")))
+ .first()
+ .forall(_ == 0.2) shouldBe true
+
+ df.selectExpr("rf_tile_to_array_double(rf_normalized_difference(three, two))")
+ .as[Array[Double]]
+ .first()
+ .forall(_ == 0.2) shouldBe true
+
+ checkDocs("rf_normalized_difference")
+ }
+
+ it("should mask one tile against another") {
+ val df = Seq[Tile](randTile).toDF("tile")
+
+ val withMask = df.withColumn("mask",
+ convert_cell_type(
+ local_greater($"tile", 50),
+ "uint8")
+ )
+
+ val withMasked = withMask.withColumn("masked",
+ mask($"tile", $"mask"))
+
+ val result = withMasked.agg(agg_no_data_cells($"tile") < agg_no_data_cells($"masked")).as[Boolean]
+
+ result.first() should be(true)
+
+ checkDocs("rf_mask")
+ }
+
+ it("should inverse mask one tile against another") {
+ val df = Seq[Tile](randTile).toDF("tile")
+
+ val baseND = df.select(agg_no_data_cells($"tile")).first()
+
+ val withMask = df.withColumn("mask",
+ convert_cell_type(
+ local_greater($"tile", 50),
+ "uint8"
+ )
+ )
+
+ val withMasked = withMask
+ .withColumn("masked", mask($"tile", $"mask"))
+ .withColumn("inv_masked", inverse_mask($"tile", $"mask"))
+
+ val result = withMasked.agg(agg_no_data_cells($"masked") + agg_no_data_cells($"inv_masked")).as[Long]
+
+ result.first() should be(tileSize + baseND)
+
+ checkDocs("rf_inverse_mask")
+ }
+
+ it("should mask tile by another identified by specified value") {
+ val df = Seq[Tile](randTile).toDF("tile")
+ val mask_value = 4
+
+ val withMask = df.withColumn("mask",
+ local_multiply(convert_cell_type(
+ local_greater($"tile", 50),
+ "uint8"),
+ lit(mask_value)
+ )
+ )
+
+ val withMasked = withMask.withColumn("masked",
+ mask_by_value($"tile", $"mask", lit(mask_value)))
+
+ val result = withMasked.agg(agg_no_data_cells($"tile") < agg_no_data_cells($"masked")).as[Boolean]
+
+ result.first() should be(true)
+ checkDocs("rf_mask_by_value")
+ }
+
+ it("should render ascii art") {
+ val df = Seq[Tile](ProjectedRasterTile(TestData.l8Labels)).toDF("tile")
+ val r1 = df.select(render_ascii($"tile"))
+ val r2 = df.selectExpr("rf_render_ascii(tile)").as[String]
+ r1.first() should be(r2.first())
+ checkDocs("rf_render_ascii")
+ }
+
+ it("should render cells as matrix") {
+ val df = Seq(randDoubleNDTile).toDF("tile")
+ val r1 = df.select(render_matrix($"tile"))
+ val r2 = df.selectExpr("rf_render_matrix(tile)").as[String]
+ r1.first() should be(r2.first())
+ checkDocs("rf_render_matrix")
+ }
+
+ it("should round tile cell values") {
+
+ val three_plus = TestData.projectedRasterTile(cols, rows, 3.12, extent, crs, DoubleConstantNoDataCellType)
+ val three_less = TestData.projectedRasterTile(cols, rows, 2.92, extent, crs, DoubleConstantNoDataCellType)
+ val three_double = TestData.projectedRasterTile(cols, rows, 3.0, extent, crs, DoubleConstantNoDataCellType)
+
+ val df = Seq((three_plus, three_less, three)).toDF("three_plus", "three_less", "three")
+
+ assertEqual(df.select(round($"three")).as[ProjectedRasterTile].first(), three)
+ assertEqual(df.select(round($"three_plus")).as[ProjectedRasterTile].first(), three_double)
+ assertEqual(df.select(round($"three_less")).as[ProjectedRasterTile].first(), three_double)
+
+ assertEqual(df.selectExpr("rf_round(three)").as[ProjectedRasterTile].first(), three)
+ assertEqual(df.selectExpr("rf_round(three_plus)").as[ProjectedRasterTile].first(), three_double)
+ assertEqual(df.selectExpr("rf_round(three_less)").as[ProjectedRasterTile].first(), three_double)
+
+ checkDocs("rf_round")
+ }
+
+ it("should take logarithms positive cell values"){
+ // log10 1000 == 3
+ val thousand = TestData.projectedRasterTile(cols, rows, 1000, extent, crs, ShortConstantNoDataCellType)
+ val threesDouble = TestData.projectedRasterTile(cols, rows, 3.0, extent, crs, DoubleConstantNoDataCellType)
+ val zerosDouble = TestData.projectedRasterTile(cols, rows, 0.0, extent, crs, DoubleConstantNoDataCellType)
+
+ val df1 = Seq(thousand).toDF("tile")
+ assertEqual(df1.select(log10($"tile")).as[ProjectedRasterTile].first(), threesDouble)
+
+ // ln(tile) == log10(tile) / log10(e); the random tile is strictly positive so all cell values are valid
+ val df2 = Seq(randPositiveDoubleTile).toDF("tile")
+ val log10e = math.log10(math.E)
+ assertEqual(df2.select(log($"tile")).as[ProjectedRasterTile].first(),
+ df2.select(log10($"tile")).as[ProjectedRasterTile].first() / log10e)
+
+ lazy val maybeZeros = df2
+ .selectExpr(s"rf_local_subtract(rf_log(tile), rf_local_divide(rf_log10(tile), ${log10e}))")
+ .as[ProjectedRasterTile].first()
+ assertEqual(maybeZeros, zerosDouble)
+
+ // log1p for zeros should be ln(1)
+ val ln1 = math.log1p(0.0)
+ val df3 = Seq(zero).toDF("tile")
+ val maybeLn1 = df3.selectExpr(s"rf_log1p(tile)").as[ProjectedRasterTile].first()
+ assert(maybeLn1.toArrayDouble().forall(_ == ln1))
+
+ checkDocs("rf_log")
+ checkDocs("rf_log2")
+ checkDocs("rf_log10")
+ checkDocs("rf_log1p")
+ }
+
+ it("should take logarithms with non-positive cell values") {
+ val ni_float = TestData.projectedRasterTile(cols, rows, Double.NegativeInfinity, extent, crs, DoubleConstantNoDataCellType)
+ val zero_float = TestData.projectedRasterTile(cols, rows, 0.0, extent, crs, DoubleConstantNoDataCellType)
+
+ // tile zeros ==> -Infinity
+ val df_0 = Seq(zero).toDF("tile")
+ assertEqual(df_0.select(log($"tile")).as[ProjectedRasterTile].first(), ni_float)
+ assertEqual(df_0.select(log10($"tile")).as[ProjectedRasterTile].first(), ni_float)
+ assertEqual(df_0.select(log2($"tile")).as[ProjectedRasterTile].first(), ni_float)
+ // log1p of zeros should be 0.
+ assertEqual(df_0.select(log1p($"tile")).as[ProjectedRasterTile].first(), zero_float)
+
+ // tile negative values ==> NaN
+ assert(df_0.selectExpr("rf_log(rf_local_subtract(tile, 42))").as[ProjectedRasterTile].first().isNoDataTile)
+ assert(df_0.selectExpr("rf_log2(rf_local_subtract(tile, 42))").as[ProjectedRasterTile].first().isNoDataTile)
+ assert(df_0.select(log1p(local_subtract($"tile", 42))).as[ProjectedRasterTile].first().isNoDataTile)
+ assert(df_0.select(log10(local_subtract($"tile", lit(0.01)))).as[ProjectedRasterTile].first().isNoDataTile)
+
+ }
+
+ it("should take exponential") {
+ val df = Seq(six).toDF("tile")
+
+ // exp inverses log
+ assertEqual(
+ df.select(exp(log($"tile"))).as[ProjectedRasterTile].first(),
+ six
+ )
+
+ // base 2
+ assertEqual(
+ df.select(exp2(log2($"tile"))).as[ProjectedRasterTile].first(),
+ six)
+
+ // base 10
+ assertEqual(
+ df.select(exp10(log10($"tile"))).as[ProjectedRasterTile].first(),
+ six)
+
+ // plus/minus 1
+ assertEqual(
+ df.select(expm1(log1p($"tile"))).as[ProjectedRasterTile].first(),
+ six)
+
+ // SQL
+ assertEqual(
+ df.selectExpr("rf_exp(rf_log(tile))").as[ProjectedRasterTile].first(),
+ six)
+
+ // SQL base 10
+ assertEqual(
+ df.selectExpr("rf_exp10(rf_log10(tile))").as[ProjectedRasterTile].first(),
+ six)
+
+ // SQL base 2
+ assertEqual(
+ df.selectExpr("rf_exp2(rf_log2(tile))").as[ProjectedRasterTile].first(),
+ six)
+
+ // SQL expm1
+ assertEqual(
+ df.selectExpr("rf_expm1(rf_log1p(tile))").as[ProjectedRasterTile].first(),
+ six)
+
+ checkDocs("rf_exp")
+ checkDocs("rf_exp10")
+ checkDocs("rf_exp2")
+ checkDocs("rf_expm1")
+
+ }
+ }
+ it("should resample") {
+ def lowRes = {
+ def base = ArrayTile(Array(1,2,3,4), 2, 2)
+ ProjectedRasterTile(base.convert(ct), extent, crs)
+ }
+ def upsampled = {
+ def base = ArrayTile(Array(
+ 1,1,2,2,
+ 1,1,2,2,
+ 3,3,4,4,
+ 3,3,4,4
+ ), 4, 4)
+ ProjectedRasterTile(base.convert(ct), extent, crs)
+ }
+ // a 4x4 tile used as the target shape for upsampling
+ def fourByFour = TestData.projectedRasterTile(4, 4, 0, extent, crs, ct)
+
+ def df = Seq(lowRes).toDF("tile")
+
+ val maybeUp = df.select(resample($"tile", lit(2))).as[ProjectedRasterTile].first()
+ assertEqual(maybeUp, upsampled)
+
+ def df2 = Seq((lowRes, fourByFour)).toDF("tile1", "tile2")
+ val maybeUpShape = df2.select(resample($"tile1", $"tile2")).as[ProjectedRasterTile].first()
+ assertEqual(maybeUpShape, upsampled)
+
+ // Downsample by double argument < 1
+ def df3 = Seq(upsampled).toDF("tile").withColumn("factor", lit(0.5))
+ assertEqual(df3.selectExpr("rf_resample(tile, 0.5)").as[ProjectedRasterTile].first(), lowRes)
+ assertEqual(df3.selectExpr("rf_resample(tile, factor)").as[ProjectedRasterTile].first(), lowRes)
+
+ checkDocs("rf_resample")
+ }
+}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/ReprojectGeometryTest.scala b/core/src/test/scala/astraea/spark/rasterframes/ReprojectGeometrySpec.scala
similarity index 98%
rename from core/src/test/scala/astraea/spark/rasterframes/ReprojectGeometryTest.scala
rename to core/src/test/scala/astraea/spark/rasterframes/ReprojectGeometrySpec.scala
index 25ef32176..39ea3b1c1 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/ReprojectGeometryTest.scala
+++ b/core/src/test/scala/astraea/spark/rasterframes/ReprojectGeometrySpec.scala
@@ -31,7 +31,7 @@ import org.scalatest.{FunSpec, Matchers}
*
* @since 11/29/18
*/
-class ReprojectGeometryTest extends FunSpec
+class ReprojectGeometrySpec extends FunSpec
with TestEnvironment with Matchers {
import spark.implicits._
diff --git a/core/src/test/scala/astraea/spark/rasterframes/TestData.scala b/core/src/test/scala/astraea/spark/rasterframes/TestData.scala
index 2d97bc4df..29c06849e 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/TestData.scala
+++ b/core/src/test/scala/astraea/spark/rasterframes/TestData.scala
@@ -22,8 +22,10 @@ import java.net.URI
import java.nio.file.Paths
import java.time.ZonedDateTime
+import astraea.spark.rasterframes.expressions.tilestats.NoDataCells
+import astraea.spark.rasterframes.model.TileContext
import astraea.spark.rasterframes.tiles.ProjectedRasterTile
-import astraea.spark.rasterframes.{functions ⇒ F}
+import astraea.spark.rasterframes.{functions => F}
import com.vividsolutions.jts.geom.{Coordinate, GeometryFactory}
import geotrellis.proj4.{CRS, LatLng}
import geotrellis.raster
@@ -179,9 +181,9 @@ object TestData extends TestData {
) (
z ⇒ if (isNoData(z)) rnd.nextGaussian() else z
)
- } while (F.noDataCells(result) != 0L)
+ } while (NoDataCells.op(result) != 0L)
- assert(F.noDataCells(result) == 0L,
+ assert(NoDataCells.op(result) == 0L,
s"Should not have any NoData cells for $cellType:\n${result.asciiDraw()}")
result
}
@@ -205,7 +207,7 @@ object TestData extends TestData {
def projectedRasterTile[N: Numeric](
cols: Int, rows: Int,
- cellValue: N,
+ cellValue: => N,
extent: Extent, crs: CRS = LatLng,
cellType: CellType = ByteConstantNoDataCellType): ProjectedRasterTile = {
val num = implicitly[Numeric[N]]
@@ -237,11 +239,18 @@ object TestData extends TestData {
val targeted = rnd.shuffle(indexes).take(num)
def filter(c: Int, r: Int) = targeted.contains(r * t.cols + c)
- if(t.cellType.isFloatingPoint) {
+ val injected = if(t.cellType.isFloatingPoint) {
t.mapDouble((c, r, v) ⇒ (if(filter(c,r)) raster.doubleNODATA else v): Double)
}
else {
t.map((c, r, v) ⇒ if(filter(c, r)) raster.NODATA else v)
}
+
+// t match {
+// case TileContext(ext, crs) => ProjectedRasterTile(injected, ext, crs)
+// case _ => injected
+// }
+
+ injected
}
}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/TestEnvironment.scala b/core/src/test/scala/astraea/spark/rasterframes/TestEnvironment.scala
index 6b5111170..aaf173014 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/TestEnvironment.scala
+++ b/core/src/test/scala/astraea/spark/rasterframes/TestEnvironment.scala
@@ -19,11 +19,12 @@ package astraea.spark.rasterframes
import java.nio.file.{Files, Paths}
+import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.stringEnc
import astraea.spark.rasterframes.ref.RasterSource
import astraea.spark.rasterframes.ref.RasterSource.ReadCallback
import astraea.spark.rasterframes.util.toParquetFriendlyColumnName
import com.vividsolutions.jts.geom.Geometry
-import geotrellis.spark.testkit.{TestEnvironment ⇒ GeoTrellisTestEnvironment}
+import geotrellis.spark.testkit.{TestEnvironment => GeoTrellisTestEnvironment}
import geotrellis.util.LazyLogging
import org.apache.spark.SparkContext
import org.apache.spark.sql._
@@ -83,6 +84,14 @@ trait TestEnvironment extends FunSpec with GeoTrellisTestEnvironment
}
def matchGeom(g: Geometry, tolerance: Double) = new GeometryMatcher(g, tolerance)
+
+ def checkDocs(name: String): Unit = {
+ val docs = sql(s"DESCRIBE FUNCTION EXTENDED $name").as[String].collect().mkString("\n")
+ docs should include(name)
+ docs shouldNot include("not found")
+ docs shouldNot include("null")
+ docs shouldNot include("N/A")
+ }
}
object TestEnvironment {
diff --git a/core/src/test/scala/astraea/spark/rasterframes/TileAssemblerSpec.scala b/core/src/test/scala/astraea/spark/rasterframes/TileAssemblerSpec.scala
index 4c8cc48ea..29eff421f 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/TileAssemblerSpec.scala
+++ b/core/src/test/scala/astraea/spark/rasterframes/TileAssemblerSpec.scala
@@ -114,7 +114,7 @@ class TileAssemblerSpec extends TestEnvironment {
val expected = df.select(agg_stats($"tile")).first()
val result = assembled.select(agg_stats($"tile")).first()
- assert(result.copy(noDataCells = expected.noDataCells) === expected)
+ assert(result.copy(no_data_cells = expected.no_data_cells) === expected)
}
}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/TileStatsSpec.scala b/core/src/test/scala/astraea/spark/rasterframes/TileStatsSpec.scala
index e4c5be7ac..781b8290d 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/TileStatsSpec.scala
+++ b/core/src/test/scala/astraea/spark/rasterframes/TileStatsSpec.scala
@@ -21,6 +21,7 @@ package astraea.spark.rasterframes
import astraea.spark.rasterframes.TestData.randomTile
import astraea.spark.rasterframes.TestData.fracTile
+import astraea.spark.rasterframes.expressions.aggstats.LocalMeanAggregate
import astraea.spark.rasterframes.stats.CellHistogram
import geotrellis.raster._
import geotrellis.spark._
@@ -34,13 +35,12 @@ import org.apache.spark.sql.functions._
* @since 9/18/17
*/
class TileStatsSpec extends TestEnvironment with TestData {
-
+ import sqlContext.implicits._
import TestData.injectND
describe("computing statistics over tiles") {
//import org.apache.spark.sql.execution.debug._
it("should report dimensions") {
- import sqlContext.implicits._
val df = Seq[(Tile, Tile)]((byteArrayTile, byteArrayTile)).toDF("tile1", "tile2")
val dims = df.select(tile_dimensions($"tile1") as "dims").select("dims.*")
@@ -48,8 +48,7 @@ class TileStatsSpec extends TestEnvironment with TestData {
assert(dims.as[(Int, Int)].first() === (3, 3))
assert(dims.schema.head.name === "cols")
- val query = sql(
- """|select dims.* from (
+ val query = sql("""|select dims.* from (
|select rf_tile_dimensions(tiles) as dims from (
|select rf_make_constant_tile(1, 10, 10, 'int8raw') as tiles))
|""".stripMargin)
@@ -57,18 +56,19 @@ class TileStatsSpec extends TestEnvironment with TestData {
assert(query.as[(Int, Int)].first() === (10, 10))
df.repartition(4).createOrReplaceTempView("tmp")
- assert(sql("select dims.* from (select rf_tile_dimensions(tile2) as dims from tmp)")
- .as[(Int, Int)].first() === (3, 3))
+ assert(
+ sql("select dims.* from (select rf_tile_dimensions(tile2) as dims from tmp)")
+ .as[(Int, Int)]
+ .first() === (3, 3))
}
it("should report cell type") {
- import sqlContext.implicits._
val ct = functions.cellTypes().filter(_ != "bool")
- forEvery(ct) { c ⇒
+ forEvery(ct) { c =>
val expected = CellType.fromName(c)
val tile = randomTile(5, 5, expected)
val result = Seq(tile).toDF("tile").select(cell_type($"tile")).first()
- result should be (expected)
+ result should be(expected)
}
}
@@ -78,15 +78,15 @@ class TileStatsSpec extends TestEnvironment with TestData {
val tile3 = randomTile(255, 255, IntCellType)
it("should compute accurate item counts") {
- import sqlContext.implicits._
val ds = Seq[Tile](tile1, tile2, tile3).toDF("tiles")
val checkedValues = Seq[Double](0, 4, 7, 13, 26)
val result = checkedValues.map(x => ds.select(tile_histogram($"tiles")).first().itemCount(x))
- forEvery(checkedValues) { x => assert((x == 0 && result.head == 4) || result.contains(x - 1)) }
+ forEvery(checkedValues) { x =>
+ assert((x == 0 && result.head == 4) || result.contains(x - 1))
+ }
}
it("Should compute quantiles") {
- import sqlContext.implicits._
val ds = Seq[Tile](tile1, tile2, tile3).toDF("tiles")
val numBreaks = 5
val breaks = ds.select(tile_histogram($"tiles")).map(_.quantileBreaks(numBreaks)).collect()
@@ -101,51 +101,27 @@ class TileStatsSpec extends TestEnvironment with TestData {
ds.createOrReplaceTempView("tmp")
withClue("max") {
- val max = ds.agg(local_agg_max($"tiles"))
+ val max = ds.agg(agg_local_max($"tiles"))
val expected = Max(byteArrayTile, byteConstantTile)
write(max)
assert(max.as[Tile].first() === expected)
- val sqlMax = sql("select rf_local_agg_max(tiles) from tmp")
+ val sqlMax = sql("select rf_agg_local_max(tiles) from tmp")
assert(sqlMax.as[Tile].first() === expected)
}
withClue("min") {
- val min = ds.agg(local_agg_min($"tiles"))
+ val min = ds.agg(agg_local_min($"tiles"))
val expected = Min(byteArrayTile, byteConstantTile)
write(min)
assert(min.as[Tile].first() === Min(byteArrayTile, byteConstantTile))
- val sqlMin = sql("select rf_local_agg_min(tiles) from tmp")
+ val sqlMin = sql("select rf_agg_local_min(tiles) from tmp")
assert(sqlMin.as[Tile].first() === expected)
}
}
- it("should count data and no-data cells") {
- import sqlContext.implicits._
- val ds = (Seq.fill[Tile](10)(injectND(10)(randomTile(10, 10, UByteConstantNoDataCellType))) :+ null).toDF("tile")
- val expectedNoData = 10 * 10
- val expectedData = 10 * 10 * 10 - expectedNoData
-
- //logger.debug(ds.select($"tile").as[Tile].first.cell_type.name)
-
- assert(ds.select(data_cells($"tile") as "cells").agg(sum("cells")).as[Long].first() === expectedData)
- assert(ds.select(no_data_cells($"tile") as "cells").agg(sum("cells")).as[Long].first() === expectedNoData)
-
- assert(ds.select(agg_data_cells($"tile")).first() === expectedData)
- assert(ds.select(agg_no_data_cells($"tile")).first() === expectedNoData)
-
- val resultTileStats = ds.select(tile_stats($"tile")("dataCells") as "cells")
- .agg(sum("cells")).as[Long]
- .first()
- assert(resultTileStats === expectedData)
-
- val (aggDC, aggNDC) = ds.select(agg_stats($"tile")).select("dataCells", "noDataCells").as[(Long, Long)].first()
- assert(aggDC === expectedData)
- assert(aggNDC === expectedNoData)
- }
-
it("should compute tile statistics") {
import sqlContext.implicits._
withClue("mean") {
@@ -154,10 +130,11 @@ class TileStatsSpec extends TestEnvironment with TestData {
val means1 = ds.select(tile_stats($"value")).map(_.mean).collect
val means2 = ds.select(tile_mean($"value")).collect
// Compute the mean manually, knowing we're not dealing with no-data values.
- val means = ds.select(tile_to_array[Float]($"value")).map(a ⇒ a.sum.toDouble / a.length).collect
+ val means =
+ ds.select(tile_to_array_double($"value")).map(a => a.sum / a.length).collect
- forAll(means.zip(means1)) { case (l, r) ⇒ assert(l === r +- 1e-6) }
- forAll(means.zip(means2)) { case (l, r) ⇒ assert(l === r +- 1e-6) }
+ forAll(means.zip(means1)) { case (l, r) => assert(l === r +- 1e-6) }
+ forAll(means.zip(means2)) { case (l, r) => assert(l === r +- 1e-6) }
}
withClue("sum") {
val rf = l8Sample(1).projectedRaster.toRF
@@ -169,17 +146,15 @@ class TileStatsSpec extends TestEnvironment with TestData {
}
it("should compute per-tile histogram") {
- import sqlContext.implicits._
val ds = Seq.fill[Tile](3)(randomTile(5, 5, FloatCellType)).toDF("tiles")
ds.createOrReplaceTempView("tmp")
val r1 = ds.select(tile_histogram($"tiles"))
assert(r1.first.totalCount === 5 * 5)
write(r1)
-
val r2 = sql("select hist.* from (select rf_tile_histogram(tiles) as hist from tmp)").as[CellHistogram]
write(r2)
- assert(r1.first.mean === r2.first.mean)
+ assert(r1.first === r2.first)
}
it("should compute mean and total count") {
@@ -198,33 +173,30 @@ class TileStatsSpec extends TestEnvironment with TestData {
}
it("should compute aggregate histogram") {
- import sqlContext.implicits._
val tileSize = 5
val rows = 10
- val ds = Seq.fill[Tile](rows)(randomTile(tileSize, tileSize, FloatConstantNoDataCellType)).toDF("tiles")
+ val ds = Seq
+ .fill[Tile](rows)(randomTile(tileSize, tileSize, FloatConstantNoDataCellType))
+ .toDF("tiles")
ds.createOrReplaceTempView("tmp")
- val agg = ds.select(agg_histogram($"tiles")).as[CellHistogram]
+ val agg = ds.select(agg_approx_histogram($"tiles"))
+
val histArray = agg.collect()
- assert(histArray.length === 1)
+ histArray.length should be (1)
// examine histogram info
val hist = histArray.head
- //logger.info(hist.asciiHistogram(128))
- //logger.info(hist.asciiStats)
assert(hist.totalCount === rows * tileSize * tileSize)
assert(hist.bins.map(_.count).sum === rows * tileSize * tileSize)
- val stats = agg.map(_.stats).as("stats")
- //stats.select("stats.*").show(false)
- assert(stats.first().stddev === 1.0 +- 0.3) // <-- playing with statistical fire :)
+ val hist2 = sql("select hist.* from (select rf_agg_approx_histogram(tiles) as hist from tmp)").as[CellHistogram]
- val hist2 = sql("select hist.* from (select rf_agg_histogram(tiles) as hist from tmp)").as[CellHistogram]
+ hist2.first.totalCount should be (rows * tileSize * tileSize)
- assert(hist2.first.totalCount === rows * tileSize * tileSize)
+ checkDocs("rf_agg_approx_histogram")
}
it("should compute aggregate mean") {
- import sqlContext.implicits._
val ds = (Seq.fill[Tile](10)(randomTile(5, 5, FloatCellType)) :+ null).toDF("tiles")
val agg = ds.select(agg_mean($"tiles"))
val stats = ds.select(agg_stats($"tiles") as "stats").select($"stats.mean".as[Double])
@@ -232,14 +204,13 @@ class TileStatsSpec extends TestEnvironment with TestData {
}
it("should compute aggregate statistics") {
- import sqlContext.implicits._
val ds = Seq.fill[Tile](10)(randomTile(5, 5, FloatConstantNoDataCellType)).toDF("tiles")
val exploded = ds.select(explode_tiles($"tiles"))
val (mean, vrnc) = exploded.agg(avg($"tiles"), var_pop($"tiles")).as[(Double, Double)].first
val stats = ds.select(agg_stats($"tiles") as "stats") ///.as[(Long, Double, Double, Double, Double)]
-stats.printSchema()
+ //stats.printSchema()
noException shouldBe thrownBy {
ds.select(agg_stats($"tiles")).collect()
}
@@ -250,7 +221,7 @@ stats.printSchema()
ds.createOrReplaceTempView("tmp")
val agg2 = sql("select stats.* from (select rf_agg_stats(tiles) as stats from tmp)")
- assert(agg2.first().getAs[Long]("dataCells") === 250L)
+ assert(agg2.first().getAs[Long]("data_cells") === 250L)
val agg3 = ds.agg(agg_stats($"tiles") as "stats").select($"stats.mean".as[Double])
assert(mean === agg3.first())
@@ -258,13 +229,14 @@ stats.printSchema()
it("should compute aggregate local stats") {
import sqlContext.implicits._
- val ave = (nums: Array[Double]) ⇒ nums.sum / nums.length
+ val ave = (nums: Array[Double]) => nums.sum / nums.length
- val ds = (Seq.fill[Tile](30)(randomTile(5, 5, FloatConstantNoDataCellType))
+ val ds = (Seq
+ .fill[Tile](30)(randomTile(5, 5, FloatConstantNoDataCellType))
.map(injectND(2)) :+ null).toDF("tiles")
ds.createOrReplaceTempView("tmp")
- val agg = ds.select(local_agg_stats($"tiles") as "stats")
+ val agg = ds.select(agg_local_stats($"tiles") as "stats")
val stats = agg.select("stats.*")
//printStatsRows(stats)
@@ -273,23 +245,23 @@ stats.printSchema()
assert(min < -2.0)
val max = agg.select($"stats.max".as[Tile]).map(_.toArrayDouble().max).first
assert(max > 2.0)
- val tendancy = agg.select($"stats.mean".as[Tile]).map(t ⇒ ave(t.toArrayDouble())).first
+ val tendancy = agg.select($"stats.mean".as[Tile]).map(t => ave(t.toArrayDouble())).first
assert(tendancy < 0.2)
- val varg = agg.select($"stats.mean".as[Tile]).map(t ⇒ ave(t.toArrayDouble())).first
+ val varg = agg.select($"stats.mean".as[Tile]).map(t => ave(t.toArrayDouble())).first
assert(varg < 1.1)
- val sqlStats = sql("SELECT stats.* from (SELECT rf_local_agg_stats(tiles) as stats from tmp)")
+ val sqlStats = sql("SELECT stats.* from (SELECT rf_agg_local_stats(tiles) as stats from tmp)")
val tiles = stats.collect().flatMap(_.toSeq).map(_.asInstanceOf[Tile])
val dsTiles = sqlStats.collect().flatMap(_.toSeq).map(_.asInstanceOf[Tile])
- forEvery(tiles.zip(dsTiles)) { case (t1, t2) ⇒
- assert(t1 === t2)
+ forEvery(tiles.zip(dsTiles)) {
+ case (t1, t2) =>
+ assert(t1 === t2)
}
}
it("should compute accurate statistics") {
- import sqlContext.implicits._
val completeTile = squareIncrementingTile(4).convert(IntConstantNoDataCellType)
val incompleteTile = injectND(2)(completeTile)
@@ -297,50 +269,50 @@ stats.printSchema()
val dsNd = (Seq.fill(20)(completeTile) :+ incompleteTile :+ null).toDF("tiles")
// counted everything properly
- val countTile = ds.select(local_agg_data_cells($"tiles")).first()
- forAll(countTile.toArray())(i ⇒ assert(i === 20))
+ val countTile = ds.select(agg_local_data_cells($"tiles")).first()
+ forAll(countTile.toArray())(i => assert(i === 20))
- val countArray = dsNd.select(local_agg_data_cells($"tiles")).first().toArray()
- val expectedCount = (completeTile.localDefined().toArray zip incompleteTile.localDefined().toArray())
- .toSeq.map(pr ⇒ pr._1 * 20 + pr._2)
+ val countArray = dsNd.select(agg_local_data_cells($"tiles")).first().toArray()
+ val expectedCount =
+ (completeTile.localDefined().toArray zip incompleteTile.localDefined().toArray()).toSeq.map(
+ pr => pr._1 * 20 + pr._2)
assert(countArray === expectedCount)
- val countNodataArray = dsNd.select(local_agg_no_data_cells($"tiles")).first().toArray
+ val countNodataArray = dsNd.select(agg_local_no_data_cells($"tiles")).first().toArray
assert(countNodataArray === incompleteTile.localUndefined().toArray)
- // GeoTrellis docs do not say how NODATA is treated, but NODATA values are ignored
- val meanTile = dsNd.select(local_agg_mean($"tiles")).first()
- assert(meanTile.toArray() === completeTile.toArray())
+ val minTile = dsNd.select(agg_local_min($"tiles")).first()
+ assert(minTile.toArray() === completeTile.toArray())
- // GeoTrellis docs state that Min(1.0, NODATA) = NODATA
- val minTile = dsNd.select(local_agg_min($"tiles")).first()
- assert(minTile.toArray() === incompleteTile.toArray())
+ val maxTile = dsNd.select(agg_local_max($"tiles")).first()
+ assert(maxTile.toArray() === completeTile.toArray())
- // GeoTrellis docs state that Max(1.0, NODATA) = NODATA
- val maxTile = dsNd.select(local_agg_max($"tiles")).first()
- assert(maxTile.toArray() === incompleteTile.toArray())
+ val meanTile = dsNd.select(agg_local_mean($"tiles")).first()
+ assert(meanTile.toArray() === completeTile.toArray())
}
}
describe("NoData handling") {
- import sqlContext.implicits._
val tsize = 5
val count = 20
val nds = 2
- val tiles = (Seq.fill[Tile](count)(randomTile(tsize, tsize, UByteUserDefinedNoDataCellType(255.toByte)))
+ val tiles = (Seq
+ .fill[Tile](count)(randomTile(tsize, tsize, UByteUserDefinedNoDataCellType(255.toByte)))
.map(injectND(nds)) :+ null).toDF("tiles")
it("should count cells by NoData state") {
val counts = tiles.select(no_data_cells($"tiles")).collect().dropRight(1)
- forEvery(counts)(c ⇒ assert(c === nds))
+ forEvery(counts)(c => assert(c === nds))
val counts2 = tiles.select(data_cells($"tiles")).collect().dropRight(1)
- forEvery(counts2)(c ⇒ assert(c === tsize * tsize - nds))
+ forEvery(counts2)(c => assert(c === tsize * tsize - nds))
}
it("should detect all NoData tiles") {
val ndCount = tiles.select("*").where(is_no_data_tile($"tiles")).count()
ndCount should be(1)
- val ndTiles = (Seq.fill[Tile](count)(ArrayTile.empty(UByteConstantNoDataCellType, tsize, tsize)) :+ null).toDF("tiles")
+ val ndTiles =
+ (Seq.fill[Tile](count)(ArrayTile.empty(UByteConstantNoDataCellType, tsize, tsize)) :+ null)
+ .toDF("tiles")
val ndCount2 = ndTiles.select("*").where(is_no_data_tile($"tiles")).count()
ndCount2 should be(count + 1)
}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/TileUDTSpec.scala b/core/src/test/scala/astraea/spark/rasterframes/TileUDTSpec.scala
index 75cb2879f..b83b94486 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/TileUDTSpec.scala
+++ b/core/src/test/scala/astraea/spark/rasterframes/TileUDTSpec.scala
@@ -39,6 +39,7 @@ class TileUDTSpec extends TestEnvironment with TestData with Inspectors {
spark.version
val tileEncoder: ExpressionEncoder[Tile] = ExpressionEncoder()
val TileType = new TileUDT()
+ implicit val ser = TileUDT.tileSerializer
describe("TileUDT") {
val tileSizes = Seq(2, 64, 128, 222, 511)
diff --git a/core/src/test/scala/astraea/spark/rasterframes/encoders/CatalystSerializerSpec.scala b/core/src/test/scala/astraea/spark/rasterframes/encoders/CatalystSerializerSpec.scala
index 614510c00..c489b8d7b 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/encoders/CatalystSerializerSpec.scala
+++ b/core/src/test/scala/astraea/spark/rasterframes/encoders/CatalystSerializerSpec.scala
@@ -20,30 +20,138 @@
*/
package astraea.spark.rasterframes.encoders
+import java.time.ZonedDateTime
+
+import astraea.spark.rasterframes.encoders.StandardEncoders._
+import astraea.spark.rasterframes.model.{CellContext, TileContext, TileDataContext, TileDimensions}
+import astraea.spark.rasterframes.ref.{RasterRef, RasterSource}
import astraea.spark.rasterframes.{TestData, TestEnvironment}
import geotrellis.proj4._
+import geotrellis.raster.{CellSize, CellType, TileLayout, UShortUserDefinedNoDataCellType}
+import geotrellis.spark.tiling.LayoutDefinition
+import geotrellis.spark.{Bounds, KeyBounds, SpaceTimeKey, SpatialKey, TileLayerMetadata}
+import geotrellis.vector.{Extent, ProjectedExtent}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
+import org.scalatest.Assertion
class CatalystSerializerSpec extends TestEnvironment with TestData {
+ val dc = TileDataContext(UShortUserDefinedNoDataCellType(3), TileDimensions(12, 23))
+ val tc = TileContext(Extent(1, 2, 3, 4), WebMercator)
+ val cc = CellContext(tc, dc, 34, 45)
+ val ext = Extent(1.2, 2.3, 3.4, 4.5)
+ val tl = TileLayout(10, 10, 20, 20)
+ val ct: CellType = UShortUserDefinedNoDataCellType(5.toShort)
+ val ld = LayoutDefinition(ext, tl)
+ val skb = KeyBounds[SpatialKey](SpatialKey(1, 2), SpatialKey(3, 4))
+
+
+ def assertSerializerMatchesEncoder[T: CatalystSerializer: ExpressionEncoder](value: T): Assertion = {
+ val enc = implicitly[ExpressionEncoder[T]]
+ val ser = CatalystSerializer[T]
+ ser.schema should be (enc.schema)
+ }
+ def assertConsistent[T: CatalystSerializer](value: T): Assertion = {
+ val ser = CatalystSerializer[T]
+ ser.toRow(value) should be(ser.toRow(value))
+ }
+ def assertInvertable[T: CatalystSerializer](value: T): Assertion = {
+ val ser = CatalystSerializer[T]
+ ser.fromRow(ser.toRow(value)) should be(value)
+ }
+
+ def assertContract[T: CatalystSerializer: ExpressionEncoder](value: T): Assertion = {
+ assertConsistent(value)
+ assertInvertable(value)
+ assertSerializerMatchesEncoder(value)
+ }
- import sqlContext.implicits._
describe("Specialized serialization on specific types") {
- it("should support encoding") {
- import sqlContext.implicits._
+// it("should support encoding") {
+// implicit val enc: ExpressionEncoder[CRS] = CatalystSerializerEncoder[CRS]()
+//
+// //println(enc.deserializer.genCode(new CodegenContext))
+// val values = Seq[CRS](LatLng, Sinusoidal, ConusAlbers, WebMercator)
+// val df = spark.createDataset(values)(enc)
+// //df.show(false)
+// val results = df.collect()
+// results should contain allElementsOf values
+// }
- implicit val enc: ExpressionEncoder[CRS] = CatalystSerializerEncoder[CRS]
+ it("should serialize CRS") {
+ val v: CRS = LatLng
+ assertContract(v)
+ }
- val values = Seq[CRS](LatLng, Sinusoidal, ConusAlbers, WebMercator)
- val df = spark.createDataset(values)(enc)
- //df.show(false)
- val results = df.collect()
- results should contain allElementsOf values
+ it("should serialize TileDataContext") {
+ assertContract(dc)
}
- it("should serialize CRS") {
- val ser = CatalystSerializer[CRS]
- ser.fromRow(ser.toRow(LatLng)) should be(LatLng)
- ser.fromRow(ser.toRow(Sinusoidal)) should be(Sinusoidal)
+ it("should serialize TileContext") {
+ assertContract(tc)
+ }
+
+ it("should serialize CellContext") {
+ assertContract(cc)
+ }
+
+ it("should serialize ProjectedRasterTile") {
+ // TODO: Decide if ProjectedRasterTile should be encoded 'flat', non-'flat', or depends
+ val value = TestData.projectedRasterTile(20, 30, -1.2, extent)
+ assertConsistent(value)
+ assertInvertable(value)
+ }
+
+ it("should serialize RasterRef") {
+ val src = RasterSource(remoteCOGSingleband1)
+ val value = RasterRef(src, Some(src.extent.buffer(-3.0)))
+ assertConsistent(value)
+ assertInvertable(value)
+ }
+
+ it("should serialize CellType") {
+ assertContract(ct)
+ }
+
+ it("should serialize Extent") {
+ assertContract(ext)
+ }
+
+ it("should serialize ProjectedExtent") {
+ val pe = ProjectedExtent(ext, ConusAlbers)
+ assertContract(pe)
+ }
+
+ it("should serialize SpatialKey") {
+ val v = SpatialKey(2, 3)
+ assertContract(v)
+ }
+
+ it("should serialize SpaceTimeKey") {
+ val v = SpaceTimeKey(2, 3, ZonedDateTime.now())
+ assertContract(v)
+ }
+
+ it("should serialize CellSize") {
+ val v = CellSize(extent, 50, 60)
+ assertContract(v)
+ }
+
+ it("should serialize TileLayout") {
+ assertContract(tl)
+ }
+
+ it("should serialize LayoutDefinition") {
+ assertContract(ld)
+ }
+
+ it("should serialize Bounds[SpatialKey]") {
+ implicit val skbEnc = ExpressionEncoder[KeyBounds[SpatialKey]]()
+ assertContract(skb)
+ }
+
+ it("should serialize TileLayerMetadata[SpatialKey]") {
+ val tlm = TileLayerMetadata(ct, ld, ext, ConusAlbers, skb)
+ assertContract(tlm)
}
}
}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/encoders/EncodingSpec.scala b/core/src/test/scala/astraea/spark/rasterframes/encoders/EncodingSpec.scala
index dd21b8ce1..a0c0bad0e 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/encoders/EncodingSpec.scala
+++ b/core/src/test/scala/astraea/spark/rasterframes/encoders/EncodingSpec.scala
@@ -25,6 +25,7 @@ import java.io.File
import java.net.URI
import astraea.spark.rasterframes._
+import astraea.spark.rasterframes.tiles.ProjectedRasterTile
import com.vividsolutions.jts.geom.Envelope
import geotrellis.proj4._
import geotrellis.raster.{CellType, Tile, TileFeature}
@@ -32,6 +33,7 @@ import geotrellis.spark.{SpaceTimeKey, SpatialKey, TemporalProjectedExtent, Tile
import geotrellis.vector.{Extent, ProjectedExtent}
import org.apache.spark.sql.Row
import org.apache.spark.sql.functions._
+import org.apache.spark.sql.rf.TileUDT
/**
* Test rig for encoding GT types into Catalyst types.
@@ -44,10 +46,14 @@ class EncodingSpec extends TestEnvironment with TestData {
describe("Spark encoding on standard types") {
- it("should code RDD[(Int, Tile)]") {
- val ds = Seq((1, byteArrayTile: Tile), (2, null)).toDS
- write(ds)
- assert(ds.toDF.as[(Int, Tile)].collect().head === ((1, byteArrayTile)))
+ it("should serialize Tile") {
+ val TileType = new TileUDT()
+
+ forAll(allTileTypes) { t =>
+ noException shouldBe thrownBy {
+ TileType.deserialize(TileType.serialize(t))
+ }
+ }
}
it("should code RDD[Tile]") {
@@ -57,6 +63,12 @@ class EncodingSpec extends TestEnvironment with TestData {
assert(ds.toDF.as[Tile].collect().head === byteArrayTile)
}
+ it("should code RDD[(Int, Tile)]") {
+ val ds = Seq((1, byteArrayTile: Tile), (2, null)).toDS
+ write(ds)
+ assert(ds.toDF.as[(Int, Tile)].collect().head === ((1, byteArrayTile)))
+ }
+
it("should code RDD[TileFeature]") {
val thing = TileFeature(byteArrayTile: Tile, "meta")
val ds = Seq(thing).toDS()
@@ -64,6 +76,13 @@ class EncodingSpec extends TestEnvironment with TestData {
assert(ds.toDF.as[TileFeature[Tile, String]].collect().head === thing)
}
+ it("should code RDD[ProjectedRasterTile]") {
+ val tile = TestData.projectedRasterTile(20, 30, -1.2, extent)
+ val ds = Seq(tile).toDS()
+ write(ds)
+ assert(ds.toDF.as[ProjectedRasterTile].collect().head === tile)
+ }
+
it("should code RDD[Extent]") {
val ds = Seq(extent).toDS()
write(ds)
@@ -85,8 +104,6 @@ class EncodingSpec extends TestEnvironment with TestData {
it("should code RDD[CellType]") {
val ct = CellType.fromName("uint8")
val ds = Seq(ct).toDS()
- //ds.printSchema()
- //ds.show(false)
write(ds)
assert(ds.toDF.as[CellType].first() === ct)
}
@@ -105,7 +122,7 @@ class EncodingSpec extends TestEnvironment with TestData {
assert(ds.toDF.as[(SpatialKey, SpaceTimeKey)].first === (sk, stk))
// This stinks: vvvvvvvv Encoders don't seem to work with UDFs.
- val key2col = udf((row: Row) ⇒ row.getInt(0))
+ val key2col = udf((row: Row) => row.getInt(0))
val colNum = ds.select(key2col(ds(ds.columns.head))).as[Int].first()
assert(colNum === 37)
@@ -118,13 +135,13 @@ class EncodingSpec extends TestEnvironment with TestData {
val results = ds.toDF.as[CRS].collect()
- results should contain allElementsOf (values)
+ results should contain allElementsOf values
}
it("should code RDD[URI]") {
val ds = Seq[URI](new URI("http://astraea.earth/"), new File("/tmp/humbug").toURI).toDS()
write(ds)
- assert(ds.filter(u ⇒ Option(u.getHost).exists(_.contains("astraea"))).count === 1)
+ assert(ds.filter(u => Option(u.getHost).exists(_.contains("astraea"))).count === 1)
}
it("should code RDD[Envelope]") {
@@ -135,6 +152,4 @@ class EncodingSpec extends TestEnvironment with TestData {
}
}
-
}
-
diff --git a/core/src/test/scala/astraea/spark/rasterframes/ref/RasterRefSpec.scala b/core/src/test/scala/astraea/spark/rasterframes/ref/RasterRefSpec.scala
index c38f59cc8..4efe2b474 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/ref/RasterRefSpec.scala
+++ b/core/src/test/scala/astraea/spark/rasterframes/ref/RasterRefSpec.scala
@@ -23,7 +23,8 @@ package astraea.spark.rasterframes.ref
import astraea.spark.rasterframes.TestEnvironment.ReadMonitor
import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.expressions._
+import astraea.spark.rasterframes.expressions.transformers._
+import astraea.spark.rasterframes.expressions.accessors._
import astraea.spark.rasterframes.ref.RasterRef.RasterRefTile
import geotrellis.raster.Tile
import geotrellis.vector.Extent
@@ -184,7 +185,7 @@ class RasterRefSpec extends TestEnvironment with TestData {
new Fixture {
import spark.implicits._
val df = Seq(src).toDF("src")
- val refs = df.select(RasterSourceToRasterRefs($"src"))
+ val refs = df.select(RasterSourceToRasterRefs(true, $"src"))
assert(refs.count() > 1)
}
}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/ref/RasterSourceSpec.scala b/core/src/test/scala/astraea/spark/rasterframes/ref/RasterSourceSpec.scala
index f1bfc2dec..1c1fb182a 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/ref/RasterSourceSpec.scala
+++ b/core/src/test/scala/astraea/spark/rasterframes/ref/RasterSourceSpec.scala
@@ -163,27 +163,4 @@ class RasterSourceSpec extends TestEnvironment with TestData {
}
}
}
-
- describe("RasterSource.readAll") {
- it("should return consistently ordered tiles across bands for a given scene") {
-
- // These specific scenes exhibit the problem where
- // we see different subtile segment ordering across
- // the bands of a given scene.
- val rURI = new URI("https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/016/034/LC08_L1TP_016034_20181003_20181003_01_RT/LC08_L1TP_016034_20181003_20181003_01_RT_B4.TIF")
- val bURI = new URI("https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/016/034/LC08_L1TP_016034_20181003_20181003_01_RT/LC08_L1TP_016034_20181003_20181003_01_RT_B2.TIF")
- //val gURI = new URI("https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/016/034/LC08_L1TP_016034_20181003_20181003_01_RT/LC08_L1TP_016034_20181003_20181003_01_RT_B3.TIF")
-
- val red = RasterSource(rURI).readAll().left.get
- val blue = RasterSource(bURI).readAll().left.get
- //val green = RasterSource(gURI).readAll().left.get
-
- red should not be empty
- red.size should equal(blue.size)
- //red.size should equal(green.size)
-
- red.map(_.dimensions) should contain theSameElementsAs blue.map(_.dimensions)
- //red.map(_.dimensions) should contain theSameElementsInOrderAs green.map(_.dimensions)
- }
- }
}
diff --git a/core/src/test/scala/examples/Exporting.scala b/core/src/test/scala/examples/Exporting.scala
index 5dca99397..247e93944 100644
--- a/core/src/test/scala/examples/Exporting.scala
+++ b/core/src/test/scala/examples/Exporting.scala
@@ -54,7 +54,7 @@ object Exporting extends App {
// The @scaladoc[`tile_to_array`][tile_to_array] column function requires a type parameter to indicate the array element
// type you would like used. The following types may be used: `Int`, `Double`, `Byte`, `Short`, `Float`
- val withArrays = rf.withColumn("tileData", tile_to_array[Short]($"tile")).drop("tile")
+ val withArrays = rf.withColumn("tileData", tile_to_array_int($"tile")).drop("tile")
withArrays.show(5, 40)
// You can convert the data back to an array, but you have to specify the target tile dimensions.
diff --git a/deployment/README.md b/deployment/README.md
index d9ce541da..5e008b8a1 100644
--- a/deployment/README.md
+++ b/deployment/README.md
@@ -56,7 +56,7 @@ To build the Docker image based on local development changes:
```bash
# from the root of the repo
-sbt deployment/rfNotebookContainer
+sbt deployment/rfDocker
```
## Base images
diff --git a/deployment/build.sbt b/deployment/build.sbt
index 66cc330ae..c76ef554b 100644
--- a/deployment/build.sbt
+++ b/deployment/build.sbt
@@ -11,13 +11,13 @@ val Python = config("python")
lazy val rfDockerImageName = settingKey[String]("Name to tag Docker image with.")
rfDockerImageName := "s22s/rasterframes-notebooks"
-lazy val rfNotebookContainer = taskKey[Unit]("Build Jupyter Notebook Docker image with RasterFrames support.")
-rfNotebookContainer := (Docker / packageBin).value
+lazy val rfDocker = taskKey[Unit]("Build Jupyter Notebook Docker image with RasterFrames support.")
+rfDocker := (Docker / packageBin).value
lazy val runRFNotebook = taskKey[String]("Run RasterFrames Jupyter Notebook image")
runRFNotebook := {
val imageName = rfDockerImageName.value
- val _ = rfNotebookContainer.value
+ val _ = rfDocker.value
Process(s"docker run -p 8888:8888 -p 4040:4040 $imageName").run()
imageName
}
diff --git a/docs/src/main/tut/apps/geotrellis-ops.md b/docs/src/main/tut/apps/geotrellis-ops.md
index 4ea3efb08..81a97a3fa 100644
--- a/docs/src/main/tut/apps/geotrellis-ops.md
+++ b/docs/src/main/tut/apps/geotrellis-ops.md
@@ -34,7 +34,7 @@ Here's an example downsampling a tile and rendering each tile as a matrix of num
```tut
val downsample = udf((t: Tile) => t.resample(4, 4))
val downsampled = rf.where(no_data_cells($"tile") === 0).select(downsample($"tile") as "minime")
-downsampled.select(tile_to_array[Float]($"minime") as "cell_values").limit(2).show(false)
+downsampled.select(tile_to_array_double($"minime") as "cell_values").limit(2).show(false)
```
diff --git a/docs/src/main/tut/exporting-rasterframes.md b/docs/src/main/tut/exporting-rasterframes.md
index def137fc8..2015943f5 100644
--- a/docs/src/main/tut/exporting-rasterframes.md
+++ b/docs/src/main/tut/exporting-rasterframes.md
@@ -27,11 +27,11 @@ The cell values within a `Tile` are encoded internally as an array. There may be
where the additional context provided by the `Tile` construct is no longer needed and one would
prefer to work with the underlying array data.
-The @scaladoc[`tileToArray`][tileToArray] column function requires a type parameter to indicate the array element
+The @scaladoc[`tile_to_array_int`][tile_to_array_int] column function requires a type parameter to indicate the array element
type you would like used. The following types may be used: `Int`, `Double`, `Byte`, `Short`, `Float`
```tut
-val withArrays = rf.withColumn("tileData", tileToArray[Short]($"tile")).drop("tile")
+val withArrays = rf.withColumn("tileData", tile_to_array_int($"tile")).drop("tile")
withArrays.show(5, 40)
```
@@ -186,5 +186,5 @@ spark.stop()
[rfInit]: astraea.spark.rasterframes.package#rfInit%28SQLContext%29:Unit
[rdd]: org.apache.spark.sql.Dataset#frdd:org.apache.spark.rdd.RDD[T]
[toTileLayerRDD]: astraea.spark.rasterframes.RasterFrameMethods#toTileLayerRDD%28tileCol:RasterFrameMethods.this.TileColumn%29:Either[geotrellis.spark.TileLayerRDD[geotrellis.spark.SpatialKey],geotrellis.spark.TileLayerRDD[geotrellis.spark.SpaceTimeKey]]
-[tileToArray]: astraea.spark.rasterframes.ColumnFunctions#tileToArray
+[tile_to_array_int]: astraea.spark.rasterframes.ColumnFunctions#tile_to_array_int
diff --git a/docs/src/main/tut/ml/statistics.md b/docs/src/main/tut/ml/statistics.md
index 6b0328146..3ff086ad1 100644
--- a/docs/src/main/tut/ml/statistics.md
+++ b/docs/src/main/tut/ml/statistics.md
@@ -81,7 +81,7 @@ rf.select(agg_stats($"tile")).show()
A more involved example: extract bin counts from a computed `Histogram`.
```tut
-rf.select(agg_histogram($"tile")).
+rf.select(agg_approx_histogram($"tile")).
map(h => for(v <- h.labels) yield(v, h.itemCount(v))).
select(explode($"value") as "counts").
select("counts._1", "counts._2").
diff --git a/docs/src/main/tut/pyrasterframes.md b/docs/src/main/tut/pyrasterframes.md
index 027746c10..35a5169d5 100644
--- a/docs/src/main/tut/pyrasterframes.md
+++ b/docs/src/main/tut/pyrasterframes.md
@@ -7,9 +7,8 @@ in the meantime:
* [PyRasterFrames README](https://github.com/locationtech/rasterframes/blob/develop/pyrasterframes/python/README.rst)
* [PyRasterFrames Examples](https://github.com/locationtech/rasterframes/tree/develop/pyrasterframes/python/examples)
* [RasterFrames Jupyter Notebook](https://github.com/locationtech/rasterframes/blob/develop/deployment/README.md)
+* @ref:[PyRasterFrames Functions](reference.md)
-Most features available in the Scala API are exposed in the Python API, and take almost the same form as they
-do in Scala. Python UDFs on `Tile` are not yet supported.
+Most features available in the Scala API are exposed in the Python API; refer to the @ref:[function reference](reference.md). Defining a [udf](http://spark.apache.org/docs/latest/api/python/pyspark.sql.html#pyspark.sql.functions.udf) using a `Tile` column through the Python API is not yet supported.
-If there's a specific feature that appears to be missing in the Python version [please submit an issue](https://github.com/locationtech/rasterframes/issues)
-so that we might address it for you.
\ No newline at end of file
+If there's a specific feature that appears to be missing in the Python version, [please submit an issue](https://github.com/locationtech/rasterframes/issues) so that we might address it for you.
diff --git a/docs/src/main/tut/reference.md b/docs/src/main/tut/reference.md
index 704fbe367..d264d900d 100644
--- a/docs/src/main/tut/reference.md
+++ b/docs/src/main/tut/reference.md
@@ -1,8 +1,946 @@
-# Reference
+# Function Reference
+
+For the most up-to-date list of user-defined functions on `Tile` columns, see the API documentation for @scaladoc[`RasterFunctions`][RasterFunctions].
+
+The full Scala API documentation can be found [here][scaladoc].
+
+RasterFrames also provides SQL and Python bindings to many UDFs using the `Tile` column type. In Spark SQL, the functions are already registered in the SQL engine; they are usually prefixed with `rf_`. In Python, they are available in the `pyrasterframes.rasterfunctions` module.
+
+The convention in this document will be to define the function signature as below, with its return type, the function name, and named arguments with their types.
+
+```
+ReturnDataType function_name(InputDataType argument1, InputDataType argument2)
+```
+
+## List of Available SQL and Python Functions
+
+@@toc { depth=3 }
+
+### Vector Operations
+
+Various LocationTech GeoMesa UDFs for working with `geometry` type columns are also provided in the SQL engine and within the `pyrasterframes.rasterfunctions` Python module. These are documented in the [LocationTech GeoMesa Spark SQL documentation](https://www.geomesa.org/documentation/user/spark/sparksql_functions.html#). These functions are all prefixed with `st_`.
+
+RasterFrames provides two additional functions for vector geometry.
+
+#### reproject_geometry
+
+_Python_:
+ Geometry reproject_geometry(Geometry geom, String origin_crs, String destination_crs)
+
+_SQL_: `rf_reproject_geometry`
+
+Reproject the vector `geom` from `origin_crs` to `destination_crs`. Both `_crs` arguments are either [proj4](https://proj4.org/usage/quickstart.html) strings, [EPSG](https://www.epsg-registry.org/) codes, or [OGC WKT](https://www.opengeospatial.org/standards/wkt-crs) strings for coordinate reference systems.
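+
+A minimal SQL sketch, assuming a view `geoms` with a `geom` column in WGS84 and that the `'EPSG:4326'` code form is accepted:
+
+```python
+spark.sql("""
+SELECT rf_reproject_geometry(geom, 'EPSG:4326', 'EPSG:3857') AS geom_wm
+FROM geoms
+""").show(5, False)
+```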
+
+
+#### envelope
+
+_Python_:
+
+ Struct[Double minX, Double maxX, Double minY, Double maxY] envelope(Geometry geom)
+
+Python only. Extracts the bounding box (envelope) of the geometry.
+
+See also GeoMesa [st_envelope](https://www.geomesa.org/documentation/user/spark/sparksql_functions.html#st-envelope) which returns a Geometry type.
+
+### Tile Metadata and Mutation
+
+Functions to access and change the particulars of a `tile`: its shape and the data type of its cells. See below section on @ref:[masking and nodata](reference.md#masking-and-nodata) for additional discussion of cell types.
+
+#### cell_types
+
+
+_Python_:
+
+ Array[String] cell_types()
+
+_SQL_: `rf_cell_types`
+
+Return an array of possible cell type names, as shown below. These names are used in other functions. See the @ref:[discussion on nodata](reference.md#masking-and-nodata) for additional details.
+
+|cell_types |
+|----------|
+|bool |
+|int8raw |
+|int8 |
+|uint8raw |
+|uint8 |
+|int16raw |
+|int16 |
+|uint16raw |
+|uint16 |
+|int32raw |
+|int32 |
+|float32raw|
+|float32 |
+|float64raw|
+|float64 |
+
+
+#### tile_dimensions
+
+_Python_:
+
+ Struct[Int, Int] tile_dimensions(Tile tile)
+
+_SQL_: `rf_tile_dimensions`
+
+Get number of columns and rows in the `tile`, as a Struct of `cols` and `rows`.
+
+#### cell_type
+
+_Python_:
+
+ Struct[String] cell_type(Tile tile)
+
+_SQL_: `rf_cell_type`
+
+Get the cell type of the `tile`. Available cell types can be retrieved with the @ref:[cell_types](reference.md#cell-types) function.
+
+#### convert_cell_type
+
+_Python_:
+
+ Tile convert_cell_type(Tile tileCol, String cellType)
+
+_SQL_: `rf_convert_cell_type`
+
+Convert `tileCol` to a different cell type.
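+
+For example, a sketch coercing an integral `tile` column to floating point (the DataFrame `rf` and its `tile` column are assumed):
+
+```python
+from pyrasterframes.rasterfunctions import convert_cell_type
+rf.select(convert_cell_type(rf.tile, 'float32').alias('tile_f32'))
+```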
+
+#### resample
+
+_Python_:
+
+ Tile resample(Tile tile, Double factor)
+ Tile resample(Tile tile, Int factor)
+ Tile resample(Tile tile, Tile shape_tile)
+
+_SQL_: `rf_resample`
+
+Change the tile dimensions. Passing a numeric `factor` scales the number of columns and rows in the tile: 1.0 leaves the dimensions unchanged; less than one downsamples the tile; greater than one upsamples it. Passing a `shape_tile` as the second argument outputs a `tile` having the same number of columns and rows as `shape_tile`. All resampling uses the nearest-neighbor method.
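+
+A sketch in SQL, assuming a view `tmp` with a `tile` column; the inner call halves the tile dimensions and the outer call scales them back up:
+
+```python
+spark.sql("""
+SELECT rf_resample(rf_resample(tile, 0.5), 2.0) AS roundtrip
+FROM tmp
+""").show(5, False)
+```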
+
+### Tile Creation
+
+Functions to create a new Tile column, either from scratch or from existing data not yet in a `tile`.
+
+#### tile_zeros
+
+_Python_:
+
+```
+Tile tile_zeros(Int tile_columns, Int tile_rows, String cell_type_name)
+```
+
+_SQL_: `rf_tile_zeros`
+
+Create a `tile` of shape `tile_columns` by `tile_rows` full of zeros, with the specified cell type. See function @ref:[`cell_types`](reference.md#cell-types) for valid values. All arguments are literal values and not column expressions.
+
+#### tile_ones
+
+_Python_:
+
+```
+Tile tile_ones(Int tile_columns, Int tile_rows, String cell_type_name)
+```
+
+_SQL_: `rf_tile_ones`
+
+Create a `tile` of shape `tile_columns` by `tile_rows` full of ones, with the specified cell type. See function @ref:[`cell_types`](reference.md#cell-types) for valid values. All arguments are literal values and not column expressions.
+
+#### make_constant_tile
+
+_Python_:
+
+ Tile make_constant_tile(Numeric constant, Int tile_columns, Int tile_rows, String cell_type_name)
+
+_SQL_: `rf_make_constant_tile`
+
+Create a `tile` of shape `tile_columns` by `tile_rows` full of `constant`, with the specified cell type. See function @ref:[`cell_types`](reference.md#cell-types) for valid values. All arguments are literal values and not column expressions.
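+
+For example, a one-line SQL sketch creating a 10 x 10 tile filled with the value 1:
+
+```python
+spark.sql("SELECT rf_make_constant_tile(1, 10, 10, 'int8raw') AS t").show(1, False)
+```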
+
+
+#### rasterize
+
+_Python_:
+
+ Tile rasterize(Geometry geom, Geometry tile_bounds, Int value, Int tile_columns, Int tile_rows)
+
+_SQL_: `rf_rasterize`
+
+Convert a vector Geometry `geom` into a Tile representation. The `value` will be "burned in" to the returned `tile` where `geom` intersects the `tile_bounds`. The returned `tile` will have shape `tile_columns` by `tile_rows`. Cells outside `geom` will be assigned a nodata value. The returned `tile` has cell type `int32`; note that `value` is of type Int.
+
+Parameters `tile_columns` and `tile_rows` are literals, not column expressions. The others are column expressions.
+
+
+Example use: in the code snippet below, you can visualize the `tri` and `b` geometries with tools like [Wicket](https://arthur-e.github.io/Wicket/sandbox-gmaps3.html). The result is a right triangle burned into the `tile`, with nodata values shown as ∘.
+
+
+```python
+spark.sql("""
+SELECT rf_render_ascii(
+ rf_rasterize(tri, b, 8, 10, 10))
+
+FROM
+ ( SELECT st_geomFromWKT('POLYGON((1.5 0.5, 1.5 1.5, 0.5 0.5, 1.5 0.5))') AS tri,
+ st_geomFromWKT('POLYGON((0.0 0.0, 2.0 0.0, 2.0 2.0, 0.0 2.0, 0.0 0.0))') AS b
+ ) r
+""").show(1, False)
+
+-----------
+|∘∘∘∘∘∘∘∘∘∘
+∘∘∘∘∘∘∘∘∘∘
+∘∘∘∘∘∘∘∘∘∘
+∘∘∘∘∘∘∘ ∘∘
+∘∘∘∘∘∘ ∘∘
+∘∘∘∘∘ ∘∘
+∘∘∘∘ ∘∘
+∘∘∘ ∘∘
+∘∘∘∘∘∘∘∘∘∘
+∘∘∘∘∘∘∘∘∘∘|
+-----------
+```
+
+
+#### array_to_tile
+
+_Python_:
+
+ Tile array_to_tile(Array arrayCol, Int numCols, Int numRows)
+
+Python only. Create a `tile` from a Spark SQL [Array](http://spark.apache.org/docs/2.3.2/api/python/pyspark.sql.html#pyspark.sql.types.ArrayType), filling values in row-major order.
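+
+A sketch, assuming a DataFrame `df` with an array column `arr` holding nine values to be laid out as a 3 x 3 tile:
+
+```python
+from pyrasterframes.rasterfunctions import array_to_tile
+df.select(array_to_tile(df.arr, 3, 3).alias('tile'))
+```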
+
+#### assemble_tile
+
+_Python_:
+
+ Tile assemble_tile(Int colIndex, Int rowIndex, Numeric cellData, Int numCols, Int numRows, String cellType)
+
+Python only. Create a Tile from a column of cell data with location indices. This function is the inverse of @ref:[`explode_tiles`](reference.md#explode-tiles). Intended use is with a `groupby`, producing one row with a new `tile` per group. The `numCols`, `numRows` and `cellType` arguments are literal values, others are column expressions. Valid values for `cellType` can be found with function @ref:[`cell_types`](reference.md#cell-types).
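+
+A sketch of the intended `groupby` usage; the `tile_id`, `col_index`, `row_index`, and `cell_value` column names are illustrative:
+
+```python
+from pyspark.sql.functions import col
+from pyrasterframes.rasterfunctions import assemble_tile
+df.groupBy('tile_id').agg(
+    assemble_tile(col('col_index'), col('row_index'), col('cell_value'),
+                  128, 128, 'float32').alias('tile'))
+```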
+
+### Masking and Nodata
+
+In raster operations, the preservation and correct processing of missing observations is very important. The idea of missing data is often expressed as a null or NaN. In raster data, missing observations are often termed NODATA; we will style them as nodata in this document. RasterFrames provides a variety of functions to manage and inspect nodata within `tile`s.
+
+See also statistical summaries to get the count of data and nodata values per `tile` and aggregate in a `tile` column: @ref:[`data_cells`](reference.md#data-cells), @ref:[`no_data_cells`](reference.md#no-data-cells), @ref:[`agg_data_cells`](reference.md#agg-data-cells), @ref:[`agg_no_data_cells`](reference.md#agg-no-data-cells).
+
+It is important to note that not all cell types support a nodata representation: the exceptions are `bool` and the cell types whose names end in `raw`.
+
+For integral valued cell types, nodata is marked by a special sentinel value. This can be a default, typically zero or the minimum value for the underlying data type. The nodata value can also be user defined. For example, if the value 4 is to be interpreted as nodata, the cell type will read `'int32ud4'`.
+
+For float cell types, nodata can either be NaN or a user-defined value; for example, `'float32ud-999.9'` means the value -999.9 is interpreted as nodata.
+
+For more reading about cell types and nodata, see the [GeoTrellis documentation](https://geotrellis.readthedocs.io/en/latest/guide/core-concepts.html?#working-with-cell-values).
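+
+As an illustration, a sketch marking the value 4 as nodata by converting to a user-defined cell type (the `rf` DataFrame and its `tile` column are assumed):
+
+```python
+from pyrasterframes.rasterfunctions import convert_cell_type
+rf.select(convert_cell_type(rf.tile, 'int32ud4').alias('tile_ud'))
+```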
+
+#### mask
+
+_Python_:
+
+ Tile mask(Tile tile, Tile mask)
+
+_SQL_: `rf_mask`
+
+Where the `mask` contains nodata, replace values in the `tile` with nodata.
+
+The returned `tile`'s cell type will be coerced to one supporting nodata if it does not already support it.
+
+
+#### inverse_mask
+
+_Python_:
+
+ Tile inverse_mask(Tile tile, Tile mask)
+
+_SQL_: `rf_inverse_mask`
+
+Where the `mask` _does not_ contain nodata, replace values in `tile` with nodata.
+
+#### mask_by_value
+
+_Python_:
+
+ Tile mask_by_value(Tile data_tile, Tile mask_tile, Int mask_value)
+
+_SQL_: `rf_mask_by_value`
+
+Generate a `tile` with the values from `data_tile`, with nodata in cells where the `mask_tile` is equal to `mask_value`.
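+
+A typical use is masking against a quality band; a sketch, with the `tile` and `qa` column names and the mask value assumed:
+
+```python
+from pyspark.sql.functions import lit
+from pyrasterframes.rasterfunctions import mask_by_value
+rf.select(mask_by_value(rf.tile, rf.qa, lit(1)).alias('masked'))
+```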
+
+
+#### is_no_data_tile
+
+_Python_:
+
+ Boolean is_no_data_tile(tile)
+
+_SQL_: `rf_is_no_data_tile`
+
+Returns true if `tile` contains only nodata. By definition, it returns false if the cell type does not support nodata.
+
+#### with_no_data
+
+_Python_:
+
+ Tile with_no_data(Tile tile, Double no_data_value)
+
+Python only. Return a `tile` column marking as nodata all cells equal to `no_data_value`.
+
+The `no_data_value` argument is a literal Double, not a Column expression.
+
+If the input `tile` already had a nodata value, the behavior depends on whether its cell type is floating point. For floating point cell types, nodata values in the input `tile` remain nodata values in the output. For integral cell types, the previous nodata values become literal values.
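+
+For example, a sketch treating zeros as missing (the `rf` DataFrame and its `tile` column are assumed):
+
+```python
+from pyrasterframes.rasterfunctions import with_no_data
+rf.select(with_no_data(rf.tile, 0.0).alias('tile_nd'))
+```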
+
+### Map Algebra
+
+[Map algebra](https://gisgeography.com/map-algebra-global-zonal-focal-local/) raster operations are element-wise operations between a `tile` and a scalar, between two `tile`s, or among many `tile`s.
+
+Some of these functions have similar variations in the Python API:
+
+ - `local_op`: applies `op` to two columns; the right hand side can be a `tile` or a numeric column.
+ - `local_op_scalar`: applies `op` to a `tile` and a literal scalar, coercing the `tile` to a floating point type
+ - `local_op_scalar_int`: applies `op` to a `tile` and a literal scalar, without coercing the `tile` to a floating point type
+
+We document all three variations for `local_add` below, and omit the scalar variants for the remaining operations.
+
+The SQL API does not require the `local_op_scalar` or `local_op_scalar_int` forms.
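+
+A sketch contrasting the three forms, assuming `t1` and `t2` are `tile` columns of an integral cell type:
+
+```python
+from pyrasterframes.rasterfunctions import local_add, local_add_scalar, local_add_scalar_int
+df.select(
+    local_add(df.t1, df.t2).alias('tile_plus_tile'),   # element-wise tile + tile
+    local_add_scalar(df.t1, 1.5).alias('plus_1_5'),    # coerces to floating point
+    local_add_scalar_int(df.t1, 1).alias('plus_1'))    # keeps the integral cell type
+```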
+
+#### local_add
+
+_Python_:
+
+ Tile local_add(Tile tile1, Tile rhs)
+ Tile local_add(Tile tile1, Int rhs)
+ Tile local_add(Tile tile1, Double rhs)
+
+_SQL_: `rf_local_add`
+
+Returns a `tile` column containing the element-wise sum of `tile1` and `rhs`.
+
+#### local_add_scalar
+
+_Python_:
+
+ Tile local_add_scalar(Tile tile, Double scalar)
+
+_SQL_: `rf_local_add_scalar`
+
+Returns a `tile` column containing the element-wise sum of `tile` and `scalar`. If `tile` is of integral type, it will be coerced to floating point before addition; returns a float valued `tile`.
+
+
+#### local_add_scalar_int
+
+_Python_:
+
+ Tile local_add_scalar_int(Tile tile, Int scalar)
+
+_SQL_: `rf_local_add_scalar_int`
+
+Returns a `tile` column containing the element-wise sum of `tile` and `scalar`. If `tile` is of integral type, the result is also of integral type.
+
+#### local_subtract
+
+_Python_:
+
+ Tile local_subtract(Tile tile1, Tile rhs)
+ Tile local_subtract(Tile tile1, Int rhs)
+ Tile local_subtract(Tile tile1, Double rhs)
+
+_SQL_: `rf_local_subtract`
+
+Returns a `tile` column containing the element-wise difference of `tile1` and `rhs`.
+
+
+#### local_multiply
+
+_Python_:
+
+ Tile local_multiply(Tile tile1, Tile rhs)
+ Tile local_multiply(Tile tile1, Int rhs)
+ Tile local_multiply(Tile tile1, Double rhs)
+
+_SQL_: `rf_local_multiply`
+
+Returns a `tile` column containing the element-wise product of `tile1` and `rhs`. This is **not** the matrix multiplication of `tile1` and `rhs`.
+
+
+#### local_divide
+
+_Python_:
+
+ Tile local_divide(Tile tile1, Tile rhs)
+ Tile local_divide(Tile tile1, Int rhs)
+ Tile local_divide(Tile tile1, Double rhs)
+
+_SQL_: `rf_local_divide`
+
+Returns a `tile` column containing the element-wise quotient of `tile1` and `rhs`.
+
+
+#### normalized_difference
+
+_Python_:
+
+ Tile normalized_difference(Tile tile1, Tile tile2)
+
+_SQL_: `rf_normalized_difference`
+
+Compute the normalized difference of the two `tile`s: `(tile1 - tile2) / (tile1 + tile2)`. The result is always of floating point cell type. This function has no scalar variant.
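+
+The canonical use is computing band indices such as NDVI; a sketch, with the `nir` and `red` tile column names assumed:
+
+```python
+from pyrasterframes.rasterfunctions import normalized_difference
+rf.select(normalized_difference(rf.nir, rf.red).alias('ndvi'))
+```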
+
+#### local_less
+
+_Python_:
+
+ Tile local_less(Tile tile1, Tile rhs)
+ Tile local_less(Tile tile1, Int rhs)
+ Tile local_less(Tile tile1, Double rhs)
+
+_SQL_: `rf_less`
+
+Returns a `tile` column containing the element-wise evaluation of whether `tile1` is less than `rhs`.
+
+#### local_less_equal
+
+_Python_:
+
+ Tile local_less_equal(Tile tile1, Tile rhs)
+ Tile local_less_equal(Tile tile1, Int rhs)
+ Tile local_less_equal(Tile tile1, Double rhs)
+
+_SQL_: `rf_less_equal`
+
+Returns a `tile` column containing the element-wise evaluation of whether `tile1` is less than or equal to `rhs`.
+
+#### local_greater
+
+_Python_:
+
+ Tile local_greater(Tile tile1, Tile rhs)
+ Tile local_greater(Tile tile1, Int rhs)
+ Tile local_greater(Tile tile1, Double rhs)
+
+_SQL_: `rf_greater`
+
+Returns a `tile` column containing the element-wise evaluation of whether `tile1` is greater than `rhs`.
+
+#### local_greater_equal
+
+_Python_:
+
+ Tile local_greater_equal(Tile tile1, Tile rhs)
+ Tile local_greater_equal(Tile tile1, Int rhs)
+ Tile local_greater_equal(Tile tile1, Double rhs)
+
+_SQL_: `rf_greater_equal`
+
+Returns a `tile` column containing the element-wise evaluation of whether `tile1` is greater than or equal to `rhs`.
+
+#### local_equal
+
+_Python_:
+
+ Tile local_equal(Tile tile1, Tile rhs)
+ Tile local_equal(Tile tile1, Int rhs)
+ Tile local_equal(Tile tile1, Double rhs)
+
+_SQL_: `rf_equal`
+
+Returns a `tile` column containing the element-wise equality of `tile1` and `rhs`.
+
+#### local_unequal
+
+_Python_:
+
+ Tile local_unequal(Tile tile1, Tile rhs)
+ Tile local_unequal(Tile tile1, Int rhs)
+ Tile local_unequal(Tile tile1, Double rhs)
+
+_SQL_: `rf_unequal`
+
+Returns a `tile` column containing the element-wise inequality of `tile1` and `rhs`.
+
+#### round
+
+_Python_:
+
+ Tile round(Tile tile)
+
+_SQL_: `rf_round`
+
+Round cell values to the nearest integer without changing the cell type.
+
+#### exp
+
+_Python_:
+
+ Tile exp(Tile tile)
+
+_SQL_: `rf_exp`
+
+Performs the cell-wise natural exponential: _e_ to the power of cell values.
+
+#### exp10
+
+_Python_:
+
+ Tile exp10(Tile tile)
+
+_SQL_: `rf_exp10`
+
+Compute 10 to the power of cell values.
+
+#### exp2
+
+_Python_:
+
+ Tile exp2(Tile tile)
+
+_SQL_: `rf_exp2`
+
+Compute 2 to the power of cell values.
+
+#### expm1
+
+_Python_:
+
+ Tile expm1(Tile tile)
+
+_SQL_: `rf_expm1`
+
+Performs the cell-wise exponential, then subtracts one. Inverse of @ref:[`log1p`](reference.md#log1p).
+
+#### log
+
+_Python_:
+
+ Tile log(Tile tile)
+
+_SQL_: `rf_log`
+
+Performs cell-wise natural logarithm.
+
+#### log10
+
+_Python_:
+
+ Tile log10(Tile tile)
+
+_SQL_: `rf_log10`
+
+Performs cell-wise logarithm with base 10.
+
+#### log2
+
+_Python_:
+
+ Tile log2(Tile tile)
+
+_SQL_: `rf_log2`
+
+Performs cell-wise logarithm with base 2.
+
+#### log1p
+
+_Python_:
+
+ Tile log1p(Tile tile)
+
+_SQL_: `rf_log1p`
+
+Performs the cell-wise natural logarithm of one plus the cell value. Inverse of @ref:[`expm1`](reference.md#expm1).
+
+### Tile Statistics
+
+The following functions compute a statistical summary per row of a `tile` column. The statistics are computed across the cells of a single `tile`, within each DataFrame Row. Consider the following example.
+
+```python
+import pyspark.sql.functions as F
+from pyrasterframes.rasterfunctions import tile_sum
+spark.sql("""
+ SELECT 1 as id, rf_tile_ones(5, 5, 'float32') as t
+ UNION
+ SELECT 2 as id, rf_local_multiply(rf_tile_ones(5, 5, 'float32'), 3) as t
+ """).select(F.col('id'), tile_sum(F.col('t'))).show()
+
+
++---+-----------+
+| id|tile_sum(t)|
++---+-----------+
+| 2| 75.0|
+| 1| 25.0|
++---+-----------+
+```
+
+
+#### tile_sum
+
+_Python_:
+
+ Double tile_sum(Tile tile)
+
+_SQL_: `rf_tile_sum`
+
+Computes the sum of cells in each row of column `tile`, ignoring nodata values.
+
+#### tile_mean
+
+_Python_:
+
+ Double tile_mean(Tile tile)
+
+_SQL_: `rf_tile_mean`
+
+Computes the mean of cells in each row of column `tile`, ignoring nodata values.
+
+
+#### tile_min
+
+_Python_:
+
+ Double tile_min(Tile tile)
+
+_SQL_: `rf_tile_min`
+
+Computes the min of cells in each row of column `tile`, ignoring nodata values.
+
+
+#### tile_max
+
+_Python_:
+
+ Double tile_max(Tile tile)
+
+_SQL_: `rf_tile_max`
+
+Computes the max of cells in each row of column `tile`, ignoring nodata values.
+
+
+#### no_data_cells
+
+_Python_:
+
+ Long no_data_cells(Tile tile)
+
+_SQL_: `rf_no_data_cells`
+
+Return the count of nodata cells in the `tile`.
+
+#### data_cells
+
+_Python_:
+
+ Long data_cells(Tile tile)
+
+_SQL_: `rf_data_cells`
+
+Returns the count of data cells in the `tile`.
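+
+The two counts partition the cells of each `tile`: their sum equals the total cell count, i.e. the product of the tile dimensions. A sketch, assuming a DataFrame `rf` with a `tile` column:
+
+```python
+from pyrasterframes.rasterfunctions import data_cells, no_data_cells
+
+# total_cells should equal cols * rows for each tile.
+rf.select((data_cells(rf.tile) + no_data_cells(rf.tile)).alias('total_cells')).show()
+```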
+
+#### tile_stats
+
+_Python_:
+
+ Struct[Long, Long, Double, Double, Double, Double] tile_stats(Tile tile)
+
+_SQL_: `rf_tile_stats`
+
+Computes the following statistics of cells in each row of column `tile`: data cell count, nodata cell count, minimum, maximum, mean, and variance. The minimum, maximum, mean, and variance are computed ignoring nodata values.
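+
+Individual statistics can be extracted from the resulting struct with the usual nested-field syntax. A sketch, assuming a DataFrame `rf` with a `tile` column; the field names follow the schema shown under `tile_histogram` below:
+
+```python
+from pyrasterframes.rasterfunctions import tile_stats
+
+stats = rf.select(tile_stats(rf.tile).alias('stats'))
+stats.select('stats.min', 'stats.max', 'stats.mean').show()
+```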
+
+
+#### tile_histogram
+
+_Python_:
+
+ Struct[Struct[Long, Long, Double, Double, Double, Double], Array[Struct[Double, Long]]] tile_histogram(Tile tile)
+
+_SQL_: `rf_tile_histogram`
+
+Computes a statistical summary of cell values within each row of `tile`. The resulting column has the schema below. Note that several of the other `tile` statistics functions are convenience methods to extract parts of this result. Related is @ref:[`agg_approx_histogram`](reference.md#agg-approx-histogram), which computes the statistics across all rows in a group.
+
+```
+ |-- tile_histogram: struct (nullable = true)
+ | |-- stats: struct (nullable = true)
+ | | |-- dataCells: long (nullable = false)
+ | | |-- noDataCells: long (nullable = false)
+ | | |-- min: double (nullable = false)
+ | | |-- max: double (nullable = false)
+ | | |-- mean: double (nullable = false)
+ | | |-- variance: double (nullable = false)
+ | |-- bins: array (nullable = true)
+ | | |-- element: struct (containsNull = true)
+ | | | |-- value: double (nullable = false)
+ | | | |-- count: long (nullable = false)
+```
+
+### Aggregate Tile Statistics
+
+These functions compute statistical summaries over all of the cell values *and* across all the rows in the DataFrame or group. The example below computes a single double-valued mean per month across all data cells in the `red_band` `tile` column. It would return at most twelve rows.
+
+
+```python
+from pyspark.sql.functions import month
+from pyrasterframes.rasterfunctions import agg_mean
+rf.groupBy(month(rf.datetime)).agg(agg_mean(rf.red_band).alias('red_mean_monthly'))
+```
+
+Continuing our example from the @ref:[Tile Statistics](reference.md#tile-statistics) section, consider the following. Note that only a single row is returned: the aggregate averages 25 values of 1.0 and 25 values of 3.0 across the fifty cells in the two rows.
+
+```python
+spark.sql("""
+SELECT 1 as id, rf_tile_ones(5, 5, 'float32') as t
+UNION
+SELECT 2 as id, rf_local_multiply(rf_tile_ones(5, 5, 'float32'), 3) as t
+""").agg(agg_mean(F.col('t'))).show(10, False)
+
++-----------+
+|agg_mean(t)|
++-----------+
+|2.0 |
++-----------+
+```
+
+#### agg_mean
+
+_Python_:
+
+ Double agg_mean(Tile tile)
+
+_SQL_: @ref:[`rf_agg_stats`](reference.md#agg-stats)`(tile).mean`
+
+Aggregates over the `tile` and returns the mean of cell values, ignoring nodata. Equivalent to @ref:[`agg_stats`](reference.md#agg-stats)`.mean`.
+
+
+#### agg_data_cells
+
+_Python_:
+
+ Long agg_data_cells(Tile tile)
+
+_SQL_: @ref:[`rf_agg_stats`](reference.md#agg-stats)`(tile).dataCells`
+
+Aggregates over the `tile` and returns the count of data cells. Equivalent to @ref:[`agg_stats`](reference.md#agg-stats)`.dataCells`. Cf. @ref:[`data_cells`](reference.md#data-cells), the row-wise count; equivalent code:
+
+```python
+rf.select(agg_data_cells(rf.tile).alias('agg_data_cell')).show()
+# Equivalent to
+rf.agg(F.sum(data_cells(rf.tile)).alias('agg_data_cell')).show()
+```
+
+#### agg_no_data_cells
+
+_Python_:
+
+ Long agg_no_data_cells(Tile tile)
+
+_SQL_: @ref:[`rf_agg_stats`](reference.md#agg-stats)`(tile).noDataCells`
+
+Aggregates over the `tile` and returns the count of nodata cells. Equivalent to @ref:[`agg_stats`](reference.md#agg-stats)`.noDataCells`. Cf. @ref:[`no_data_cells`](reference.md#no-data-cells), the row-wise count of nodata cells.
+
+#### agg_stats
+
+_Python_:
+
+ Struct[Long, Long, Double, Double, Double, Double] agg_stats(Tile tile)
+
+_SQL_: `rf_agg_stats`
+
+Aggregates over the `tile` and returns statistical summaries of cell values: number of data cells, number of nodata cells, minimum, maximum, mean, and variance. The minimum, maximum, mean, and variance ignore the presence of nodata.
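+
+Because this is an aggregate, it yields a single row per DataFrame or group. A sketch, assuming a DataFrame `rf` with a `tile` column; field names as in the schema under `agg_approx_histogram` below:
+
+```python
+from pyrasterframes.rasterfunctions import agg_stats
+
+agg = rf.agg(agg_stats(rf.tile).alias('stats'))
+agg.select('stats.mean', 'stats.variance').show()
+```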
+
+#### agg_approx_histogram
+
+_Python_:
+
+ Struct[Struct[Long, Long, Double, Double, Double, Double], Array[Struct[Double, Long]]] agg_approx_histogram(Tile tile)
+
+_SQL_: `rf_agg_approx_histogram`
+
+Aggregates over the `tile` and returns statistical summaries of the cell values, including a histogram, in the schema below. The `bins` array contains tuples of histogram values and counts. Typically the values are plotted on the x-axis and the counts on the y-axis.
+
+Note that several of the other cell value statistics functions are convenience methods to extract parts of this result. Related is the @ref:[`tile_histogram`](reference.md#tile-histogram) function which operates on a single row at a time.
+
+```
+ |-- agg_approx_histogram: struct (nullable = true)
+ | |-- stats: struct (nullable = true)
+ | | |-- dataCells: long (nullable = false)
+ | | |-- noDataCells: long (nullable = false)
+ | | |-- min: double (nullable = false)
+ | | |-- max: double (nullable = false)
+ | | |-- mean: double (nullable = false)
+ | | |-- variance: double (nullable = false)
+ | |-- bins: array (nullable = true)
+ | | |-- element: struct (containsNull = true)
+ | | | |-- value: double (nullable = false)
+ | | | |-- count: long (nullable = false)
+```
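+
+The `bins` array can be exploded into one row per bin for inspection or plotting. A sketch, assuming a DataFrame `rf` with a `tile` column:
+
+```python
+import pyspark.sql.functions as F
+from pyrasterframes.rasterfunctions import agg_approx_histogram
+
+hist = rf.agg(agg_approx_histogram(rf.tile).alias('h'))
+# One row per histogram bin, with the cell value and its approximate count.
+hist.select(F.explode('h.bins').alias('bin')).select('bin.value', 'bin.count').show()
+```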
+
+### Tile Local Aggregate Statistics
+
+Local statistics compute element-wise statistics across a DataFrame or group of `tile`s, resulting in a `tile` that has the same dimensions as the inputs.
+
+Consider again our example for Tile Statistics and Aggregate Tile Statistics, this time applying @ref:[`agg_local_mean`](reference.md#agg-local-mean). We see that it computes the element-wise mean across the two rows. In this case it is the mean of one value of 1.0 and one value of 3.0, computed twenty-five times, once for each position in the `tile`.
+
+
+```python
+import pyspark.sql.functions as F
+from pyrasterframes.rasterfunctions import agg_local_mean, explode_tiles, tile_dimensions
+
+lam = spark.sql("""
+SELECT 1 as id, rf_tile_ones(5, 5, 'float32') as t
+UNION
+SELECT 2 as id, rf_local_multiply(rf_tile_ones(5, 5, 'float32'), 3) as t
+""").agg(agg_local_mean(F.col('t')).alias('l'))
+
+## agg_local_mean returns a tile
+lam.select(tile_dimensions(lam.l)).show()
+##
++------------------+
+|tile_dimensions(l)|
++------------------+
+| [5, 5]|
++------------------+
+##
+
+lam.select(explode_tiles(lam.l)).show(10, False)
+##
++------------+---------+---+
+|column_index|row_index|l |
++------------+---------+---+
+|0 |0 |2.0|
+|1 |0 |2.0|
+|2 |0 |2.0|
+|3 |0 |2.0|
+|4 |0 |2.0|
+|0 |1 |2.0|
+|1 |1 |2.0|
+|2 |1 |2.0|
+|3 |1 |2.0|
+|4 |1 |2.0|
++------------+---------+---+
+only showing top 10 rows
+```
+
+
+#### agg_local_max
+
+_Python_:
+
+ Tile agg_local_max(Tile tile)
+
+_SQL_: `rf_agg_local_max`
+
+Compute the cell-local maximum operation over Tiles in a column.
+
+#### agg_local_min
+
+_Python_:
+
+ Tile agg_local_min(Tile tile)
+
+_SQL_: `rf_agg_local_min`
+
+Compute the cell-local minimum operation over Tiles in a column.
+
+#### agg_local_mean
+
+_Python_:
+
+ Tile agg_local_mean(Tile tile)
+
+_SQL_: `rf_agg_local_mean`
+
+Compute the cell-local mean operation over Tiles in a column.
+
+#### agg_local_data_cells
+
+_Python_:
+
+ Tile agg_local_data_cells(Tile tile)
+
+_SQL_: `rf_agg_local_data_cells`
+
+Compute the cell-local count of data cells over Tiles in a column. Returned `tile` has a cell type of `int32`.
+
+#### agg_local_no_data_cells
+
+_Python_:
+
+ Tile agg_local_no_data_cells(Tile tile)
+
+_SQL_: `rf_agg_local_no_data_cells`
+
+Compute the cell-local count of nodata cells over Tiles in a column. Returned `tile` has a cell type of `int32`.
+
+#### agg_local_stats
+
+_Python_:
+
+ Struct[Tile, Tile, Tile, Tile, Tile] agg_local_stats(Tile tile)
+
+_SQL_: `rf_agg_local_stats`
+
+Compute cell-local aggregate count, minimum, maximum, mean, and variance for a column of Tiles. Returns a struct of five `tile`s.
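+
+Each member of the returned struct is itself a `tile` with the same dimensions as the inputs. A sketch for inspecting the result, assuming a DataFrame `rf` with a `tile` column:
+
+```python
+from pyrasterframes.rasterfunctions import agg_local_stats
+
+local_stats = rf.agg(agg_local_stats(rf.tile).alias('stats'))
+local_stats.printSchema()  # reveals the five tile-valued fields of the struct
+```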
+
+
+### Converting Tiles
+
+RasterFrames provides several ways to convert a `tile` into other data structures. See also functions for @ref:[creating tiles](reference.md#tile-creation).
+
+#### explode_tiles
+
+_Python_:
+
+ Int, Int, Numeric* explode_tiles(Tile* tile)
+
+_SQL_: `rf_explode_tiles`
+
+Creates a row for each cell in the given `tile` columns. Multiple `tile` columns can be passed in, and the returned DataFrame will have one numeric column per input, plus `column_index` and `row_index` columns. Inverse of @ref:[`assemble_tile`](reference.md#assemble-tile). When using this function, be sure each row has a unique identifier so the operation can be inverted.
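+
+Per-cell rows make it easy to filter or aggregate on cell values directly. A sketch, assuming a DataFrame `rf` with `id` and `tile` columns; note the exploded value column takes the name of its input column:
+
+```python
+import pyspark.sql.functions as F
+from pyrasterframes.rasterfunctions import explode_tiles
+
+cells = rf.select(rf.id, explode_tiles(rf.tile))
+# One row per cell, keyed by id, column_index, and row_index.
+cells.filter(F.col('tile') > 0).show()
+```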
+
+#### explode_tiles_sample
+
+_Python_:
+
+ Int, Int, Numeric* explode_tiles_sample(Double sample_frac, Long seed, Tile* tile)
+
+Python only. As with @ref:[`explode_tiles`](reference.md#explode-tiles), but taking a randomly sampled subset of cells. Equivalent to the below, but this implementation is optimized for speed. Parameter `sample_frac` should be between 0.0 and 1.0.
+
+```python
+df.select(df.id, explode_tiles(df.tile1, df.tile2, df.tile3)) \
+ .sample(False, 0.05, 8675309)
+# Equivalent result, faster
+df.select(df.id, explode_tiles_sample(0.05, 8675309, df.tile1, df.tile2, df.tile3))
+```
+
+#### tile_to_int_array
+
+_Python_:
+
+ Array tile_to_int_array(Tile tile)
+
+_SQL_: `rf_tile_to_int_array`
+
+
+Converts a `tile` column to a Spark SQL [Array](http://spark.apache.org/docs/2.3.2/api/python/pyspark.sql.html#pyspark.sql.types.ArrayType), in row-major order. Floating-point cell types will be coerced to an integral type by flooring.
+
+
+#### tile_to_double_array
+
+_Python_:
+
+    Array tile_to_double_array(Tile tile)
+
+_SQL_: `rf_tile_to_double_array`
+
+Converts a `tile` column to a Spark SQL [Array](http://spark.apache.org/docs/2.3.2/api/python/pyspark.sql.html#pyspark.sql.types.ArrayType), in row-major order. Integral cell types will be coerced to doubles.
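+
+Since the array is flattened in row-major order, its length is the product of the `tile` dimensions. A sketch, assuming a DataFrame `rf` with a `tile` column:
+
+```python
+import pyspark.sql.functions as F
+from pyrasterframes.rasterfunctions import tile_to_double_array
+
+arrays = rf.select(tile_to_double_array(rf.tile).alias('a'))
+arrays.select(F.size('a').alias('n_cells')).show()  # n_cells == cols * rows
+```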
+
+
+#### render_ascii
+
+_Python_:
+
+ String render_ascii(Tile tile)
+
+_SQL_: `rf_render_ascii`
+
+Pretty print the tile values as plain text.
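+
+Useful for a quick look at small tiles in a REPL. A sketch, assuming a DataFrame `rf` with a `tile` column:
+
+```python
+from pyrasterframes.rasterfunctions import render_ascii
+
+# truncate=False so the rendered rows are not cut off
+rf.select(render_ascii(rf.tile)).show(1, False)
+```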
-For the most up to date list of UDFs, look at API documentation for @scaladoc[`RasterFunctions`][RasterFunctions]. These UDFs are also registered with the SQL engine under the same name but with a `rf_` prefix (e.g. `data_cells` becomes `rf_data_cells`).
-The full API documentation can be found [here][scaladoc].
[RasterFunctions]: astraea.spark.rasterframes.RasterFunctions
[scaladoc]: latest/api/index.html
+
diff --git a/docs/src/main/tut/release-notes.md b/docs/src/main/tut/release-notes.md
index 3b4daa5ce..c043ac772 100644
--- a/docs/src/main/tut/release-notes.md
+++ b/docs/src/main/tut/release-notes.md
@@ -4,9 +4,17 @@
### 0.8.0
+* Added new tile functions `round`, `log`, `log10`, `log2`, `log1p`, `exp`, `exp10`, `exp2`, `expm1`, and `resample`.
* Introduced at the source level the concept of a `RasterSource` and `RasterRef`, enabling lazy/delayed read of sub-scene tiles.
* _Deprecation_: Tile column functions (in `RasterFunctions`) and SQL registered names have all been renamed to follow `snake_case` conventions, matching SQL and Python. A temporary compatibility shim is included so that code built against 0.7.1 and earlier still work. These will be marked as deprecated.
* Added `withKryoSerialization` extension methods on `SparkSession.Builder` and `SparkConf`.
+* _Breaking_: In Scala and SQL, `..._scalar` functions (e.g. `local_add_scalar`) have been removed. Non-scalar forms now dynamically detect the type of the right-hand side.
+* _Breaking_: `tileToArray` has been replaced with `tile_to_array_double` and `tile_to_array_int`.
+* Added `render_matrix` debugging function.
+* _Breaking_: renamed `agg_histogram` to `agg_approx_histogram`, `local_agg_stats` to `agg_local_stats`, `local_agg_max` to `agg_local_max`, `local_agg_min` to `agg_local_min`, `local_agg_mean` to `agg_local_mean`, `local_agg_data_cells` to `agg_local_data_cells`, `local_agg_no_data_cells` to `agg_local_no_data_cells`.
+* _Breaking_: `CellHistogram` no longer carries along approximate statistics, due to confusing behavior. Use `agg_stats` instead.
+* Introduced `LocalCellStatistics` class to wrap together results from `LocalStatsAggregate`.
+* _Breaking_: `TileDimensions` moved from `astraea.spark.rasterframes` to `astraea.spark.rasterframes.model`.
## 0.7.x
diff --git a/experimental/src/it/scala/astraea/spark/rasterframes/experimental/datasource/awspds/L8CatalogRelationTest.scala b/experimental/src/it/scala/astraea/spark/rasterframes/experimental/datasource/awspds/L8CatalogRelationTest.scala
index 3b5f43f14..e86376246 100644
--- a/experimental/src/it/scala/astraea/spark/rasterframes/experimental/datasource/awspds/L8CatalogRelationTest.scala
+++ b/experimental/src/it/scala/astraea/spark/rasterframes/experimental/datasource/awspds/L8CatalogRelationTest.scala
@@ -63,7 +63,7 @@ class L8CatalogRelationTest extends TestEnvironment {
it("should download geotiff as tiles") {
val b01 = scenes
.select($"*", read_tiles(l8_band_url("B1")))
- assert(b01.count() === 289)
+ assert(b01.count() === 1089)
}
}
}
diff --git a/experimental/src/it/scala/astraea/spark/rasterframes/experimental/datasource/awspds/L8RelationTest.scala b/experimental/src/it/scala/astraea/spark/rasterframes/experimental/datasource/awspds/L8RelationTest.scala
index e6f1ed89d..688866a93 100644
--- a/experimental/src/it/scala/astraea/spark/rasterframes/experimental/datasource/awspds/L8RelationTest.scala
+++ b/experimental/src/it/scala/astraea/spark/rasterframes/experimental/datasource/awspds/L8RelationTest.scala
@@ -50,10 +50,6 @@ class L8RelationTest extends TestEnvironment with BeforeAndAfterAll with BeforeA
scenes = sql(query).cache()
}
- after {
- spark.sparkContext.register()
- }
-
describe("Read L8 on PDS as a DataSource") {
it("should count scenes") {
assert(scenes.schema.size === 4)
diff --git a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/DownloadExpression.scala b/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/DownloadExpression.scala
index 124366dbf..32d55645b 100644
--- a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/DownloadExpression.scala
+++ b/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/DownloadExpression.scala
@@ -63,7 +63,7 @@ case class DownloadExpression(override val child: Expression, colPrefix: String)
}
object DownloadExpression {
- import astraea.spark.rasterframes.encoders.SparkDefaultEncoders._
+ import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.arrayEnc
def apply(urlColumn: Column): TypedColumn[Any, Array[Byte]] =
new Column(
diff --git a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/awspds/L8Relation.scala b/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/awspds/L8Relation.scala
index 35337ca31..845f5aad3 100644
--- a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/awspds/L8Relation.scala
+++ b/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/awspds/L8Relation.scala
@@ -24,7 +24,7 @@ import astraea.spark.rasterframes._
import astraea.spark.rasterframes.encoders.CatalystSerializer
import astraea.spark.rasterframes.encoders.CatalystSerializer._
import astraea.spark.rasterframes.experimental.datasource.awspds.L8Relation.Bands
-import astraea.spark.rasterframes.expressions.{RasterSourceToRasterRefs, URIToRasterSource}
+import astraea.spark.rasterframes.expressions.transformers.{RasterSourceToRasterRefs, URIToRasterSource}
import astraea.spark.rasterframes.ref.RasterRef
import astraea.spark.rasterframes.ref.RasterSource.ReadCallback
import astraea.spark.rasterframes.rules.SpatialFilters.{Contains, Intersects}
diff --git a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/awspds/package.scala b/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/awspds/package.scala
index 7ea77eb07..60b1169fc 100644
--- a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/awspds/package.scala
+++ b/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/awspds/package.scala
@@ -21,7 +21,7 @@
package astraea.spark.rasterframes.experimental.datasource
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
-import astraea.spark.rasterframes.encoders.SparkDefaultEncoders._
+import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders._
/**
* Module support.
diff --git a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/package.scala b/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/package.scala
index ea5904f16..4e6129be4 100644
--- a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/package.scala
+++ b/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/package.scala
@@ -22,7 +22,7 @@
package astraea.spark.rasterframes.experimental
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
-import org.apache.spark.sql.rf.VersionShims
+import org.apache.spark.sql.rf.VersionShims._
/**
@@ -42,6 +42,6 @@ package object datasource {
// Expression-oriented functions have a different registration scheme
// Currently have to register with the `builtin` registry due to Spark data hiding.
val registry: FunctionRegistry = rf.registry(sqlContext)
- VersionShims.registerExpression(registry, "rf_read_tiles", ReadTilesExpression.apply)
+ registry.registerExpression[ReadTilesExpression]("rf_read_tiles")
}
}
diff --git a/project/ProjectPlugin.scala b/project/ProjectPlugin.scala
index 3a491200d..4f575edb2 100644
--- a/project/ProjectPlugin.scala
+++ b/project/ProjectPlugin.scala
@@ -40,7 +40,6 @@ object ProjectPlugin extends AutoPlugin {
rfSparkVersion in ThisBuild := "2.3.2" ,
rfGeoTrellisVersion in ThisBuild := "2.1.0",
rfGeoMesaVersion in ThisBuild := "2.1.0",
-
publishTo := sonatypePublishTo.value,
publishMavenStyle := true,
publishArtifact in (Compile, packageDoc) := true,
@@ -67,6 +66,12 @@ object ProjectPlugin extends AutoPlugin {
name = "Ben Guseman",
email = "bguseman@astraea.io",
url = url("http://www.astraea.io")
+ ),
+ Developer(
+ id = "vpipkt",
+ name = "Jason Brown",
+ email = "jbrown@astraea.io",
+ url = url("http://www.astraea.io")
)
),
initialCommands in console :=
@@ -106,17 +111,17 @@ object ProjectPlugin extends AutoPlugin {
MergeStrategy.rename
case PathList("META-INF", xs @ _*) ⇒
xs map {_.toLowerCase} match {
- case ("manifest.mf" :: Nil) | ("index.list" :: Nil) | ("dependencies" :: Nil) ⇒
+ case "manifest.mf" :: Nil | "index.list" :: Nil | "dependencies" :: Nil ⇒
MergeStrategy.discard
- case ps @ (x :: _) if ps.last.endsWith(".sf") || ps.last.endsWith(".dsa") ⇒
+ case ps @ _ :: _ if ps.last.endsWith(".sf") || ps.last.endsWith(".dsa") ⇒
MergeStrategy.discard
case "plexus" :: _ ⇒
MergeStrategy.discard
case "services" :: _ ⇒
MergeStrategy.filterDistinctLines
- case ("spring.schemas" :: Nil) | ("spring.handlers" :: Nil) ⇒
+ case "spring.schemas" :: Nil | "spring.handlers" :: Nil ⇒
MergeStrategy.filterDistinctLines
- case ("maven" :: rest ) if rest.lastOption.exists(_.startsWith("pom")) ⇒
+ case "maven" :: rest if rest.lastOption.exists(_.startsWith("pom")) ⇒
MergeStrategy.discard
case _ ⇒ MergeStrategy.deduplicate
}
@@ -126,8 +131,8 @@ object ProjectPlugin extends AutoPlugin {
)
def releaseSettings: Seq[Def.Setting[_]] = {
- val buildSite: (State) ⇒ State = releaseStepTask(makeSite in LocalProject("docs"))
- val publishSite: (State) ⇒ State = releaseStepTask(ghpagesPushSite in LocalProject("docs"))
+ val buildSite: State ⇒ State = releaseStepTask(makeSite in LocalProject("docs"))
+ val publishSite: State ⇒ State = releaseStepTask(ghpagesPushSite in LocalProject("docs"))
Seq(
releaseIgnoreUntrackedFiles := true,
releaseTagName := s"${version.value}",
diff --git a/project/plugins.sbt b/project/plugins.sbt
index 4d079b71d..a3fd9b0e9 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -26,4 +26,5 @@ addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.1.1")
addSbtPlugin("org.spark-packages" % "sbt-spark-package" % "0.2.7-astraea.1")
addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.4.1")
+addSbtPlugin("net.vonbuchholtz" % "sbt-dependency-check" % "0.2.10")
diff --git a/pyrasterframes/python/pyrasterframes/rasterfunctions.py b/pyrasterframes/python/pyrasterframes/rasterfunctions.py
index 1f96b3906..589b256d8 100644
--- a/pyrasterframes/python/pyrasterframes/rasterfunctions.py
+++ b/pyrasterframes/python/pyrasterframes/rasterfunctions.py
@@ -154,7 +154,7 @@ def _(data_tile, mask_tile, mask_value):
_rf_unique_functions = {
'array_to_tile': _create_arrayToTile(),
'assemble_tile': _create_assembleTile(),
- 'cellTypes': lambda: _context_call('cellTypes'),
+ 'cell_types': lambda: _context_call('cell_types'),
'convert_cell_type': _create_convertCellType(),
'explode_tiles': _create_explode_tiles(),
'explode_tiles_sample': _create_explode_tiles_sample(),
@@ -196,10 +196,13 @@ def _(data_tile, mask_tile, mask_value):
# ------- RasterFrames functions -------
'tile_dimensions': 'Query the number of (cols, rows) in a Tile.',
'envelope': 'Extracts the bounding box (envelope) of the geometry.',
- 'tile_to_int_array': 'Flattens Tile into an array of integers.',
- 'tile_to_double_array': 'Flattens Tile into an array of doubles.',
+ 'tile_to_int_array': 'Flattens Tile into an array of integers. Deprecated in favor of `tile_to_array_int`.',
+ 'tile_to_double_array': 'Flattens Tile into an array of doubles. Deprecated in favor of `tile_to_array_double`',
+ 'tile_to_array_int': 'Flattens Tile into an array of integers.',
+ 'tile_to_array_double': 'Flattens Tile into an array of doubles.',
'cell_type': 'Extract the Tile\'s cell type',
- 'agg_histogram': 'Compute the full column aggregate floating point histogram',
+    'is_no_data_tile': 'Report if the Tile is entirely NODATA cells',
+ 'agg_approx_histogram': 'Compute the full column aggregate floating point histogram',
'agg_stats': 'Compute the full column aggregate floating point statistics',
'agg_mean': 'Computes the column aggregate mean',
'agg_data_cells': 'Computes the number of non-NoData cells in a column',
@@ -219,11 +222,11 @@ def _(data_tile, mask_tile, mask_value):
'local_divide': 'Divide two Tiles',
'normalized_difference': 'Compute the normalized difference of two tiles',
'local_agg_stats': 'Compute cell-local aggregate descriptive statistics for a column of Tiles.',
- 'local_agg_max': 'Compute the cell-wise/local max operation between Tiles in a column.',
- 'local_agg_min': 'Compute the cellwise/local min operation between Tiles in a column.',
- 'local_agg_mean': 'Compute the cellwise/local mean operation between Tiles in a column.',
- 'local_agg_data_cells': 'Compute the cellwise/local count of non-NoData cells for all Tiles in a column.',
- 'local_agg_no_data_cells': 'Compute the cellwise/local count of NoData cells for all Tiles in a column.',
+ 'agg_local_max': 'Compute the cell-wise/local max operation between Tiles in a column.',
+ 'agg_local_min': 'Compute the cellwise/local min operation between Tiles in a column.',
+ 'agg_local_mean': 'Compute the cellwise/local mean operation between Tiles in a column.',
+ 'agg_local_data_cells': 'Compute the cellwise/local count of non-NoData cells for all Tiles in a column.',
+ 'agg_local_no_data_cells': 'Compute the cellwise/local count of NoData cells for all Tiles in a column.',
'mask': 'Where the mask (second) tile contains NODATA, replace values in the source (first) tile with NODATA.',
'inverse_mask': 'Where the mask (second) tile DOES NOT contain NODATA, replace values in the source (first) tile with NODATA.',
'local_less': 'Cellwise less than comparison between two tiles',
@@ -232,6 +235,17 @@ def _(data_tile, mask_tile, mask_value):
'local_greater_equal': 'Cellwise greater than or equal to comparison between two tiles',
'local_equal': 'Cellwise equality comparison between two tiles',
'local_unequal': 'Cellwise inequality comparison between two tiles',
+ 'round': 'Round cell values to the nearest integer without changing the cell type',
+ 'log': 'Performs cell-wise natural logarithm',
+    'log10': 'Performs cell-wise logarithm with base 10',
+    'log2': 'Performs cell-wise logarithm with base 2',
+ 'log1p': 'Performs natural logarithm of cell values plus one',
+ 'exp': 'Performs cell-wise exponential',
+ 'exp2': 'Compute 2 to the power of cell values',
+ 'exp10': 'Compute 10 to the power of cell values',
+ 'expm1': 'Performs cell-wise exponential, then subtract one',
+    'resample': 'Resample tile to a different size, based on a scalar factor or a tile whose dimensions to match',
+
# ------- JTS functions -------
# spatial constructors
'st_geomFromGeoHash': '',
diff --git a/pyrasterframes/python/pyrasterframes/types.py b/pyrasterframes/python/pyrasterframes/types.py
index c4c509f4a..b82cfb70c 100644
--- a/pyrasterframes/python/pyrasterframes/types.py
+++ b/pyrasterframes/python/pyrasterframes/types.py
@@ -160,15 +160,14 @@ def module(cls):
def scalaUDT(cls):
return 'org.apache.spark.sql.rf.TileUDT'
+ # NB: These will need implementations if UDFs are to be supported,
+ # preferably in numpy arrays.
def serialize(self, obj):
if (obj is None): return None
- return Row(obj.cellType().name().encode("UTF8"),
- obj.cols().toShort(),
- obj.rows().toShort(),
- obj.toBytes)
+ return None
def deserialize(self, datum):
- return RFContext._jvm_mirror().generate_tile(datum[0], datum[1], datum[2], datum[3])
+ return None
diff --git a/pyrasterframes/python/tests/PyRasterFramesTests.py b/pyrasterframes/python/tests/PyRasterFramesTests.py
index 954af08f3..b47e25577 100644
--- a/pyrasterframes/python/tests/PyRasterFramesTests.py
+++ b/pyrasterframes/python/tests/PyRasterFramesTests.py
@@ -98,7 +98,11 @@ def test_general(self):
.withColumn('sum', tile_sum(self.tileCol)) \
.withColumn('stats', tile_stats(self.tileCol)) \
.withColumn('envelope', envelope('bounds')) \
- .withColumn('ascii', render_ascii(self.tileCol))
+ .withColumn('ascii', render_ascii(self.tileCol)) \
+ .withColumn('log', log(self.tileCol)) \
+ .withColumn('exp', exp(self.tileCol)) \
+ .withColumn('expm1', expm1(self.tileCol)) \
+ .withColumn('round', round(self.tileCol))
df.show()
@@ -117,7 +121,7 @@ def test_aggregations(self):
agg_data_cells(self.tileCol),
agg_no_data_cells(self.tileCol),
agg_stats(self.tileCol),
- agg_histogram(self.tileCol)
+ agg_approx_histogram(self.tileCol)
)
aggs.show()
row = aggs.first()
@@ -126,7 +130,7 @@ def test_aggregations(self):
print(row['agg_data_cells(tile)'])
self.assertEqual(row['agg_data_cells(tile)'], 387000)
self.assertEqual(row['agg_no_data_cells(tile)'], 1000)
- self.assertEqual(row['agg_stats(tile)'].dataCells, row['agg_data_cells(tile)'])
+ self.assertEqual(row['agg_stats(tile)'].data_cells, row['agg_data_cells(tile)'])
def test_sql(self):
@@ -190,17 +194,28 @@ def test_maskByValue(self):
mask_value = 4
rf1 = self.rf.select(self.rf.tile,
- local_multiply_scalar_int(
+ local_multiply(
convert_cell_type(
local_greater_scalar_int(self.rf.tile, 25000),
"uint8"),
- mask_value).alias('mask'))
+ lit(mask_value)).alias('mask'))
rf2 = rf1.select(rf1.tile, mask_by_value(rf1.tile, rf1.mask, lit(mask_value)).alias('masked'))
result = rf2.agg(agg_no_data_cells(rf2.tile) < agg_no_data_cells(rf2.masked)) \
.collect()[0][0]
self.assertTrue(result)
+ def test_resample(self):
+ from pyspark.sql.functions import lit
+ result = self.rf.select(
+ tile_min(local_equal(
+ resample(resample(self.rf.tile, lit(2)), lit(0.5)),
+ self.rf.tile))
+ ).collect()[0][0]
+
+ self.assertTrue(result == 1) # short hand for all values are true
+
+
def suite():
functionTests = unittest.TestSuite()
functionTests.addTest(RasterFunctionsTest('test_identify_columns'))
@@ -212,6 +227,7 @@ def suite():
functionTests.addTest(RasterFunctionsTest('test_explode'))
functionTests.addTest(RasterFunctionsTest('test_sql'))
functionTests.addTest(RasterFunctionsTest('test_maskByValue'))
+ functionTests.addTest(RasterFunctionsTest('test_resample'))
return functionTests
diff --git a/pyrasterframes/src/main/scala/astraea/spark/rasterframes/py/PyRFContext.scala b/pyrasterframes/src/main/scala/astraea/spark/rasterframes/py/PyRFContext.scala
index e4255d515..b80a8a3f8 100644
--- a/pyrasterframes/src/main/scala/astraea/spark/rasterframes/py/PyRFContext.scala
+++ b/pyrasterframes/src/main/scala/astraea/spark/rasterframes/py/PyRFContext.scala
@@ -94,7 +94,13 @@ class PyRFContext(implicit sparkSession: SparkSession) extends RasterFunctions
*/
def cell_type(name: String): CellType = CellType.fromName(name)
- def cell_types: Seq[String] = astraea.spark.rasterframes.functions.cellTypes()
+ /**
+ * Convenience list of valid cell type strings
+ * @return Java List of String, which py4j can interpret as a python `list`
+ */
+ def cell_types = {
+ astraea.spark.rasterframes.functions.cellTypes().asJava
+ }
/** DESERIALIZATION **/
@@ -117,51 +123,51 @@ class PyRFContext(implicit sparkSession: SparkSession) extends RasterFunctions
def temporalKeyColumn(df: DataFrame): Column =
df.asRF.temporalKeyColumn.orNull
- def tile_to_int_array(col: Column): Column = tile_to_array[Int](col)
+ def tile_to_int_array(col: Column): Column = tile_to_array_int(col)
- def tile_to_double_array(col: Column): Column = tile_to_array[Double](col)
+ def tile_to_double_array(col: Column): Column = tile_to_array_double(col)
// All the scalar tile arithmetic functions
- def local_add_scalar(col: Column, scalar: Double): Column = local_add_scalar[Double](col, scalar)
+ def local_add_scalar(col: Column, scalar: Double): Column = local_add[Double](col, scalar)
- def local_add_scalar_int(col: Column, scalar: Int): Column = local_add_scalar[Int](col, scalar)
+ def local_add_scalar_int(col: Column, scalar: Int): Column = local_add[Int](col, scalar)
- def local_subtract_scalar(col: Column, scalar: Double): Column = local_subtract_scalar[Double](col, scalar)
+ def local_subtract_scalar(col: Column, scalar: Double): Column = local_subtract[Double](col, scalar)
- def local_subtract_scalar_int(col: Column, scalar: Int): Column = local_subtract_scalar[Int](col, scalar)
+ def local_subtract_scalar_int(col: Column, scalar: Int): Column = local_subtract[Int](col, scalar)
- def local_divide_scalar(col: Column, scalar: Double): Column = local_divide_scalar[Double](col, scalar)
+ def local_divide_scalar(col: Column, scalar: Double): Column = local_divide[Double](col, scalar)
- def local_divide_scalar_int(col: Column, scalar: Int): Column = local_divide_scalar[Int](col, scalar)
+ def local_divide_scalar_int(col: Column, scalar: Int): Column = local_divide[Int](col, scalar)
- def local_multiply_scalar(col: Column, scalar: Double): Column = local_multiply_scalar[Double](col, scalar)
+ def local_multiply_scalar(col: Column, scalar: Double): Column = local_multiply[Double](col, scalar)
- def local_multiply_scalar_int(col: Column, scalar: Int): Column = local_multiply_scalar[Int](col, scalar)
+ def local_multiply_scalar_int(col: Column, scalar: Int): Column = local_multiply[Int](col, scalar)
- def local_less_scalar(col: Column, scalar: Double): Column = local_less_scalar[Double](col, scalar)
+ def local_less_scalar(col: Column, scalar: Double): Column = local_less[Double](col, scalar)
- def local_less_scalar_int(col: Column, scalar: Int): Column = local_less_scalar[Int](col, scalar)
+ def local_less_scalar_int(col: Column, scalar: Int): Column = local_less[Int](col, scalar)
- def local_less_equal_scalar(col: Column, scalar: Double): Column = local_less_equal_scalar[Double](col, scalar)
+ def local_less_equal_scalar(col: Column, scalar: Double): Column = local_less_equal[Double](col, scalar)
- def local_less_equal_scalar_int(col: Column, scalar: Int): Column = local_less_equal_scalar[Int](col, scalar)
+ def local_less_equal_scalar_int(col: Column, scalar: Int): Column = local_less_equal[Int](col, scalar)
- def local_greater_scalar(col: Column, scalar: Double): Column = local_greater_scalar[Double](col, scalar)
+ def local_greater_scalar(col: Column, scalar: Double): Column = local_greater[Double](col, scalar)
- def local_greater_scalar_int(col: Column, scalar: Int): Column = local_greater_scalar[Int](col, scalar)
+ def local_greater_scalar_int(col: Column, scalar: Int): Column = local_greater[Int](col, scalar)
- def local_greater_equal_scalar(col: Column, scalar: Double): Column = local_greater_equal_scalar[Double](col, scalar)
+ def local_greater_equal_scalar(col: Column, scalar: Double): Column = local_greater_equal[Double](col, scalar)
- def local_greater_equal_scalar_int(col: Column, scalar: Int): Column = local_greater_equal_scalar[Int](col, scalar)
+ def local_greater_equal_scalar_int(col: Column, scalar: Int): Column = local_greater_equal[Int](col, scalar)
- def local_equal_scalar(col: Column, scalar: Double): Column = local_equal_scalar[Double](col, scalar)
+ def local_equal_scalar(col: Column, scalar: Double): Column = local_equal[Double](col, scalar)
- def local_equal_scalar_int(col: Column, scalar: Int): Column = local_equal_scalar[Int](col, scalar)
+ def local_equal_scalar_int(col: Column, scalar: Int): Column = local_equal[Int](col, scalar)
- def local_unequal_scalar(col: Column, scalar: Double): Column = local_unequal_scalar[Double](col, scalar)
+ def local_unequal_scalar(col: Column, scalar: Double): Column = local_unequal[Double](col, scalar)
- def local_unequal_scalar_int(col: Column, scalar: Int): Column = local_unequal_scalar[Int](col, scalar)
+ def local_unequal_scalar_int(col: Column, scalar: Int): Column = local_unequal[Int](col, scalar)
// return toRaster, get just the tile, and make an array out of it
def toIntRaster(df: DataFrame, colname: String, cols: Int, rows: Int): Array[Int] = {
diff --git a/version.sbt b/version.sbt
index a0bf63cbb..eb883072c 100644
--- a/version.sbt
+++ b/version.sbt
@@ -1 +1 @@
-version in ThisBuild := "0.8.0-RC3"
+version in ThisBuild := "0.8.0-RC4"