diff --git a/.circleci/config.yml b/.circleci/config.yml
index 32e5a1e18..e9d3844bf 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -5,7 +5,7 @@ _defaults: &defaults
environment:
TERM: dumb
docker:
- - image: circleci/openjdk:8-jdk
+ - image: s22s/rasterframes-circleci:latest
_setenv: &setenv
name: set CloudRepo credentials
@@ -19,81 +19,111 @@ _delenv: &unsetenv
_restore_cache: &restore_cache
keys:
- - v2-dependencies-{{ checksum "build.sbt" }}
- - v2-dependencies-
+ - v3-dependencies-{{ checksum "build.sbt" }}
+ - v3-dependencies-
_save_cache: &save_cache
- key: v2-dependencies--{{ checksum "build.sbt" }}
+ key: v3-dependencies--{{ checksum "build.sbt" }}
paths:
+ - ~/.cache/coursier
- ~/.ivy2/cache
- ~/.sbt
- - ~/.rf_cache
+ - ~/.local
jobs:
- staticAnalysis:
+ test:
<<: *defaults
-
+ resource_class: large
steps:
- checkout
- run: *setenv
- restore_cache:
<<: *restore_cache
- - run: cat /dev/null | sbt dependencyCheck
- - run: cat /dev/null | sbt --debug dumpLicenseReport
+ - run: ulimit -c unlimited -S
+ - run: cat /dev/null | sbt -batch core/test datasource/test experimental/test pyrasterframes/test
+ - run:
+ command: |
+ mkdir -p /tmp/core_dumps
+ cp core.* *.hs /tmp/core_dumps 2> /dev/null || true
+ when: on_fail
- - run: *unsetenv
+ - store_artifacts:
+ path: /tmp/core_dumps
+ - run: *unsetenv
- save_cache:
<<: *save_cache
- - store_artifacts:
- path: datasource/target/scala-2.11/dependency-check-report.html
- destination: dependency-check-report-datasource.html
- - store_artifacts:
- path: experimental/target/scala-2.11/dependency-check-report.html
- destination: dependency-check-report-experimental.html
- - store_artifacts:
- path: core/target/scala-2.11/dependency-check-report.html
- destination: dependency-check-report-core.html
- - store_artifacts:
- path: pyrasterframes/target/scala-2.11/dependency-check-report.html
- destination: dependency-check-report-pyrasterframes.html
- test:
+ docs:
<<: *defaults
- resource_class: large
+ resource_class: xlarge
steps:
- checkout
- run: *setenv
+
- restore_cache:
<<: *restore_cache
- - run: sudo apt-get install python-pip pandoc && pip install setuptools # required for pyrasterframes testing
- - run: cat /dev/null | sbt test
+ - run: ulimit -c unlimited -S
+ - run: pip3 install --quiet --user -r pyrasterframes/src/main/python/requirements.txt
+ - run:
+ command: cat /dev/null | sbt makeSite
+ no_output_timeout: 30m
+
+ - run:
+ command: |
+ mkdir -p /tmp/core_dumps
+ cp core.* *.hs /tmp/core_dumps 2> /dev/null || true
+ when: on_fail
+
+ - store_artifacts:
+ path: /tmp/core_dumps
+
+ - store_artifacts:
+ path: docs/target/site
+ destination: rf-site
- run: *unsetenv
+
- save_cache:
<<: *save_cache
- publish:
+ it:
<<: *defaults
- resource_class: large
+ resource_class: xlarge
steps:
- checkout
- run: *setenv
+
- restore_cache:
<<: *restore_cache
- - run: sudo apt-get install python-pip pandoc && pip install setuptools # required for pyrasterframes testing
- - run: cat /dev/null | sbt test
- - run: cat /dev/null | sbt publish
+ - run: ulimit -c unlimited -S
+ - run:
+ command: cat /dev/null | sbt it:test
+ no_output_timeout: 30m
+
+ - run:
+ command: |
+ mkdir -p /tmp/core_dumps
+ cp core.* *.hs /tmp/core_dumps 2> /dev/null || true
+ when: on_fail
+
+ - store_artifacts:
+ path: /tmp/core_dumps
- run: *unsetenv
+
- save_cache:
<<: *save_cache
- it:
- <<: *defaults
+ itWithoutGdal:
+ working_directory: ~/repo
+ environment:
+ TERM: dumb
+ docker:
+ - image: circleci/openjdk:8-jdk
resource_class: xlarge
steps:
- checkout
@@ -110,6 +140,36 @@ jobs:
- save_cache:
<<: *save_cache
+ staticAnalysis:
+ <<: *defaults
+
+ steps:
+ - checkout
+ - run: *setenv
+ - restore_cache:
+ <<: *restore_cache
+
+ - run: cat /dev/null | sbt dependencyCheck
+ - run: cat /dev/null | sbt --debug dumpLicenseReport
+
+ - run: *unsetenv
+
+ - save_cache:
+ <<: *save_cache
+ - store_artifacts:
+ path: datasource/target/scala-2.11/dependency-check-report.html
+ destination: dependency-check-report-datasource.html
+ - store_artifacts:
+ path: experimental/target/scala-2.11/dependency-check-report.html
+ destination: dependency-check-report-experimental.html
+ - store_artifacts:
+ path: core/target/scala-2.11/dependency-check-report.html
+ destination: dependency-check-report-core.html
+ - store_artifacts:
+ path: pyrasterframes/target/scala-2.11/dependency-check-report.html
+ destination: dependency-check-report-pyrasterframes.html
+
+
workflows:
version: 2
all:
@@ -119,20 +179,29 @@ workflows:
filters:
branches:
only:
- - /astraea\/feature\/.*-its/
- - publish:
+ - /feature\/.*-its/
+ - itWithoutGdal:
filters:
branches:
only:
- - astraea/develop
- nightlyReleaseAstraea:
+ - /feature\/.*-its/
+ - docs:
+ filters:
+ branches:
+ only:
+ - /feature\/.*docs.*/
+ - /docs\/.*/
+
+ nightly:
triggers:
- schedule:
cron: "0 8 * * *"
filters:
branches:
only:
- - astraea/develop
+ - develop
jobs:
- it
- - staticAnalysis
+ - itWithoutGdal
+ - docs
+# - staticAnalysis
diff --git a/.gitignore b/.gitignore
index ca41e7212..ff43c9712 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,3 +25,5 @@ metastore_db
tour/jars
tour/*.tiff
scoverage-report*
+
+zz-*
diff --git a/.scalafmt.conf b/.scalafmt.conf
index 4d09e93c7..ca5e10394 100644
--- a/.scalafmt.conf
+++ b/.scalafmt.conf
@@ -1,10 +1,7 @@
maxColumn = 138
continuationIndent.defnSite = 2
-continuationIndent.callSite = 2
-continuationIndent.extendSite = 2
binPack.parentConstructors = true
binPack.literalArgumentLists = false
-binPack.unsafeCallSite = true
newlines.penalizeSingleSelectMultiArgList = false
newlines.sometimesBeforeColonInMethodReturnType = false
align.openParenCallSite = false
@@ -16,5 +13,4 @@ rewriteTokens {
}
optIn.selfAnnotationNewline = false
optIn.breakChainOnFirstMethodDot = true
-optIn.configStyleArguments = false
-importSelectors = BinPack
+importSelectors = BinPack
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
index fbe2823fa..12cad75b7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,37 +1,38 @@
sudo: false
+dist: xenial
+language: python
-language: scala
+python:
+ - "3.7"
cache:
directories:
- $HOME/.ivy2/cache
- $HOME/.sbt/boot
- $HOME/.rf_cache
+ - $HOME/.cache/coursier
scala:
- 2.11.11
-jdk:
- - oraclejdk8
+env:
+ - COURSIER_VERBOSITY=-1 JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
addons:
apt:
packages:
+ - openjdk-8-jdk
- pandoc
- - python-pip
install:
- - pip install setuptools
-
-sbt_args: -no-colors
+ - pip install rasterio shapely pandas numpy pweave
+ - wget -O - https://piccolo.link/sbt-1.2.8.tgz | tar xzf -
script:
- - sbt test
- - sbt it:test
+ - sbt/bin/sbt -java-home $JAVA_HOME -batch test
+ - sbt/bin/sbt -java-home $JAVA_HOME -batch it:test
# - sbt -Dfile.encoding=UTF8 clean coverage test coverageReport
# Tricks to avoid unnecessary cache updates
- find $HOME/.sbt -name "*.lock" | xargs rm
- find $HOME/.ivy2 -name "ivydata-*.properties" | xargs rm
-#after_success:
-# - bash <(curl -s https://codecov.io/bash)
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 756d4cb8a..1be2dcdf7 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,19 +1,10 @@
# Contributing to RasterFrames
-Thanks for your interest in this project.
+Community contributions are always welcome. To get started, please review this document,
+the [code of conduct](https://github.com/locationtech/rasterframes/blob/develop/CODE_OF_CONDUCT.md), and reach out to
+us on [gitter](https://gitter.im/locationtech/rasterframes) so the community can help you get started!
-## Project Description
-
-LocationTech RasterFrames brings the power of Spark DataFrames to geospatial
-raster data, empowered by the map algebra and tile layer operations of
-GeoTrellis. The underlying purpose of RasterFrames is to allow data scientists
-and software developers to process and analyze geospatial-temporal raster data
-with the same flexibility and ease as any other Spark Catalyst data type. At its
-core is a user-defined type (UDT) called TileUDT, which encodes a GeoTrellis
-Tile in a form the Spark Catalyst engine can process. Furthermore, we extend the
-definition of a DataFrame to encompass some additional invariants, allowing for
-geospatial operations within and between RasterFrames to occur, while still
-maintaining necessary geo-referencing constructs.
+The official home of RasterFrames under the Eclipse Foundation may be found here:
* https://projects.eclipse.org/projects/locationtech.rasterframes
@@ -58,19 +49,10 @@ commands are as follows:
* Build documentation: `sbt makeSite`
* Spark shell with RasterFrames initialized: `sbt console`
-
-## Contribution Process
-
-RasterFrames uses GitHub pull requests (PRs) for accepting contributions.
-Please fork the repository, create a branch, and submit a PR based off the `master` branch.
-During the PR review process comments may be attached. Please look out for comments
-and respond as necessary.
-
-
## Contact
Help, questions and community dialog are supported via Gitter:
- * https://gitter.im/s22s/raster-frames
+ * https://gitter.im/locationtech/rasterframes
Commercial support is available by writing to info@astraea.earth
diff --git a/LICENSE b/LICENSE
index 152d8c948..53d10c005 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
This software is licensed under the Apache 2 license, quoted below.
-Copyright 2017-2018 Astraea, Inc.
+Copyright 2017-2019 Astraea, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not
use this file except in compliance with the License. You may obtain a copy of
diff --git a/README.md b/README.md
index dddeb94ae..2b3bcb43f 100644
--- a/README.md
+++ b/README.md
@@ -1,22 +1,67 @@
-™
+®
- [![Join the chat at https://gitter.im/s22s/raster-frames](https://badges.gitter.im/s22s/raster-frames.svg)](https://gitter.im/s22s/raster-frames?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
+ [![Join the chat at https://gitter.im/locationtech/rasterframes](https://badges.gitter.im/locationtech/rasterframes.svg)](https://gitter.im/locationtech/rasterframes?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
-_RasterFrames™_ brings the power of Spark DataFrames to geospatial raster data, empowered by the map algebra and tile layer operations of [GeoTrellis](https://geotrellis.io/).
+RasterFrames® brings together Earth-observation (EO) data access, cloud computing, and DataFrame-based data science. The recent explosion of EO data from public and private satellite operators presents both a huge opportunity as well as a challenge to the data analysis community. It is _Big Data_ in the truest sense, and its footprint is rapidly getting bigger.
-
+RasterFrames provides a DataFrame-centric view over arbitrary raster data, enabling spatiotemporal queries, map algebra raster operations, and compatibility with the ecosystem of Spark ML algorithms. By using DataFrames as the core cognitive and compute data model, it is able to deliver these features in a form that is both accessible to general analysts and scalable along with the rapidly growing data footprint.
+
+
Please see the [Getting Started](http://rasterframes.io/getting-started.html) section of the Users' Manual to start using RasterFrames.
-## Documentation
+## User Resources
+
+* [RasterFrames Users' Manual](http://rasterframes.io/)
+* [RasterFrames Jupyter Notebook Docker Image](https://hub.docker.com/r/s22s/rasterframes-notebook/)
+* [Gitter Channel](https://gitter.im/locationtech/rasterframes)
+* [Submit an Issue](https://github.com/locationtech/rasterframes/issues)
+
+
+## Contributing
+
+Community contributions are always welcome. To get started, please review our [contribution guidelines](https://github.com/locationtech/rasterframes/blob/develop/CONTRIBUTING.md), [code of conduct](https://github.com/locationtech/rasterframes/blob/develop/CODE_OF_CONDUCT.md), and reach out to us on [gitter](https://gitter.im/locationtech/rasterframes) so the community can help you get started!
+
+RasterFrames is part of the LocationTech Stack.
+
+
+
+It is written in Scala, but with Python bindings. If you wish to contribute to the development of RasterFrames, or you
+wish to build it from scratch, you will need [sbt](https://www.scala-sbt.org/). Then clone the repository from GitHub.
+
+```bash
+git clone https://github.com/locationtech/rasterframes.git
+cd rasterframes
+```
+
+To publish to your local repository:
+
+```bash
+sbt publishLocal
+```
+
+You can run tests with
+
+```bash
+sbt test
+```
+
+and integration tests
+
+```bash
+sbt it:test
+```
+
+The documentation may be built with
+
+```bash
+sbt makeSite
+```
-* [Users' Manual](http://rasterframes.io/)
-* [API Documentation](http://rasterframes.io/latest/api/index.html)
-* [List of available UDFs](http://rasterframes.io/latest/api/index.html#astraea.spark.rasterframes.RasterFunctions)
-* [RasterFrames Jupyter Notebook Docker Image](https://hub.docker.com/r/s22s/rasterframes-notebooks/)
+Additional, Python specific build instructions may be found at [pyrasterframes/src/main/python/README.md](pyrasterframes/src/main/python/README.md)
## Copyright and License
-RasterFrames is released under the Apache 2.0 License, copyright Astraea, Inc. 2017-2018.
+RasterFrames is released under the Apache 2.0 License, copyright Astraea, Inc. 2017-2019.
diff --git a/bench/archive/jmh-results-20190528095237.json b/bench/archive/jmh-results-20190528095237.json
new file mode 100644
index 000000000..11d8f15f0
--- /dev/null
+++ b/bench/archive/jmh-results-20190528095237.json
@@ -0,0 +1,163 @@
+[
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalEquals",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx2048M",
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "primaryMetric" : {
+ "score" : 421.72586734818816,
+ "scoreError" : 21.639640737136855,
+ "scoreConfidence" : [
+ 400.0862266110513,
+ 443.365508085325
+ ],
+ "scorePercentiles" : {
+ "0.0" : 416.0271304058273,
+ "50.0" : 420.27062086802925,
+ "90.0" : 430.09766576285773,
+ "95.0" : 430.09766576285773,
+ "99.0" : 430.09766576285773,
+ "99.9" : 430.09766576285773,
+ "99.99" : 430.09766576285773,
+ "99.999" : 430.09766576285773,
+ "99.9999" : 430.09766576285773,
+ "100.0" : 430.09766576285773
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 417.8839821936131,
+ 416.0271304058273,
+ 430.09766576285773,
+ 424.349937510613,
+ 420.27062086802925
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.resolveCRS",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx2048M",
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "primaryMetric" : {
+ "score" : 358.41316103726996,
+ "scoreError" : 21.21533668234991,
+ "scoreConfidence" : [
+ 337.19782435492004,
+ 379.6284977196199
+ ],
+ "scorePercentiles" : {
+ "0.0" : 351.9024799408263,
+ "50.0" : 356.47836323413975,
+ "90.0" : 364.61463628732025,
+ "95.0" : 364.61463628732025,
+ "99.0" : 364.61463628732025,
+ "99.9" : 364.61463628732025,
+ "99.99" : 364.61463628732025,
+ "99.999" : 364.61463628732025,
+ "99.9999" : 364.61463628732025,
+ "100.0" : 364.61463628732025
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 356.47836323413975,
+ 364.61463628732025,
+ 355.393162879192,
+ 363.67716284487153,
+ 351.9024799408263
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.selfEquals",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx2048M",
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "primaryMetric" : {
+ "score" : 431.7566794354569,
+ "scoreError" : 27.353383248644004,
+ "scoreConfidence" : [
+ 404.4032961868129,
+ 459.11006268410085
+ ],
+ "scorePercentiles" : {
+ "0.0" : 426.33479529843424,
+ "50.0" : 428.0558107251467,
+ "90.0" : 443.6223317651236,
+ "95.0" : 443.6223317651236,
+ "99.0" : 443.6223317651236,
+ "99.9" : 443.6223317651236,
+ "99.99" : 443.6223317651236,
+ "99.999" : 443.6223317651236,
+ "99.9999" : 443.6223317651236,
+ "100.0" : 443.6223317651236
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 443.6223317651236,
+ 433.04934952339687,
+ 426.33479529843424,
+ 427.72110986518294,
+ 428.0558107251467
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ }
+]
+
+
diff --git a/bench/archive/jmh-results-20190528102317.json b/bench/archive/jmh-results-20190528102317.json
new file mode 100644
index 000000000..20e5712a1
--- /dev/null
+++ b/bench/archive/jmh-results-20190528102317.json
@@ -0,0 +1,163 @@
+[
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalEquals",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx2048M",
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "primaryMetric" : {
+ "score" : 7.084784138200969,
+ "scoreError" : 0.17086123184222066,
+ "scoreConfidence" : [
+ 6.913922906358748,
+ 7.255645370043189
+ ],
+ "scorePercentiles" : {
+ "0.0" : 7.044718220088579,
+ "50.0" : 7.07291741895685,
+ "90.0" : 7.153508778664247,
+ "95.0" : 7.153508778664247,
+ "99.0" : 7.153508778664247,
+ "99.9" : 7.153508778664247,
+ "99.99" : 7.153508778664247,
+ "99.999" : 7.153508778664247,
+ "99.9999" : 7.153508778664247,
+ "100.0" : 7.153508778664247
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 7.07291741895685,
+ 7.044718220088579,
+ 7.101541478172196,
+ 7.153508778664247,
+ 7.051234795122972
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.resolveCRS",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx2048M",
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "primaryMetric" : {
+ "score" : 0.07204597602344914,
+ "scoreError" : 0.014595696186190624,
+ "scoreConfidence" : [
+ 0.05745027983725852,
+ 0.08664167220963977
+ ],
+ "scorePercentiles" : {
+ "0.0" : 0.067949003349235,
+ "50.0" : 0.07168162461398803,
+ "90.0" : 0.07812749538776566,
+ "95.0" : 0.07812749538776566,
+ "99.0" : 0.07812749538776566,
+ "99.9" : 0.07812749538776566,
+ "99.99" : 0.07812749538776566,
+ "99.999" : 0.07812749538776566,
+ "99.9999" : 0.07812749538776566,
+ "100.0" : 0.07812749538776566
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 0.0701740653091496,
+ 0.07812749538776566,
+ 0.07229769145710743,
+ 0.07168162461398803,
+ 0.067949003349235
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.selfEquals",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx2048M",
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "primaryMetric" : {
+ "score" : 0.04160068241214439,
+ "scoreError" : 8.000367242705733E-4,
+ "scoreConfidence" : [
+ 0.04080064568787382,
+ 0.04240071913641496
+ ],
+ "scorePercentiles" : {
+ "0.0" : 0.0413462799613575,
+ "50.0" : 0.04153631854728124,
+ "90.0" : 0.04183927734451199,
+ "95.0" : 0.04183927734451199,
+ "99.0" : 0.04183927734451199,
+ "99.9" : 0.04183927734451199,
+ "99.99" : 0.04183927734451199,
+ "99.999" : 0.04183927734451199,
+ "99.9999" : 0.04183927734451199,
+ "100.0" : 0.04183927734451199
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 0.0413462799613575,
+ 0.04183927734451199,
+ 0.04153631854728124,
+ 0.0417885245223439,
+ 0.0414930116852273
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ }
+]
+
+
diff --git a/bench/archive/jmh-results-20190606082738.json b/bench/archive/jmh-results-20190606082738.json
new file mode 100644
index 000000000..b8e7467b1
--- /dev/null
+++ b/bench/archive/jmh-results-20190606082738.json
@@ -0,0 +1,269 @@
+[
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalEqualsFalse",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx2048M",
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "primaryMetric" : {
+ "score" : 13.115296907959536,
+ "scoreError" : 2.5400077963191556,
+ "scoreConfidence" : [
+ 10.57528911164038,
+ 15.655304704278691
+ ],
+ "scorePercentiles" : {
+ "0.0" : 12.590614558661818,
+ "50.0" : 12.853830352008682,
+ "90.0" : 14.25982363939229,
+ "95.0" : 14.25982363939229,
+ "99.0" : 14.25982363939229,
+ "99.9" : 14.25982363939229,
+ "99.99" : 14.25982363939229,
+ "99.999" : 14.25982363939229,
+ "99.9999" : 14.25982363939229,
+ "100.0" : 14.25982363939229
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 12.853830352008682,
+ 12.590614558661818,
+ 12.829707376038487,
+ 14.25982363939229,
+ 13.042508613696407
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalEqualsTrue",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx2048M",
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "primaryMetric" : {
+ "score" : 6.5973550106567345,
+ "scoreError" : 0.1946737881542353,
+ "scoreConfidence" : [
+ 6.402681222502499,
+ 6.7920287988109695
+ ],
+ "scorePercentiles" : {
+ "0.0" : 6.523477357639692,
+ "50.0" : 6.6063669572343695,
+ "90.0" : 6.648688182671118,
+ "95.0" : 6.648688182671118,
+ "99.0" : 6.648688182671118,
+ "99.9" : 6.648688182671118,
+ "99.99" : 6.648688182671118,
+ "99.999" : 6.648688182671118,
+ "99.9999" : 6.648688182671118,
+ "100.0" : 6.648688182671118
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 6.635409622463296,
+ 6.523477357639692,
+ 6.648688182671118,
+ 6.572832933275196,
+ 6.6063669572343695
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalLazyEqualsFalse",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx2048M",
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "primaryMetric" : {
+ "score" : 13.265730662256157,
+ "scoreError" : 1.915211216125259,
+ "scoreConfidence" : [
+ 11.350519446130898,
+ 15.180941878381416
+ ],
+ "scorePercentiles" : {
+ "0.0" : 12.850610015459289,
+ "50.0" : 13.13459015560355,
+ "90.0" : 14.120588306765669,
+ "95.0" : 14.120588306765669,
+ "99.0" : 14.120588306765669,
+ "99.9" : 14.120588306765669,
+ "99.99" : 14.120588306765669,
+ "99.999" : 14.120588306765669,
+ "99.9999" : 14.120588306765669,
+ "100.0" : 14.120588306765669
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 13.215365538647765,
+ 13.13459015560355,
+ 13.007499294804513,
+ 12.850610015459289,
+ 14.120588306765669
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalLazyEqualsTrue",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx2048M",
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "primaryMetric" : {
+ "score" : 0.040409137130485946,
+ "scoreError" : 0.007586963982726796,
+ "scoreConfidence" : [
+ 0.03282217314775915,
+ 0.047996101113212744
+ ],
+ "scorePercentiles" : {
+ "0.0" : 0.03867633630965359,
+ "50.0" : 0.04003336568127626,
+ "90.0" : 0.04377238392008154,
+ "95.0" : 0.04377238392008154,
+ "99.0" : 0.04377238392008154,
+ "99.9" : 0.04377238392008154,
+ "99.99" : 0.04377238392008154,
+ "99.999" : 0.04377238392008154,
+ "99.9999" : 0.04377238392008154,
+ "100.0" : 0.04377238392008154
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 0.03867633630965359,
+ 0.04003336568127626,
+ 0.04016005719940341,
+ 0.03940354254201491,
+ 0.04377238392008154
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.resolveCRS",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx2048M",
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "primaryMetric" : {
+ "score" : 0.06507083680791029,
+ "scoreError" : 0.003209489715829842,
+ "scoreConfidence" : [
+ 0.061861347092080445,
+ 0.06828032652374012
+ ],
+ "scorePercentiles" : {
+ "0.0" : 0.06425209212071442,
+ "50.0" : 0.06461825090771647,
+ "90.0" : 0.06612649264562556,
+ "95.0" : 0.06612649264562556,
+ "99.0" : 0.06612649264562556,
+ "99.9" : 0.06612649264562556,
+ "99.99" : 0.06612649264562556,
+ "99.999" : 0.06612649264562556,
+ "99.9999" : 0.06612649264562556,
+ "100.0" : 0.06612649264562556
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 0.06612649264562556,
+ 0.06579754694820603,
+ 0.06461825090771647,
+ 0.06425209212071442,
+ 0.06455980141728893
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ }
+]
+
+
diff --git a/bench/archive/jmh-results-20190606094001.json b/bench/archive/jmh-results-20190606094001.json
new file mode 100644
index 000000000..02cdd3194
--- /dev/null
+++ b/bench/archive/jmh-results-20190606094001.json
@@ -0,0 +1,269 @@
+[
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalEqualsFalse",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx2048M",
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "primaryMetric" : {
+ "score" : 13.076857373851485,
+ "scoreError" : 1.6593497203225103,
+ "scoreConfidence" : [
+ 11.417507653528975,
+ 14.736207094173995
+ ],
+ "scorePercentiles" : {
+ "0.0" : 12.633328785860648,
+ "50.0" : 13.200439575276704,
+ "90.0" : 13.659196200240215,
+ "95.0" : 13.659196200240215,
+ "99.0" : 13.659196200240215,
+ "99.9" : 13.659196200240215,
+ "99.99" : 13.659196200240215,
+ "99.999" : 13.659196200240215,
+ "99.9999" : 13.659196200240215,
+ "100.0" : 13.659196200240215
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 13.659196200240215,
+ 12.665249239331997,
+ 13.200439575276704,
+ 13.226073068547855,
+ 12.633328785860648
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalEqualsTrue",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx2048M",
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "primaryMetric" : {
+ "score" : 0.2775587837304895,
+ "scoreError" : 0.015896893581796353,
+ "scoreConfidence" : [
+ 0.2616618901486931,
+ 0.29345567731228583
+ ],
+ "scorePercentiles" : {
+ "0.0" : 0.2724269842972383,
+ "50.0" : 0.2775487008943729,
+ "90.0" : 0.2816631615036355,
+ "95.0" : 0.2816631615036355,
+ "99.0" : 0.2816631615036355,
+ "99.9" : 0.2816631615036355,
+ "99.99" : 0.2816631615036355,
+ "99.999" : 0.2816631615036355,
+ "99.9999" : 0.2816631615036355,
+ "100.0" : 0.2816631615036355
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 0.28157426698598376,
+ 0.2816631615036355,
+ 0.27458080497121706,
+ 0.2775487008943729,
+ 0.2724269842972383
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalLazyEqualsFalse",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx2048M",
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "primaryMetric" : {
+ "score" : 13.143184640034391,
+ "scoreError" : 0.6865674500293741,
+ "scoreConfidence" : [
+ 12.456617190005018,
+ 13.829752090063765
+ ],
+ "scorePercentiles" : {
+ "0.0" : 12.925994808467195,
+ "50.0" : 13.17768387931118,
+ "90.0" : 13.341295384511856,
+ "95.0" : 13.341295384511856,
+ "99.0" : 13.341295384511856,
+ "99.9" : 13.341295384511856,
+ "99.99" : 13.341295384511856,
+ "99.999" : 13.341295384511856,
+ "99.9999" : 13.341295384511856,
+ "100.0" : 13.341295384511856
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 13.341295384511856,
+ 13.17768387931118,
+ 12.925994808467195,
+ 12.995056782282637,
+ 13.27589234559909
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalLazyEqualsTrue",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx2048M",
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "primaryMetric" : {
+ "score" : 0.03659740987415034,
+ "scoreError" : 0.0011385555881718446,
+ "scoreConfidence" : [
+ 0.035458854285978496,
+ 0.037735965462322184
+ ],
+ "scorePercentiles" : {
+ "0.0" : 0.036314017083098636,
+ "50.0" : 0.0364980924818408,
+ "90.0" : 0.03691511396572689,
+ "95.0" : 0.03691511396572689,
+ "99.0" : 0.03691511396572689,
+ "99.9" : 0.03691511396572689,
+ "99.99" : 0.03691511396572689,
+ "99.999" : 0.03691511396572689,
+ "99.9999" : 0.03691511396572689,
+ "100.0" : 0.03691511396572689
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 0.036314017083098636,
+ 0.03635019813669222,
+ 0.0364980924818408,
+ 0.03691511396572689,
+ 0.03690962770339316
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ },
+ {
+ "jmhVersion" : "1.21",
+ "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.resolveCRS",
+ "mode" : "avgt",
+ "threads" : 1,
+ "forks" : 1,
+ "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java",
+ "jvmArgs" : [
+ "-Xmx2048M",
+ "-Xmx4g"
+ ],
+ "jdkVersion" : "1.8.0_171",
+ "vmName" : "Java HotSpot(TM) 64-Bit Server VM",
+ "vmVersion" : "25.171-b11",
+ "warmupIterations" : 8,
+ "warmupTime" : "10 s",
+ "warmupBatchSize" : 1,
+ "measurementIterations" : 5,
+ "measurementTime" : "10 s",
+ "measurementBatchSize" : 1,
+ "primaryMetric" : {
+ "score" : 0.06517305542168148,
+ "scoreError" : 0.005310236878903678,
+ "scoreConfidence" : [
+ 0.05986281854277779,
+ 0.07048329230058516
+ ],
+ "scorePercentiles" : {
+ "0.0" : 0.06355938701044708,
+ "50.0" : 0.06512777854120488,
+ "90.0" : 0.06700762802360496,
+ "95.0" : 0.06700762802360496,
+ "99.0" : 0.06700762802360496,
+ "99.9" : 0.06700762802360496,
+ "99.99" : 0.06700762802360496,
+ "99.999" : 0.06700762802360496,
+ "99.9999" : 0.06700762802360496,
+ "100.0" : 0.06700762802360496
+ },
+ "scoreUnit" : "us/op",
+ "rawData" : [
+ [
+ 0.06700762802360496,
+ 0.06512777854120488,
+ 0.06418521981870606,
+ 0.06598526371444442,
+ 0.06355938701044708
+ ]
+ ]
+ },
+ "secondaryMetrics" : {
+ }
+ }
+]
+
+
diff --git a/bench/src/main/resources/log4j.properties b/bench/src/main/resources/log4j.properties
index f8cb2c5b9..9ed1d66ca 100644
--- a/bench/src/main/resources/log4j.properties
+++ b/bench/src/main/resources/log4j.properties
@@ -18,7 +18,7 @@
#
# Set everything to be logged to the console
-log4j.rootCategory=TRACE, FILE
+log4j.rootCategory=INFO, FILE
log4j.appender.FILE=org.apache.log4j.FileAppender
log4j.appender.FILE.File=target/jmh-log.out
@@ -36,7 +36,7 @@ log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
-log4j.logger.astraea.spark=DEBUG
+log4j.logger.org.locationtech=DEBUG
# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/RasterRefBench.scala b/bench/src/main/scala/astraea/spark/rasterframes/bench/RasterRefBench.scala
deleted file mode 100644
index c68c826e8..000000000
--- a/bench/src/main/scala/astraea/spark/rasterframes/bench/RasterRefBench.scala
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2018 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- */
-
-package astraea.spark.rasterframes.bench
-
-
-import java.util.concurrent.TimeUnit
-
-import astraea.spark.rasterframes
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.expressions.transformers.RasterSourceToTiles
-import astraea.spark.rasterframes.ref.RasterSource
-import astraea.spark.rasterframes.ref.RasterSource.ReadCallback
-import com.typesafe.scalalogging.LazyLogging
-import org.apache.spark.sql._
-import org.openjdk.jmh.annotations._
-/**
- *
- *
- * @since 11/1/18
- */
-@BenchmarkMode(Array(Mode.AverageTime))
-@State(Scope.Benchmark)
-@OutputTimeUnit(TimeUnit.MILLISECONDS)
-class RasterRefBench extends SparkEnv with LazyLogging {
- import spark.implicits._
-
- var expandedDF: DataFrame = _
- var singleDF: DataFrame = _
-
- @Setup(Level.Trial)
- def setupData(): Unit = {
- val watcher = new ReadCallback {
- var count: Long = 0
- var calls: Int = 0
- override def readRange(source: RasterSource, start: Long, length: Int): Unit = {
- calls += 1
- count += length
- logger.debug("%4d -- %,d bytes".format(calls, count))
- }
- }
-
- val r1 = RasterSource(remoteCOGSingleband1, Some(watcher))
- val r2 = RasterSource(remoteCOGSingleband2, Some(watcher))
- singleDF = Seq((r1, r2)).toDF("B1", "B2")
- .select(RasterSourceToTiles(false, $"B1", $"B2"))
-
- expandedDF = Seq((r1, r2)).toDF("B1", "B2")
- .select(RasterSourceToTiles(true, $"B1", $"B2"))
- }
-
- @Benchmark
- def computeDifferenceExpanded() = {
- expandedDF
- .select(normalized_difference($"B1", $"B2"))
- .cache()
- .count()
- }
-
- @Benchmark
- def computeDifferenceSingle() = {
- singleDF
- .select(normalized_difference($"B1", $"B2"))
- .cache()
- .count()
- }
-
- @Benchmark
- def computeStatsSingle() = {
- singleDF.select(agg_stats($"B1")).collect()
- }
-
- @Benchmark
- def computeStatsExpanded() = {
- expandedDF.select(agg_stats($"B1")).collect()
- }
-
- @Benchmark
- def computeDifferenceStats() = {
- singleDF.select(agg_stats(normalized_difference($"B1", $"B2"))).collect()
- }
-
-}
-
-object RasterRefBench {
-
-// import org.openjdk.jmh.runner.RunnerException
-// import org.openjdk.jmh.runner.options.OptionsBuilder
-//
-// @throws[RunnerException]
- def main(args: Array[String]): Unit = {
-
- val thing = new RasterRefBench()
- thing.setupData()
- rasterframes.util.time("compute stats expanded") {
- thing.computeStatsSingle()
- }
-
- rasterframes.util.time("compute stats single") {
- thing.computeStatsExpanded()
- }
-
- // val opt = new OptionsBuilder()
-// .include(classOf[RasterRefBench].getSimpleName)
-// .threads(4)
-// .forks(5)
-// .build()
-//
-// new Runner(opt).run()
- }
-}
diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/BinaryTileOpBench.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/BinaryTileOpBench.scala
similarity index 92%
rename from bench/src/main/scala/astraea/spark/rasterframes/bench/BinaryTileOpBench.scala
rename to bench/src/main/scala/org/locationtech/rasterframes/bench/BinaryTileOpBench.scala
index 133d93356..dce7a7715 100644
--- a/bench/src/main/scala/astraea/spark/rasterframes/bench/BinaryTileOpBench.scala
+++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/BinaryTileOpBench.scala
@@ -19,11 +19,12 @@
*
*/
-package astraea.spark.rasterframes.bench
+package org.locationtech.rasterframes.bench
+
import java.util.concurrent.TimeUnit
-import astraea.spark.rasterframes.expressions.localops._
-import astraea.spark.rasterframes._
+import org.locationtech.rasterframes.expressions.localops._
+import org.locationtech.rasterframes._
import geotrellis.raster.Tile
import geotrellis.raster.mapalgebra.{local => gt}
import org.apache.spark.sql._
diff --git a/bench/src/main/scala/org/locationtech/rasterframes/bench/CRSBench.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/CRSBench.scala
new file mode 100644
index 000000000..424533b58
--- /dev/null
+++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/CRSBench.scala
@@ -0,0 +1,69 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.bench
+
+import java.util.concurrent.TimeUnit
+
+import geotrellis.proj4.{CRS, LatLng, WebMercator}
+import org.locationtech.proj4j.CoordinateReferenceSystem
+import org.locationtech.rasterframes.model.LazyCRS
+import org.openjdk.jmh.annotations._
+
+@BenchmarkMode(Array(Mode.AverageTime))
+@State(Scope.Benchmark)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+class CRSBench extends SparkEnv {
+
+ var crs1: CRS = _
+ var crs2: CRS = _
+
+ @Setup(Level.Invocation)
+ def setupData(): Unit = {
+ crs1 = LazyCRS("epsg:4326")
+ crs2 = LazyCRS(WebMercator.toProj4String)
+ }
+
+ @Benchmark
+ def resolveCRS(): CoordinateReferenceSystem = {
+ crs1.proj4jCrs
+ }
+
+ @Benchmark
+ def logicalEqualsTrue(): Boolean = {
+ crs1 == LatLng
+ }
+
+ @Benchmark
+ def logicalEqualsFalse(): Boolean = {
+ crs1 == WebMercator
+ }
+
+ @Benchmark
+ def logicalLazyEqualsTrue(): Boolean = {
+ crs1 == crs1
+ }
+
+ @Benchmark
+ def logicalLazyEqualsFalse(): Boolean = {
+ crs1 == crs2
+ }
+}
diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/CatalystSerializerBench.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/CatalystSerializerBench.scala
similarity index 94%
rename from bench/src/main/scala/astraea/spark/rasterframes/bench/CatalystSerializerBench.scala
rename to bench/src/main/scala/org/locationtech/rasterframes/bench/CatalystSerializerBench.scala
index b4abad9bf..12a6b0486 100644
--- a/bench/src/main/scala/astraea/spark/rasterframes/bench/CatalystSerializerBench.scala
+++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/CatalystSerializerBench.scala
@@ -19,15 +19,15 @@
*
*/
-package astraea.spark.rasterframes.bench
+package org.locationtech.rasterframes.bench
import java.util.concurrent.TimeUnit
-import astraea.spark.rasterframes.encoders.{CatalystSerializer, StandardEncoders}
import geotrellis.proj4.{CRS, LatLng, Sinusoidal}
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
+import org.locationtech.rasterframes.encoders.{CatalystSerializer, StandardEncoders}
import org.openjdk.jmh.annotations._
@BenchmarkMode(Array(Mode.AverageTime))
diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/MultibandRenderBench.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/MultibandRenderBench.scala
similarity index 87%
rename from bench/src/main/scala/astraea/spark/rasterframes/bench/MultibandRenderBench.scala
rename to bench/src/main/scala/org/locationtech/rasterframes/bench/MultibandRenderBench.scala
index 383710205..8636a6a80 100644
--- a/bench/src/main/scala/astraea/spark/rasterframes/bench/MultibandRenderBench.scala
+++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/MultibandRenderBench.scala
@@ -19,12 +19,13 @@
*
*/
-package astraea.spark.rasterframes.bench
+package org.locationtech.rasterframes.bench
+
import java.util.concurrent.TimeUnit
-import astraea.spark.rasterframes.util.MultibandRender.Landsat8NaturalColor
+import org.locationtech.rasterframes.util.MultibandRender.Landsat8NaturalColor
import geotrellis.raster._
-import geotrellis.raster.io.geotiff.{GeoTiff, MultibandGeoTiff}
+import geotrellis.raster.io.geotiff.MultibandGeoTiff
import org.apache.commons.io.IOUtils
import org.openjdk.jmh.annotations._
diff --git a/bench/src/main/scala/org/locationtech/rasterframes/bench/RasterRefBench.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/RasterRefBench.scala
new file mode 100644
index 000000000..448fab9c3
--- /dev/null
+++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/RasterRefBench.scala
@@ -0,0 +1,87 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2018 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.bench
+
+import java.util.concurrent.TimeUnit
+
+import com.typesafe.scalalogging.LazyLogging
+import org.apache.spark.sql._
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs
+import org.locationtech.rasterframes.expressions.transformers.RasterRefToTile
+import org.locationtech.rasterframes.model.TileDimensions
+import org.locationtech.rasterframes.ref.RasterSource
+import org.openjdk.jmh.annotations._
+
+@BenchmarkMode(Array(Mode.AverageTime))
+@State(Scope.Benchmark)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+class RasterRefBench extends SparkEnv with LazyLogging {
+ import spark.implicits._
+
+ var expandedDF: DataFrame = _
+ var singleDF: DataFrame = _
+
+ @Setup(Level.Trial)
+ def setupData(): Unit = {
+ val r1 = RasterSource(remoteCOGSingleband1)
+ val r2 = RasterSource(remoteCOGSingleband2)
+
+ singleDF = Seq((r1, r2)).toDF("B1", "B2")
+ .select(RasterRefToTile(RasterSourceToRasterRefs(Some(TileDimensions(r1.dimensions)), Seq(0), $"B1", $"B2")))
+
+ expandedDF = Seq((r1, r2)).toDF("B1", "B2")
+ .select(RasterRefToTile(RasterSourceToRasterRefs($"B1", $"B2")))
+ }
+
+ @Benchmark
+ def computeDifferenceExpanded() = {
+ expandedDF
+ .select(rf_normalized_difference($"B1", $"B2"))
+ .cache()
+ .count()
+ }
+
+ @Benchmark
+ def computeDifferenceSingle() = {
+ singleDF
+ .select(rf_normalized_difference($"B1", $"B2"))
+ .cache()
+ .count()
+ }
+
+ @Benchmark
+ def computeStatsSingle() = {
+ singleDF.select(rf_agg_stats($"B1")).collect()
+ }
+
+ @Benchmark
+ def computeStatsExpanded() = {
+ expandedDF.select(rf_agg_stats($"B1")).collect()
+ }
+
+ @Benchmark
+ def computeDifferenceStats() = {
+ singleDF.select(rf_agg_stats(rf_normalized_difference($"B1", $"B2"))).collect()
+ }
+
+}
\ No newline at end of file
diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/SparkEnv.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/SparkEnv.scala
similarity index 92%
rename from bench/src/main/scala/astraea/spark/rasterframes/bench/SparkEnv.scala
rename to bench/src/main/scala/org/locationtech/rasterframes/bench/SparkEnv.scala
index 8b718479a..d3691f800 100644
--- a/bench/src/main/scala/astraea/spark/rasterframes/bench/SparkEnv.scala
+++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/SparkEnv.scala
@@ -15,11 +15,13 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.bench
+package org.locationtech.rasterframes.bench
-import astraea.spark.rasterframes._
+import org.locationtech.rasterframes._
import org.apache.spark.sql.SparkSession
import org.openjdk.jmh.annotations.{Level, TearDown}
diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/StatsComputeBench.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/StatsComputeBench.scala
similarity index 75%
rename from bench/src/main/scala/astraea/spark/rasterframes/bench/StatsComputeBench.scala
rename to bench/src/main/scala/org/locationtech/rasterframes/bench/StatsComputeBench.scala
index c9aa7eef4..2ebc3efc0 100644
--- a/bench/src/main/scala/astraea/spark/rasterframes/bench/StatsComputeBench.scala
+++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/StatsComputeBench.scala
@@ -15,14 +15,16 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.bench
+package org.locationtech.rasterframes.bench
import java.util.concurrent.TimeUnit
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.stats.CellHistogram
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.stats.CellHistogram
import org.apache.spark.sql._
import org.openjdk.jmh.annotations._
@@ -57,26 +59,26 @@ class StatsComputeBench extends SparkEnv {
// @Benchmark
// def computeStats(): Array[CellStatistics] = {
-// tiles.select(agg_stats($"tile")).collect()
+// tiles.select(rf_agg_stats($"tile")).collect()
// }
@Benchmark
def computeHistogram(): Array[CellHistogram] = {
- tiles.select(agg_approx_histogram($"tile")).collect()
+ tiles.select(rf_agg_approx_histogram($"tile")).collect()
}
// @Benchmark
// def extractMean(): Array[Double] = {
-// tiles.select(agg_stats($"tile").getField("mean")).map(_.getDouble(0)).collect()
+// tiles.select(rf_agg_stats($"tile").getField("mean")).map(_.getDouble(0)).collect()
// }
//
// @Benchmark
// def directMean(): Array[Double] = {
-// tiles.repartition(10).select(agg_mean($"tile")).collect()
+// tiles.repartition(10).select(rf_agg_mean($"tile")).collect()
// }
// @Benchmark
// def computeCounts() = {
-// tiles.toDF("tile").select(data_cells($"tile") as "counts").agg(sum($"counts")).collect()
+// tiles.toDF("tile").select(rf_data_cells($"tile") as "counts").agg(sum($"counts")).collect()
// }
}
diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/TileAssembleBench.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/TileAssembleBench.scala
similarity index 90%
rename from bench/src/main/scala/astraea/spark/rasterframes/bench/TileAssembleBench.scala
rename to bench/src/main/scala/org/locationtech/rasterframes/bench/TileAssembleBench.scala
index 8fe31ef0d..8b33af369 100644
--- a/bench/src/main/scala/astraea/spark/rasterframes/bench/TileAssembleBench.scala
+++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/TileAssembleBench.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea, Inc.
+ * Copyright 2017 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -19,10 +19,11 @@
*
*/
-package astraea.spark.rasterframes.bench
+package org.locationtech.rasterframes.bench
+
import java.util.concurrent.TimeUnit
-import astraea.spark.rasterframes._
+import org.locationtech.rasterframes._
import geotrellis.raster.ByteConstantNoDataCellType
import org.apache.spark.sql._
import org.openjdk.jmh.annotations._
@@ -44,7 +45,7 @@ class TileAssembleBench extends SparkEnv {
var cells1: DataFrame = _
var cells2: DataFrame = _
- val assembler = assemble_tile(
+ val assembler = rf_assemble_tile(
$"column_index", $"row_index", $"tile",
tileSize, tileSize, cellType
)
@@ -53,7 +54,7 @@ class TileAssembleBench extends SparkEnv {
def setupData(): Unit = {
cells1 = Seq.fill(numTiles)(randomTile(tileSize, tileSize, cellType.name)).zipWithIndex
.toDF("tile", "id")
- .select($"id", explode_tiles($"tile"))
+ .select($"id", rf_explode_tiles($"tile"))
.repartition(4, $"id")
.cache()
diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/TileCellScanBench.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/TileCellScanBench.scala
similarity index 92%
rename from bench/src/main/scala/astraea/spark/rasterframes/bench/TileCellScanBench.scala
rename to bench/src/main/scala/org/locationtech/rasterframes/bench/TileCellScanBench.scala
index 64ee8716e..350ac811a 100644
--- a/bench/src/main/scala/astraea/spark/rasterframes/bench/TileCellScanBench.scala
+++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/TileCellScanBench.scala
@@ -15,15 +15,17 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.bench
+package org.locationtech.rasterframes.bench
import java.util.concurrent.TimeUnit
-import astraea.spark.rasterframes.tiles.InternalRowTile
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.rf.TileUDT
+import org.locationtech.rasterframes.tiles.InternalRowTile
import org.openjdk.jmh.annotations._
@BenchmarkMode(Array(Mode.AverageTime))
diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/TileEncodeBench.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/TileEncodeBench.scala
similarity index 85%
rename from bench/src/main/scala/astraea/spark/rasterframes/bench/TileEncodeBench.scala
rename to bench/src/main/scala/org/locationtech/rasterframes/bench/TileEncodeBench.scala
index 7f25235ae..a4b0a2595 100644
--- a/bench/src/main/scala/astraea/spark/rasterframes/bench/TileEncodeBench.scala
+++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/TileEncodeBench.scala
@@ -15,19 +15,22 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.bench
+package org.locationtech.rasterframes.bench
import java.net.URI
import java.util.concurrent.TimeUnit
-import astraea.spark.rasterframes.ref.RasterRef.RasterRefTile
-import astraea.spark.rasterframes.ref.{RasterRef, RasterSource}
+import org.locationtech.rasterframes.ref.RasterRef.RasterRefTile
+import org.locationtech.rasterframes.ref.RasterRef
import geotrellis.raster.Tile
import geotrellis.vector.Extent
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
+import org.locationtech.rasterframes.ref.{RasterRef, RasterSource}
import org.openjdk.jmh.annotations._
@BenchmarkMode(Array(Mode.AverageTime))
@@ -52,7 +55,7 @@ class TileEncodeBench extends SparkEnv {
cellTypeName match {
case "rasterRef" ⇒
val baseCOG = "https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/149/039/LC08_L1TP_149039_20170411_20170415_01_T1/LC08_L1TP_149039_20170411_20170415_01_T1_B1.TIF"
- tile = RasterRefTile(RasterRef(RasterSource(URI.create(baseCOG)), Some(Extent(253785.0, 3235185.0, 485115.0, 3471015.0))))
+ tile = RasterRefTile(RasterRef(RasterSource(URI.create(baseCOG)), 0, Some(Extent(253785.0, 3235185.0, 485115.0, 3471015.0))))
case _ ⇒
tile = randomTile(tileSize, tileSize, cellTypeName)
}
diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/TileExplodeBench.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/TileExplodeBench.scala
similarity index 86%
rename from bench/src/main/scala/astraea/spark/rasterframes/bench/TileExplodeBench.scala
rename to bench/src/main/scala/org/locationtech/rasterframes/bench/TileExplodeBench.scala
index ebd4f169c..7f3352f69 100644
--- a/bench/src/main/scala/astraea/spark/rasterframes/bench/TileExplodeBench.scala
+++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/TileExplodeBench.scala
@@ -15,12 +15,14 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.bench
+package org.locationtech.rasterframes.bench
import java.util.concurrent.TimeUnit
-import astraea.spark.rasterframes._
+import org.locationtech.rasterframes._
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.openjdk.jmh.annotations._
@@ -56,11 +58,11 @@ class TileExplodeBench extends SparkEnv {
@Benchmark
def arrayExplode() = {
- tiles.select(posexplode(tile_to_array_double($"tile"))).count()
+ tiles.select(posexplode(rf_tile_to_array_double($"tile"))).count()
}
@Benchmark
def tileExplode() = {
- tiles.select(explode_tiles($"tile")).count()
+ tiles.select(rf_explode_tiles($"tile")).count()
}
}
diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/package.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/package.scala
similarity index 97%
rename from bench/src/main/scala/astraea/spark/rasterframes/bench/package.scala
rename to bench/src/main/scala/org/locationtech/rasterframes/bench/package.scala
index 525c86734..65d8ab88f 100644
--- a/bench/src/main/scala/astraea/spark/rasterframes/bench/package.scala
+++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/package.scala
@@ -17,7 +17,7 @@
*
*/
-package astraea.spark.rasterframes
+package org.locationtech.rasterframes
import java.net.URI
diff --git a/build.sbt b/build.sbt
index 05aed4e8d..5aa2e6009 100644
--- a/build.sbt
+++ b/build.sbt
@@ -1,46 +1,163 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2017-2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
addCommandAlias("makeSite", "docs/makeSite")
+addCommandAlias("previewSite", "docs/previewSite")
+addCommandAlias("ghpagesPushSite", "docs/ghpagesPushSite")
addCommandAlias("console", "datasource/console")
+// Prefer our own IntegrationTest config definition, which inherits from Test.
+lazy val IntegrationTest = config("it") extend Test
+
lazy val root = project
.in(file("."))
.withId("RasterFrames")
.aggregate(core, datasource, pyrasterframes, experimental)
+ .enablePlugins(RFReleasePlugin)
.settings(publish / skip := true)
- .settings(releaseSettings)
-
-lazy val deployment = project
- .dependsOn(root)
- .disablePlugins(SparkPackagePlugin)
-lazy val IntegrationTest = config("it") extend Test
+lazy val `rf-notebook` = project
+ .dependsOn(pyrasterframes)
+ .enablePlugins(RFAssemblyPlugin, DockerPlugin)
+ .settings(publish / skip := true)
lazy val core = project
+ .enablePlugins(BuildInfoPlugin)
.configs(IntegrationTest)
.settings(inConfig(IntegrationTest)(Defaults.testSettings))
.settings(Defaults.itSettings)
- .disablePlugins(SparkPackagePlugin)
+ .settings(
+ moduleName := "rasterframes",
+ libraryDependencies ++= Seq(
+ shapeless,
+ `jts-core`,
+ geomesa("z3").value,
+ geomesa("spark-jts").value,
+ `geotrellis-contrib-vlm`,
+ `geotrellis-contrib-gdal`,
+ spark("core").value % Provided,
+ spark("mllib").value % Provided,
+ spark("sql").value % Provided,
+ geotrellis("spark").value,
+ geotrellis("raster").value,
+ geotrellis("s3").value,
+ geotrellis("spark-testkit").value % Test excludeAll (
+ ExclusionRule(organization = "org.scalastic"),
+ ExclusionRule(organization = "org.scalatest")
+ ),
+ scaffeine,
+ scalatest
+ ),
+ buildInfoKeys ++= Seq[BuildInfoKey](
+ moduleName, version, scalaVersion, sbtVersion, rfGeoTrellisVersion, rfGeoMesaVersion, rfSparkVersion
+ ),
+ buildInfoPackage := "org.locationtech.rasterframes",
+ buildInfoObject := "RFBuildInfo",
+ buildInfoOptions := Seq(
+ BuildInfoOption.ToMap,
+ BuildInfoOption.BuildTime,
+ BuildInfoOption.ToJson
+ )
+ )
lazy val pyrasterframes = project
.dependsOn(core, datasource, experimental)
- .settings(assemblySettings)
+ .enablePlugins(RFAssemblyPlugin, PythonBuildPlugin)
+ .settings(
+ libraryDependencies ++= Seq(
+ geotrellis("s3").value,
+ spark("core").value % Provided,
+ spark("mllib").value % Provided,
+ spark("sql").value % Provided
+ )
+ )
lazy val datasource = project
+ .configs(IntegrationTest)
+ .settings(Defaults.itSettings)
.dependsOn(core % "test->test;compile->compile")
- .disablePlugins(SparkPackagePlugin)
+ .settings(
+ moduleName := "rasterframes-datasource",
+ libraryDependencies ++= Seq(
+ geotrellis("s3").value,
+ spark("core").value % Provided,
+ spark("mllib").value % Provided,
+ spark("sql").value % Provided
+ ),
+ initialCommands in console := (initialCommands in console).value +
+ """
+ |import org.locationtech.rasterframes.datasource.geotrellis._
+ |import org.locationtech.rasterframes.datasource.geotiff._
+ |""".stripMargin
+ )
lazy val experimental = project
.configs(IntegrationTest)
.settings(Defaults.itSettings)
.dependsOn(core % "test->test;it->test;compile->compile")
.dependsOn(datasource % "test->test;it->test;compile->compile")
- .disablePlugins(SparkPackagePlugin)
+ .settings(
+ moduleName := "rasterframes-experimental",
+ libraryDependencies ++= Seq(
+ geotrellis("s3").value,
+ spark("core").value % Provided,
+ spark("mllib").value % Provided,
+ spark("sql").value % Provided
+ ),
+ fork in IntegrationTest := true,
+ javaOptions in IntegrationTest := Seq("-Xmx2G"),
+ parallelExecution in IntegrationTest := false
+ )
lazy val docs = project
- .dependsOn(core, datasource)
- .disablePlugins(SparkPackagePlugin)
+ .dependsOn(core, datasource, pyrasterframes)
+ .enablePlugins(SiteScaladocPlugin, ParadoxPlugin, GhpagesPlugin, ScalaUnidocPlugin)
+ .settings(
+ apiURL := Some(url("http://rasterframes.io/latest/api")),
+ autoAPIMappings := true,
+ ghpagesNoJekyll := true,
+ ScalaUnidoc / siteSubdirName := "latest/api",
+ paradox / siteSubdirName := ".",
+ paradoxProperties ++= Map(
+ "github.base_url" -> "https://github.com/locationtech/rasterframes",
+ "version" -> version.value,
+ "scaladoc.org.apache.spark.sql.rf" -> "http://rasterframes.io/latest"
+ ),
+ paradoxNavigationExpandDepth := Some(3),
+ paradoxTheme := Some(builtinParadoxTheme("generic")),
+ makeSite := makeSite
+ .dependsOn(Compile / unidoc)
+ .dependsOn((Compile / paradox)
+ .dependsOn(pyrasterframes / doc)
+ ).value,
+ Compile / paradox / sourceDirectories += (pyrasterframes / Python / doc / target).value,
+ )
+ .settings(
+ addMappingsToSiteDir(ScalaUnidoc / packageDoc / mappings, ScalaUnidoc / siteSubdirName)
+ )
+ .settings(
+ addMappingsToSiteDir(Compile / paradox / mappings, paradox / siteSubdirName)
+ )
lazy val bench = project
.dependsOn(core % "compile->test")
- .disablePlugins(SparkPackagePlugin)
.settings(publish / skip := true)
diff --git a/build/circleci/Dockerfile b/build/circleci/Dockerfile
new file mode 100644
index 000000000..334c1b15f
--- /dev/null
+++ b/build/circleci/Dockerfile
@@ -0,0 +1,74 @@
+FROM circleci/openjdk:8-jdk
+
+ENV OPENJPEG_VERSION 2.3.0
+ENV GDAL_VERSION 2.4.1
+ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
+
+# most of these libraries required for
+# python-pip pandoc && pip install setuptools => required for pyrasterframes testing
+RUN sudo apt-get update && \
+ sudo apt remove \
+ python python-minimal python2.7 python2.7-minimal \
+ libpython-stdlib libpython2.7 libpython2.7-minimal libpython2.7-stdlib \
+ && sudo apt-get install -y \
+ python3 \
+ python3-pip \
+ pandoc \
+ wget \
+ gcc g++ build-essential \
+ libcurl4-gnutls-dev \
+ libproj-dev \
+ libgeos-dev \
+ libhdf4-alt-dev \
+ libhdf5-serial-dev \
+ bash-completion \
+ cmake \
+ imagemagick \
+ libpng-dev \
+ swig \
+ ant \
+ && sudo apt autoremove \
+ && sudo apt-get clean all \
+ && pip3 install setuptools ipython==6.2.1 \
+ && sudo update-alternatives --install /usr/bin/python python /usr/bin/python3 1
+
+# install OpenJPEG
+RUN cd /tmp && \
+ wget https://github.com/uclouvain/openjpeg/archive/v${OPENJPEG_VERSION}.tar.gz && \
+ tar -xf v${OPENJPEG_VERSION}.tar.gz && \
+ cd openjpeg-${OPENJPEG_VERSION}/ && \
+ mkdir build && \
+ cd build && \
+ cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local/ && \
+ make -j && \
+ sudo make install && \
+ cd /tmp && rm -Rf v${OPENJPEG_VERSION}.tar.gz openjpeg*
+
+# Compile and install GDAL with Java bindings
+RUN cd /tmp && \
+ wget http://download.osgeo.org/gdal/${GDAL_VERSION}/gdal-${GDAL_VERSION}.tar.gz && \
+ tar -xf gdal-${GDAL_VERSION}.tar.gz && \
+ cd gdal-${GDAL_VERSION} && \
+ ./configure \
+ --with-curl \
+ --with-hdf4 \
+ --with-hdf5 \
+ --with-geos \
+ --with-geotiff=internal \
+ --with-hide-internal-symbols \
+ --with-java=$JAVA_HOME \
+ --with-libtiff=internal \
+ --with-libz=internal \
+ --with-mrf \
+ --with-openjpeg \
+ --with-threads \
+ --without-jp2mrsid \
+ --without-netcdf \
+ --without-ecw \
+ && \
+ make -j 8 && \
+ sudo make install && \
+ cd swig/java && \
+ sudo make install && \
+ sudo ldconfig && \
+ cd /tmp && sudo rm -Rf gdal*
diff --git a/build/circleci/README.md b/build/circleci/README.md
new file mode 100644
index 000000000..69b9cdff3
--- /dev/null
+++ b/build/circleci/README.md
@@ -0,0 +1,6 @@
+# CircleCI Dockerfile Build file
+
+```bash
+docker build -t s22s/rasterframes-circleci:latest .
+docker push s22s/rasterframes-circleci:latest
+```
diff --git a/core/build.sbt b/core/build.sbt
deleted file mode 100644
index eb5164045..000000000
--- a/core/build.sbt
+++ /dev/null
@@ -1,35 +0,0 @@
-enablePlugins(BuildInfoPlugin)
-
-moduleName := "rasterframes"
-
-libraryDependencies ++= Seq(
- "com.chuusai" %% "shapeless" % "2.3.2",
- "org.locationtech.geomesa" %% "geomesa-z3" % rfGeoMesaVersion.value,
- "org.locationtech.geomesa" %% "geomesa-spark-jts" % rfGeoMesaVersion.value exclude("jgridshift", "jgridshift"),
-
- spark("core").value % Provided,
- spark("mllib").value % Provided,
- spark("sql").value % Provided,
- geotrellis("spark").value,
- geotrellis("raster").value,
- geotrellis("s3").value,
- geotrellis("spark-testkit").value % Test excludeAll (
- ExclusionRule(organization = "org.scalastic"),
- ExclusionRule(organization = "org.scalatest")
- ),
- scalaTest
-)
-
-buildInfoKeys ++= Seq[BuildInfoKey](
- name, version, scalaVersion, sbtVersion, rfGeoTrellisVersion, rfGeoMesaVersion, rfSparkVersion
-)
-
-buildInfoPackage := "astraea.spark.rasterframes"
-
-buildInfoObject := "RFBuildInfo"
-
-buildInfoOptions := Seq(
- BuildInfoOption.ToMap,
- BuildInfoOption.BuildTime
-)
-
diff --git a/core/src/it/resources/log4j.properties b/core/src/it/resources/log4j.properties
index 378ae8e61..1135e4b34 100644
--- a/core/src/it/resources/log4j.properties
+++ b/core/src/it/resources/log4j.properties
@@ -37,8 +37,8 @@ log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
-log4j.logger.astraea.spark.rasterframes=DEBUG
-log4j.logger.astraea.spark.rasterframes.ref=TRACE
+log4j.logger.org.locationtech.rasterframes=WARN
+log4j.logger.org.locationtech.rasterframes.ref=WARN
log4j.logger.org.apache.parquet.hadoop.ParquetRecordReader=OFF
# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
diff --git a/core/src/it/scala/astraea/spark/rasterframes/ref/RasterSourceIT.scala b/core/src/it/scala/astraea/spark/rasterframes/ref/RasterSourceIT.scala
deleted file mode 100644
index 6f9069183..000000000
--- a/core/src/it/scala/astraea/spark/rasterframes/ref/RasterSourceIT.scala
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2019 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- */
-
-package astraea.spark.rasterframes.ref
-
-import java.net.URI
-
-import astraea.spark.rasterframes.TestEnvironment.ReadMonitor
-import astraea.spark.rasterframes.ref.RasterSource.FileGeoTiffRasterSource
-import astraea.spark.rasterframes.{TestData, TestEnvironment}
-import geotrellis.raster.io.geotiff.GeoTiff
-import geotrellis.vector.Extent
-import org.apache.spark.sql.rf.RasterSourceUDT
-
-/**
- *
- *
- * @since 8/22/18
- */
-class RasterSourceIT extends TestEnvironment with TestData {
- def sub(e: Extent) = {
- val c = e.center
- val w = e.width
- val h = e.height
- Extent(c.x, c.y, c.x + w * 0.1, c.y + h * 0.1)
- }
-
- describe("RasterSource.readAll") {
- it("should return consistently ordered tiles across bands for a given scene") {
- // These specific scenes exhibit the problem where we see different subtile segment ordering across the bands of a given scene.
- val rURI = new URI("https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/016/034/LC08_L1TP_016034_20181003_20181003_01_RT/LC08_L1TP_016034_20181003_20181003_01_RT_B4.TIF")
- val bURI = new URI("https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/016/034/LC08_L1TP_016034_20181003_20181003_01_RT/LC08_L1TP_016034_20181003_20181003_01_RT_B2.TIF")
-
- val red = RasterSource(rURI).readAll().left.get
- val blue = RasterSource(bURI).readAll().left.get
-
- red should not be empty
- red.size should equal(blue.size)
-
- red.map(_.dimensions) should contain theSameElementsAs blue.map(_.dimensions)
- }
- }
-}
diff --git a/core/src/it/scala/org/locationtech/rasterframes/ref/RasterSourceIT.scala b/core/src/it/scala/org/locationtech/rasterframes/ref/RasterSourceIT.scala
new file mode 100644
index 000000000..ae8b0b1d4
--- /dev/null
+++ b/core/src/it/scala/org/locationtech/rasterframes/ref/RasterSourceIT.scala
@@ -0,0 +1,126 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.ref
+
+import java.lang.Math.ceil
+import java.net.URI
+
+import org.locationtech.rasterframes
+import org.locationtech.rasterframes.util.time
+import org.locationtech.rasterframes.{NOMINAL_TILE_SIZE, TestData, TestEnvironment}
+
+/**
+ *
+ *
+ * @since 8/22/18
+ */
+class RasterSourceIT extends TestEnvironment with TestData {
+
+ describe("RasterSource.readAll") {
+ it("should return consistently ordered tiles across bands for a given scene") {
+ time(s"two band comparison prefer-gdal=${ rasterframes.rfConfig.getBoolean("prefer-gdal")}") {
+ // These specific scenes exhibit the problem where we see different subtile segment ordering across the bands of a given scene.
+ val rURI = new URI(
+ "https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/016/034/LC08_L1TP_016034_20181003_20181003_01_RT/LC08_L1TP_016034_20181003_20181003_01_RT_B4.TIF")
+ val bURI = new URI(
+ "https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/016/034/LC08_L1TP_016034_20181003_20181003_01_RT/LC08_L1TP_016034_20181003_20181003_01_RT_B2.TIF")
+ val red = time("read B4") {
+ RasterSource(rURI).readAll()
+ }
+ val blue = time("read B2") {
+ RasterSource(bURI).readAll()
+ }
+ time("test empty") {
+ red should not be empty
+ }
+ time("compare sizes") {
+ red.size should equal(blue.size)
+ }
+ time("compare dimensions") {
+ red.map(_.dimensions) should contain theSameElementsAs blue.map(_.dimensions)
+ }
+ }
+ }
+ }
+
+ if (GDALRasterSource.hasGDAL) {
+ println("GDAL version: " + GDALRasterSource.gdalVersion())
+
+ describe("GDAL support") {
+
+
+ it("should read JPEG2000 scene") {
+ RasterSource(localSentinel).readAll().flatMap(_.tile.statisticsDouble).size should be(64)
+ }
+
+ it("should read small MRF scene with one band converted from MODIS HDF") {
+ val (expectedTileCount, _) = expectedTileCountAndBands(2400, 2400)
+ RasterSource(modisConvertedMrfPath).readAll().flatMap(_.tile.statisticsDouble).size should be (expectedTileCount)
+ }
+
+ it("should read remote HTTP MRF scene") {
+ val (expectedTileCount, bands) = expectedTileCountAndBands(6257, 7584, 4)
+ RasterSource(remoteHttpMrfPath).readAll(bands = bands).flatMap(_.tile.statisticsDouble).size should be (expectedTileCount)
+ }
+
+ it("should read remote S3 MRF scene") {
+ val (expectedTileCount, bands) = expectedTileCountAndBands(6257, 7584, 4)
+ RasterSource(remoteS3MrfPath).readAll(bands = bands).flatMap(_.tile.statisticsDouble).size should be (expectedTileCount)
+ }
+ }
+ } else {
+ describe("GDAL missing error support") {
+ it("should throw exception reading JPEG2000 scene") {
+ intercept[IllegalArgumentException] {
+ RasterSource(localSentinel)
+ }
+ }
+
+ it("should throw exception reading MRF scene with one band converted from MODIS HDF") {
+ intercept[IllegalArgumentException] {
+ RasterSource(modisConvertedMrfPath)
+ }
+ }
+
+ it("should throw exception reading remote HTTP MRF scene") {
+ intercept[IllegalArgumentException] {
+ RasterSource(remoteHttpMrfPath)
+ }
+ }
+
+ it("should throw exception reading remote S3 MRF scene") {
+ intercept[IllegalArgumentException] {
+ RasterSource(remoteS3MrfPath)
+ }
+ }
+ }
+ }
+
+ private def expectedTileCountAndBands(x:Int, y:Int, bandCount:Int = 1) = {
+ val imageDimensions = Seq(x.toDouble, y.toDouble)
+ val tilesPerBand = imageDimensions.map(x ⇒ ceil(x / NOMINAL_TILE_SIZE)).product
+ val bands = Range(0, bandCount)
+ val expectedTileCount = tilesPerBand * bands.length
+ (expectedTileCount, bands)
+ }
+
+}
diff --git a/core/src/main/resources/reference.conf b/core/src/main/resources/reference.conf
index 980088e28..e7d3e57f5 100644
--- a/core/src/main/resources/reference.conf
+++ b/core/src/main/resources/reference.conf
@@ -1,3 +1,19 @@
rasterframes {
- nominal-tile-size: 256
+ nominal-tile-size = 256
+ prefer-gdal = true
+ showable-tiles = true
+ showable-max-cells = 20
+ raster-source-cache-timeout = 120 seconds
+}
+
+vlm.gdal {
+ options {
+ // See https://trac.osgeo.org/gdal/wiki/ConfigOptions for options
+ //CPL_DEBUG = "OFF"
+ AWS_REQUEST_PAYER = "requester"
+ GDAL_DISABLE_READDIR_ON_OPEN = "YES"
+ CPL_VSIL_CURL_ALLOWED_EXTENSIONS = ".tif,.tiff,.jp2,.mrf,.idx,.lrc,.mrf.aux.xml,.vrt"
+ }
+ // set this to `false` if CPL_DEBUG is `ON`
+ useExceptions = true
}
\ No newline at end of file
diff --git a/core/src/main/scala/astraea/spark/rasterframes/MetadataKeys.scala b/core/src/main/scala/astraea/spark/rasterframes/MetadataKeys.scala
deleted file mode 100644
index 2b4948798..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/MetadataKeys.scala
+++ /dev/null
@@ -1,13 +0,0 @@
-package astraea.spark.rasterframes
-
-/**
- *
- * @since 2/19/18
- */
-trait MetadataKeys {
- /** Key under which ContextRDD metadata is stored. */
- private[rasterframes] val CONTEXT_METADATA_KEY = "_context"
-
- /** Key under which RasterFrame role a column plays. */
- private[rasterframes] val SPATIAL_ROLE_KEY = "_stRole"
-}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/RasterFunctions.scala b/core/src/main/scala/astraea/spark/rasterframes/RasterFunctions.scala
deleted file mode 100644
index ff08dd44c..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/RasterFunctions.scala
+++ /dev/null
@@ -1,353 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2017 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- */
-
-package astraea.spark.rasterframes
-
-import astraea.spark.rasterframes.expressions.TileAssembler
-import astraea.spark.rasterframes.expressions.accessors._
-import astraea.spark.rasterframes.expressions.aggstats._
-import astraea.spark.rasterframes.expressions.generators._
-import astraea.spark.rasterframes.expressions.localops._
-import astraea.spark.rasterframes.expressions.tilestats._
-import astraea.spark.rasterframes.expressions.transformers._
-import astraea.spark.rasterframes.stats.{CellHistogram, CellStatistics}
-import astraea.spark.rasterframes.{functions => F}
-import com.vividsolutions.jts.geom.{Envelope, Geometry}
-import geotrellis.proj4.CRS
-import geotrellis.raster.mapalgebra.local.LocalTileBinaryOp
-import geotrellis.raster.{CellType, Tile}
-import org.apache.spark.annotation.Experimental
-import org.apache.spark.sql._
-import org.apache.spark.sql.functions._
-
-/**
- * UDFs for working with Tiles in Spark DataFrames.
- *
- * @since 4/3/17
- */
-trait RasterFunctions {
- import util._
- import PrimitiveEncoders._
-
- // format: off
- /** Create a row for each cell in Tile. */
- def explode_tiles(cols: Column*): Column = explode_tiles_sample(1.0, None, cols: _*)
-
- /** Create a row for each cell in Tile with random sampling and optional seed. */
- def explode_tiles_sample(sampleFraction: Double, seed: Option[Long], cols: Column*): Column =
- ExplodeTiles(sampleFraction, seed, cols)
-
- /** Create a row for each cell in Tile with random sampling (no seed). */
- def explode_tiles_sample(sampleFraction: Double, cols: Column*): Column =
- ExplodeTiles(sampleFraction, None, cols)
-
- /** Query the number of (cols, rows) in a Tile. */
- def tile_dimensions(col: Column): Column = GetDimensions(col)
-
- /** Extracts the bounding box of a geometry as a JTS envelope. */
- def envelope(col: Column): TypedColumn[Any, Envelope] = GetEnvelope(col)
-
- /** Flattens Tile into a double array. */
- def tile_to_array_double(col: Column): TypedColumn[Any, Array[Double]] =
- TileToArrayDouble(col)
-
- /** Flattens Tile into an integer array. */
- def tile_to_array_int(col: Column): TypedColumn[Any, Array[Double]] =
- TileToArrayDouble(col)
-
- @Experimental
- /** Convert array in `arrayCol` into a Tile of dimensions `cols` and `rows`*/
- def array_to_tile(arrayCol: Column, cols: Int, rows: Int) = withAlias("array_to_tile", arrayCol)(
- udf[Tile, AnyRef](F.arrayToTile(cols, rows)).apply(arrayCol)
- )
-
- /** Create a Tile from a column of cell data with location indexes and preform cell conversion. */
- def assemble_tile(columnIndex: Column, rowIndex: Column, cellData: Column, tileCols: Int, tileRows: Int, ct: CellType): TypedColumn[Any, Tile] =
- convert_cell_type(TileAssembler(columnIndex, rowIndex, cellData, lit(tileCols), lit(tileRows)), ct).as(cellData.columnName).as[Tile](singlebandTileEncoder)
-
- /** Create a Tile from a column of cell data with location indexes. */
- def assemble_tile(columnIndex: Column, rowIndex: Column, cellData: Column, tileCols: Column, tileRows: Column): TypedColumn[Any, Tile] =
- TileAssembler(columnIndex, rowIndex, cellData, tileCols, tileRows)
-
- /** Extract the Tile's cell type */
- def cell_type(col: Column): TypedColumn[Any, CellType] = GetCellType(col)
-
- /** Change the Tile's cell type */
- def convert_cell_type(col: Column, cellType: CellType): TypedColumn[Any, Tile] =
- SetCellType(col, cellType)
-
- /** Change the Tile's cell type */
- def convert_cell_type(col: Column, cellTypeName: String): TypedColumn[Any, Tile] =
- SetCellType(col, cellTypeName)
-
- /** Convert a bounding box structure to a Geometry type. Intented to support multiple schemas. */
- def bounds_geometry(bounds: Column): TypedColumn[Any, Geometry] = BoundsToGeometry(bounds)
-
- /** Assign a `NoData` value to the Tiles. */
- def with_no_data(col: Column, nodata: Double): TypedColumn[Any, Tile] = withAlias("with_no_data", col)(
- udf[Tile, Tile](F.withNoData(nodata)).apply(col)
- ).as[Tile]
-
- /** Compute the full column aggregate floating point histogram. */
- def agg_approx_histogram(col: Column): TypedColumn[Any, CellHistogram] =
- HistogramAggregate(col)
-
- /** Compute the full column aggregate floating point statistics. */
- def agg_stats(col: Column): TypedColumn[Any, CellStatistics] =
- CellStatsAggregate(col)
-
- /** Computes the column aggregate mean. */
- def agg_mean(col: Column) = CellMeanAggregate(col)
-
- /** Computes the number of non-NoData cells in a column. */
- def agg_data_cells(col: Column): TypedColumn[Any, Long] = CellCountAggregate.DataCells(col)
-
- /** Computes the number of NoData cells in a column. */
- def agg_no_data_cells(col: Column): TypedColumn[Any, Long] = CellCountAggregate.NoDataCells(col)
-
- /** Compute the Tile-wise mean */
- def tile_mean(col: Column): TypedColumn[Any, Double] =
- TileMean(col)
-
- /** Compute the Tile-wise sum */
- def tile_sum(col: Column): TypedColumn[Any, Double] =
- Sum(col)
-
- /** Compute the minimum cell value in tile. */
- def tile_min(col: Column): TypedColumn[Any, Double] =
- TileMin(col)
-
- /** Compute the maximum cell value in tile. */
- def tile_max(col: Column): TypedColumn[Any, Double] =
- TileMax(col)
-
- /** Compute TileHistogram of Tile values. */
- def tile_histogram(col: Column): TypedColumn[Any, CellHistogram] =
- TileHistogram(col)
-
- /** Compute statistics of Tile values. */
- def tile_stats(col: Column): TypedColumn[Any, CellStatistics] =
- TileStats(col)
-
- /** Counts the number of non-NoData cells per Tile. */
- def data_cells(tile: Column): TypedColumn[Any, Long] =
- DataCells(tile)
-
- /** Counts the number of NoData cells per Tile. */
- def no_data_cells(tile: Column): TypedColumn[Any, Long] =
- NoDataCells(tile)
-
- def is_no_data_tile(tile: Column): TypedColumn[Any, Boolean] =
- IsNoDataTile(tile)
-
- /** Compute cell-local aggregate descriptive statistics for a column of Tiles. */
- def agg_local_stats(col: Column) =
- LocalStatsAggregate(col)
-
- /** Compute the cell-wise/local max operation between Tiles in a column. */
- def agg_local_max(col: Column): TypedColumn[Any, Tile] = LocalTileOpAggregate.LocalMaxUDAF(col)
-
- /** Compute the cellwise/local min operation between Tiles in a column. */
- def agg_local_min(col: Column): TypedColumn[Any, Tile] = LocalTileOpAggregate.LocalMinUDAF(col)
-
- /** Compute the cellwise/local mean operation between Tiles in a column. */
- def agg_local_mean(col: Column): TypedColumn[Any, Tile] = LocalMeanAggregate(col)
-
- /** Compute the cellwise/local count of non-NoData cells for all Tiles in a column. */
- def agg_local_data_cells(col: Column): TypedColumn[Any, Tile] = LocalCountAggregate.LocalDataCellsUDAF(col)
-
- /** Compute the cellwise/local count of NoData cells for all Tiles in a column. */
- def agg_local_no_data_cells(col: Column): TypedColumn[Any, Tile] = LocalCountAggregate.LocalNoDataCellsUDAF(col)
-
- /** Cellwise addition between two Tiles or Tile and scalar column. */
- def local_add(left: Column, right: Column): TypedColumn[Any, Tile] = Add(left, right)
-
- /** Cellwise addition of a scalar value to a tile. */
- def local_add[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Add(tileCol, value)
-
- /** Cellwise subtraction between two Tiles. */
- def local_subtract(left: Column, right: Column): TypedColumn[Any, Tile] = Subtract(left, right)
-
- /** Cellwise subtraction of a scalar value from a tile. */
- def local_subtract[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Subtract(tileCol, value)
-
- /** Cellwise multiplication between two Tiles. */
- def local_multiply(left: Column, right: Column): TypedColumn[Any, Tile] = Multiply(left, right)
-
- /** Cellwise multiplication of a tile by a scalar value. */
- def local_multiply[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Multiply(tileCol, value)
-
- /** Cellwise division between two Tiles. */
- def local_divide(left: Column, right: Column): TypedColumn[Any, Tile] = Divide(left, right)
-
- /** Cellwise division of a tile by a scalar value. */
- def local_divide[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Divide(tileCol, value)
-
- /** Perform an arbitrary GeoTrellis `LocalTileBinaryOp` between two Tile columns. */
- def local_algebra(op: LocalTileBinaryOp, left: Column, right: Column):
- TypedColumn[Any, Tile] =
- withAlias(opName(op), left, right)(
- udf[Tile, Tile, Tile](op.apply).apply(left, right)
- ).as[Tile]
-
- /** Compute the normalized difference of two tile columns */
- def normalized_difference(left: Column, right: Column) =
- NormalizedDifference(left, right)
-
- /** Constructor for constant tile column */
- def make_constant_tile(value: Number, cols: Int, rows: Int, cellType: String): TypedColumn[Any, Tile] =
- udf(() => F.makeConstantTile(value, cols, rows, cellType)).apply().as(s"constant_$cellType").as[Tile]
-
- /** Alias for column of constant tiles of zero */
- def tile_zeros(cols: Int, rows: Int, cellType: String = "float64"): TypedColumn[Any, Tile] =
- udf(() => F.tileZeros(cols, rows, cellType)).apply().as(s"zeros_$cellType").as[Tile]
-
- /** Alias for column of constant tiles of one */
- def tile_ones(cols: Int, rows: Int, cellType: String = "float64"): TypedColumn[Any, Tile] =
- udf(() => F.tileOnes(cols, rows, cellType)).apply().as(s"ones_$cellType").as[Tile]
-
- /** Where the mask tile contains NODATA, replace values in the source tile with NODATA */
- def mask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] =
- Mask.MaskByDefined(sourceTile, maskTile)
-
- /** Where the mask tile equals the mask value, replace values in the source tile with NODATA */
- def mask_by_value(sourceTile: Column, maskTile: Column, maskValue: Column): TypedColumn[Any, Tile] =
- Mask.MaskByValue(sourceTile, maskTile, maskValue)
-
- /** Where the mask tile DOES NOT contain NODATA, replace values in the source tile with NODATA */
- def inverse_mask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] =
- Mask.InverseMaskByDefined(sourceTile, maskTile)
-
- /** Create a tile where cells in the grid defined by cols, rows, and bounds are filled with the given value. */
- def rasterize(geometry: Column, bounds: Column, value: Column, cols: Int, rows: Int): TypedColumn[Any, Tile] =
- withAlias("rasterize", geometry)(
- udf(F.rasterize(_: Geometry, _: Geometry, _: Int, cols, rows)).apply(geometry, bounds, value)
- ).as[Tile]
-
- /** Reproject a column of geometry from one CRS to another. */
- def reproject_geometry(sourceGeom: Column, srcCRS: CRS, dstCRSCol: Column): TypedColumn[Any, Geometry] =
- ReprojectGeometry(sourceGeom, srcCRS, dstCRSCol)
-
- /** Reproject a column of geometry from one CRS to another. */
- def reproject_geometry(sourceGeom: Column, srcCRSCol: Column, dstCRS: CRS): TypedColumn[Any, Geometry] =
- ReprojectGeometry(sourceGeom, srcCRSCol, dstCRS)
-
- /** Reproject a column of geometry from one CRS to another. */
- def reproject_geometry(sourceGeom: Column, srcCRS: CRS, dstCRS: CRS): TypedColumn[Any, Geometry] =
- ReprojectGeometry(sourceGeom, srcCRS, dstCRS)
-
- /** Render Tile as ASCII string, for debugging purposes. */
- def render_ascii(col: Column): TypedColumn[Any, String] =
- DebugRender.RenderAscii(col)
-
- /** Render Tile cell values as numeric values, for debugging purposes. */
- def render_matrix(col: Column): TypedColumn[Any, String] =
- DebugRender.RenderMatrix(col)
-
- /** Cellwise less than value comparison between two tiles. */
- def local_less(left: Column, right: Column): TypedColumn[Any, Tile] =
- Less(left, right)
-
- /** Cellwise less than value comparison between a tile and a scalar. */
- def local_less[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] =
- Less(tileCol, value)
-
- /** Cellwise less than or equal to value comparison between a tile and a scalar. */
- def local_less_equal(left: Column, right: Column): TypedColumn[Any, Tile] =
- LessEqual(left, right)
-
- /** Cellwise less than or equal to value comparison between a tile and a scalar. */
- def local_less_equal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] =
- LessEqual(tileCol, value)
-
- /** Cellwise greater than value comparison between two tiles. */
- def local_greater(left: Column, right: Column): TypedColumn[Any, Tile] =
- Greater(left, right)
-
- /** Cellwise greater than value comparison between a tile and a scalar. */
- def local_greater[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] =
- Greater(tileCol, value)
-
- /** Cellwise greater than or equal to value comparison between two tiles. */
- def local_greater_equal(left: Column, right: Column): TypedColumn[Any, Tile] =
- GreaterEqual(left, right)
-
- /** Cellwise greater than or equal to value comparison between a tile and a scalar. */
- def local_greater_equal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] =
- GreaterEqual(tileCol, value)
-
- /** Cellwise equal to value comparison between two tiles. */
- def local_equal(left: Column, right: Column): TypedColumn[Any, Tile] =
- Equal(left, right)
-
- /** Cellwise equal to value comparison between a tile and a scalar. */
- def local_equal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] =
- Equal(tileCol, value)
-
- /** Cellwise inequality comparison between two tiles. */
- def local_unequal(left: Column, right: Column): TypedColumn[Any, Tile] =
- Unequal(left, right)
-
- /** Cellwise inequality comparison between a tile and a scalar. */
- def local_unequal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] =
- Unequal(tileCol, value)
-
- /** Round cell values to nearest integer without chaning cell type. */
- def round(tileCol: Column): TypedColumn[Any, Tile] =
- Round(tileCol)
-
- /** Take natural logarithm of cell values. */
- def log(tileCol: Column): TypedColumn[Any, Tile] =
- Log(tileCol)
-
- /** Take base 10 logarithm of cell values. */
- def log10(tileCol: Column): TypedColumn[Any, Tile] =
- Log10(tileCol)
-
- /** Take base 2 logarithm of cell values. */
- def log2(tileCol: Column): TypedColumn[Any, Tile] =
- Log2(tileCol)
-
- /** Natural logarithm of one plus cell values. */
- def log1p(tileCol: Column): TypedColumn[Any, Tile] =
- Log1p(tileCol)
-
- /** Exponential of cell values */
- def exp(tileCol: Column): TypedColumn[Any, Tile] =
- Exp(tileCol)
-
- /** Ten to the power of cell values */
- def exp10(tileCol: Column): TypedColumn[Any, Tile] =
- Exp10(tileCol)
-
- /** Two to the power of cell values */
- def exp2(tileCol: Column): TypedColumn[Any, Tile] =
- Exp2(tileCol)
-
- /** Exponential of cell values, less one*/
- def expm1(tileCol: Column): TypedColumn[Any, Tile] =
- ExpM1(tileCol)
-
- /** Resample tile using nearest-neighbor */
- def resample[T: Numeric](tileCol: Column, value: T) = Resample(tileCol, value)
-
- /** Resample tile using nearest-neighbor */
- def resample(tileCol: Column, column2: Column) = Resample(tileCol, column2)
-
-}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/StandardColumns.scala b/core/src/main/scala/astraea/spark/rasterframes/StandardColumns.scala
deleted file mode 100644
index 340b17198..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/StandardColumns.scala
+++ /dev/null
@@ -1,63 +0,0 @@
-package astraea.spark.rasterframes
-
-import java.sql.Timestamp
-
-import geotrellis.raster.{Tile, TileFeature}
-import geotrellis.spark.{SpatialKey, TemporalKey}
-import org.apache.spark.sql.functions.col
-import com.vividsolutions.jts.geom.{Point => jtsPoint, Polygon => jtsPolygon}
-import geotrellis.proj4.CRS
-import geotrellis.vector.Extent
-import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders._
-
-/**
- * Constants identifying column in most RasterFrames.
- *
- * @since 2/19/18
- */
-trait StandardColumns {
- /** Default RasterFrame spatial column name. */
- val SPATIAL_KEY_COLUMN = col("spatial_key").as[SpatialKey]
-
- /** Default RasterFrame temporal column name. */
- val TEMPORAL_KEY_COLUMN = col("temporal_key").as[TemporalKey]
-
- /** Default RasterFrame timestamp column name */
- val TIMESTAMP_COLUMN = col("timestamp").as[Timestamp]
-
-
- /** Default RasterFrame column name for an tile bounds value. */
- // This is a `def` because `PolygonUDT` needs to be initialized first.
- def BOUNDS_COLUMN = col("bounds").as[jtsPolygon]
-
- /** Default RasterFrame column name for the center coordinates of the tile's bounds. */
- // This is a `def` because `PointUDT` needs to be initialized first.
- def CENTER_COLUMN = col("center").as[jtsPoint]
-
- /** Default Extent column name. */
- def EXTENT_COLUMN = col("extent").as[Extent]
-
- /** Default CRS column name. */
- def CRS_COLUMN = col("crs").as[CRS]
-
- /** Default RasterFrame column name for an added spatial index. */
- val SPATIAL_INDEX_COLUMN = col("spatial_index").as[Long]
-
- /** Default RasterFrame tile column name. */
- // This is a `def` because `TileUDT` needs to be initialized first.
- def TILE_COLUMN = col("tile").as[Tile]
-
- /** Default RasterFrame [[TileFeature.data]] column name. */
- val TILE_FEATURE_DATA_COLUMN = col("tile_data")
-
- /** Default GeoTiff tags column. */
- val METADATA_COLUMN = col("metadata").as[Map[String, String]]
-
- /** Default column index column for the cells of exploded tiles. */
- val COLUMN_INDEX_COLUMN = col("column_index").as[Int]
-
- /** Default teil column index column for the cells of exploded tiles. */
- val ROW_INDEX_COLUMN = col("row_index").as[Int]
-}
-
-object StandardColumns extends StandardColumns
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterSourceToRasterRefs.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterSourceToRasterRefs.scala
deleted file mode 100644
index 2581f8be5..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterSourceToRasterRefs.scala
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2019 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- */
-
-package astraea.spark.rasterframes.expressions.transformers
-
-import astraea.spark.rasterframes.encoders.CatalystSerializer
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.ref.RasterRef
-import astraea.spark.rasterframes.util._
-import com.typesafe.scalalogging.LazyLogging
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
-import org.apache.spark.sql.rf._
-import org.apache.spark.sql.types.{DataType, StructField, StructType}
-import org.apache.spark.sql.{Column, TypedColumn}
-
-import scala.util.control.NonFatal
-
-/**
- * Accepts RasterRef and generates one or more RasterRef instances representing the
- * native internal sub-tiling, if any (and requested).
- *
- * @since 9/6/18
- */
-case class RasterSourceToRasterRefs(children: Seq[Expression], applyTiling: Boolean) extends Expression
- with Generator with CodegenFallback with ExpectsInputTypes with LazyLogging {
-
- private val RasterSourceType = new RasterSourceUDT()
- private val rasterRefSchema = CatalystSerializer[RasterRef].schema
-
- override def inputTypes: Seq[DataType] = Seq.fill(children.size)(RasterSourceType)
- override def nodeName: String = "raster_source_to_raster_ref"
-
- override def elementSchema: StructType = StructType(
- children.map(e ⇒ StructField(e.name, rasterRefSchema, false))
- )
-
- override def eval(input: InternalRow): TraversableOnce[InternalRow] = {
- try {
- val refs = children.map { child ⇒
- val src = RasterSourceType.deserialize(child.eval(input))
- if (applyTiling) src.nativeTiling.map(e ⇒ RasterRef(src, Some(e))) else Seq(RasterRef(src))
- }
- refs.transpose.map(ts ⇒ InternalRow(ts.map(_.toInternalRow): _*))
- }
- catch {
- case NonFatal(ex) ⇒
- logger.error("Error fetching data for " + input, ex)
- Traversable.empty
- }
- }
-}
-
-object RasterSourceToRasterRefs {
- def apply(rrs: Column*): TypedColumn[Any, RasterRef] = apply(true, rrs: _*)
- def apply(applyTiling: Boolean, rrs: Column*): TypedColumn[Any, RasterRef] =
- new Column(new RasterSourceToRasterRefs(rrs.map(_.expr), applyTiling)).as[RasterRef]
-}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterSourceToTiles.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterSourceToTiles.scala
deleted file mode 100644
index 2b1caa3ba..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterSourceToTiles.scala
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2019 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- */
-
-package astraea.spark.rasterframes.expressions.transformers
-
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.util._
-import com.typesafe.scalalogging.LazyLogging
-import org.apache.spark.sql.Column
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
-import org.apache.spark.sql.rf._
-import org.apache.spark.sql.types.{DataType, StructField, StructType}
-
-import scala.util.control.NonFatal
-
-/**
- * Accepts RasterRef and generates one or more RasterRef instances representing the
- * native internal sub-tiling, if any (and requested).
- *
- * @since 9/6/18
- */
-case class RasterSourceToTiles(children: Seq[Expression], applyTiling: Boolean) extends Expression
- with Generator with CodegenFallback with ExpectsInputTypes with LazyLogging {
-
- private val RasterSourceType = new RasterSourceUDT()
- private val TileType = new TileUDT()
-
- override def inputTypes: Seq[DataType] = Seq.fill(children.size)(RasterSourceType)
- override def nodeName: String = "raster_source_to_tile"
-
- override def elementSchema: StructType = StructType(
- children.map(e ⇒ StructField(e.name, TileType, true))
- )
-
- override def eval(input: InternalRow): TraversableOnce[InternalRow] = {
- implicit val ser = TileUDT.tileSerializer
-
- try {
- val refs = children.map { child ⇒
- val src = RasterSourceType.deserialize(child.eval(input))
- val tiles = if (applyTiling) src.readAll() else {
- src.read(src.extent).right.map(Seq(_)).left.map(Seq(_))
- }
-
- require(tiles.isLeft, "Multiband tiles are not yet supported")
-
- tiles.left.get
- }
- refs.transpose.map(ts ⇒ InternalRow(ts.map(r ⇒ r.tile.toInternalRow): _*))
- }
- catch {
- case NonFatal(ex) ⇒
- logger.error("Error fetching data for " + sql, ex)
- Traversable.empty
- }
- }
-}
-
-
-object RasterSourceToTiles {
- def apply(rrs: Column*): Column = apply(true, rrs: _*)
- def apply(applyTiling: Boolean, rrs: Column*): Column =
- new Column(new RasterSourceToTiles(rrs.map(_.expr), applyTiling))
-}
\ No newline at end of file
diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/ContextRDDMethods.scala b/core/src/main/scala/astraea/spark/rasterframes/extensions/ContextRDDMethods.scala
deleted file mode 100644
index ef0c901f8..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/extensions/ContextRDDMethods.scala
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright 2017 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package astraea.spark.rasterframes.extensions
-
-import astraea.spark.rasterframes.PairRDDConverter._
-import astraea.spark.rasterframes.StandardColumns._
-import astraea.spark.rasterframes.extensions.Implicits._
-import astraea.spark.rasterframes.util._
-import astraea.spark.rasterframes.{PairRDDConverter, RasterFrame}
-import geotrellis.raster.{CellGrid, Tile}
-import geotrellis.spark._
-import geotrellis.spark.io._
-import geotrellis.util.MethodExtensions
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.SparkSession
-
-/**
- * Extension method on `ContextRDD`-shaped RDDs with appropriate context bounds to create a RasterFrame.
- * @since 7/18/17
- */
-abstract class SpatialContextRDDMethods[T <: CellGrid](implicit spark: SparkSession)
- extends MethodExtensions[RDD[(SpatialKey, T)] with Metadata[TileLayerMetadata[SpatialKey]]] {
- import PairRDDConverter._
-
- def toRF(implicit converter: PairRDDConverter[SpatialKey, T]): RasterFrame = toRF(TILE_COLUMN.columnName)
-
- def toRF(tileColumnName: String)(implicit converter: PairRDDConverter[SpatialKey, T]): RasterFrame = {
- val df = self.toDataFrame.setSpatialColumnRole(SPATIAL_KEY_COLUMN, self.metadata)
- val defName = TILE_COLUMN.columnName
- df.mapWhen(_ ⇒ tileColumnName != defName, _.withColumnRenamed(defName, tileColumnName))
- .certify
- }
-}
-
-/**
- * Extension method on `ContextRDD`-shaped [[Tile]] RDDs keyed with [[SpaceTimeKey]], with appropriate context bounds to create a RasterFrame.
- * @since 9/11/17
- */
-abstract class SpatioTemporalContextRDDMethods[T <: CellGrid](
- implicit spark: SparkSession)
- extends MethodExtensions[RDD[(SpaceTimeKey, T)] with Metadata[TileLayerMetadata[SpaceTimeKey]]] {
-
- def toRF(implicit converter: PairRDDConverter[SpaceTimeKey, T]): RasterFrame = toRF(TILE_COLUMN.columnName)
-
- def toRF(tileColumnName: String)(implicit converter: PairRDDConverter[SpaceTimeKey, T]): RasterFrame = {
- val df = self.toDataFrame
- .setSpatialColumnRole(SPATIAL_KEY_COLUMN, self.metadata)
- .setTemporalColumnRole(TEMPORAL_KEY_COLUMN)
- val defName = TILE_COLUMN.columnName
- df.mapWhen(_ ⇒ tileColumnName != defName, _.withColumnRenamed(defName, tileColumnName))
- .certify
- }
-}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/DataFrameMethods.scala b/core/src/main/scala/astraea/spark/rasterframes/extensions/DataFrameMethods.scala
deleted file mode 100644
index ca38322ac..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/extensions/DataFrameMethods.scala
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * Copyright 2017 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package astraea.spark.rasterframes.extensions
-
-import astraea.spark.rasterframes.StandardColumns._
-import astraea.spark.rasterframes.util._
-import astraea.spark.rasterframes.{MetadataKeys, RasterFrame}
-import geotrellis.raster.Tile
-import geotrellis.spark.io._
-import geotrellis.spark.{SpaceTimeKey, SpatialComponent, SpatialKey, TemporalKey, TileLayerMetadata}
-import geotrellis.util.MethodExtensions
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.functions._
-import org.apache.spark.sql.rf.TileUDT
-import org.apache.spark.sql.types.{MetadataBuilder, StructField}
-import org.apache.spark.sql.{Column, DataFrame, TypedColumn}
-import spray.json.JsonFormat
-import astraea.spark.rasterframes.encoders.StandardEncoders._
-import scala.util.Try
-
-/**
- * Extension methods over [[DataFrame]].
- *
- * @since 7/18/17
- */
-trait DataFrameMethods[DF <: DataFrame] extends MethodExtensions[DF] with MetadataKeys {
- import Implicits.{WithDataFrameMethods, WithMetadataBuilderMethods, WithMetadataMethods, WithRasterFrameMethods}
-
- private def selector(column: Column) = (attr: Attribute) ⇒
- attr.name == column.columnName || attr.semanticEquals(column.expr)
-
- /** Map over the Attribute representation of Columns, modifying the one matching `column` with `op`. */
- private[astraea] def mapColumnAttribute(column: Column, op: Attribute ⇒ Attribute): DF = {
- val analyzed = self.queryExecution.analyzed.output
- val selects = selector(column)
- val attrs = analyzed.map { attr ⇒
- if(selects(attr)) op(attr) else attr
- }
- self.select(attrs.map(a ⇒ new Column(a)): _*).asInstanceOf[DF]
- }
-
- private[astraea] def addColumnMetadata(column: Column, op: MetadataBuilder ⇒ MetadataBuilder): DF = {
- mapColumnAttribute(column, attr ⇒ {
- val md = new MetadataBuilder().withMetadata(attr.metadata)
- attr.withMetadata(op(md).build)
- })
- }
-
- private[astraea] def fetchMetadataValue[D](column: Column, reader: (Attribute) ⇒ D): Option[D] = {
- val analyzed = self.queryExecution.analyzed.output
- analyzed.find(selector(column)).map(reader)
- }
-
- private[astraea]
- def setSpatialColumnRole[K: SpatialComponent: JsonFormat](
- column: Column, md: TileLayerMetadata[K]): DF =
- addColumnMetadata(column,
- _.attachContext(md.asColumnMetadata).tagSpatialKey
- )
-
- private[astraea]
- def setTemporalColumnRole(column: Column): DF =
- addColumnMetadata(column, _.tagTemporalKey)
-
- /** Get the role tag the column plays in the RasterFrame, if any. */
- private[astraea]
- def getColumnRole(column: Column): Option[String] =
- fetchMetadataValue(column, _.metadata.getString(SPATIAL_ROLE_KEY))
-
- /** Get the columns that are of type `Tile` */
- def tileColumns: Seq[TypedColumn[Any, Tile]] =
- self.schema.fields
- .filter(_.dataType.typeName.equalsIgnoreCase(TileUDT.typeName))
- .map(f ⇒ col(f.name).as[Tile])
-
- /** Get the columns that are not of type `Tile` */
- def notTileColumns: Seq[Column] =
- self.schema.fields
- .filterNot(_.dataType.typeName.equalsIgnoreCase(TileUDT.typeName))
- .map(f ⇒ col(f.name))
-
- /** Get the spatial column. */
- def spatialKeyColumn: Option[TypedColumn[Any, SpatialKey]] = {
- val key = findSpatialKeyField
- key
- .map(_.name)
- .map(col(_).as[SpatialKey])
- }
-
- /** Get the temporal column, if any. */
- def temporalKeyColumn: Option[TypedColumn[Any, TemporalKey]] = {
- val key = findTemporalKeyField
- key.map(_.name).map(col(_).as[TemporalKey])
- }
-
- /** Find the field tagged with the requested `role` */
- private[rasterframes] def findRoleField(role: String): Option[StructField] =
- self.schema.fields.find(
- f ⇒
- f.metadata.contains(SPATIAL_ROLE_KEY) &&
- f.metadata.getString(SPATIAL_ROLE_KEY) == role
- )
-
- /** The spatial key is the first one found with context metadata attached to it. */
- private[rasterframes] def findSpatialKeyField: Option[StructField] =
- findRoleField(SPATIAL_KEY_COLUMN.columnName)
-
- /** The temporal key is the first one found with the temporal tag. */
- private[rasterframes] def findTemporalKeyField: Option[StructField] =
- findRoleField(TEMPORAL_KEY_COLUMN.columnName)
-
- /** Renames all columns such that they start with the given prefix string.
- * Useful for preparing dataframes for joins where duplicate names may arise.
- */
- def withPrefixedColumnNames(prefix: String): DF =
- self.columns.foldLeft(self)((df, c) ⇒ df.withColumnRenamed(c, s"$prefix$c").asInstanceOf[DF])
-
- /** Converts this DataFrame to a RasterFrame after ensuring it has:
- *
- *
- * - a space or space-time key column
- *
- one or more tile columns
- *
- tile layout metadata
- *
- *
- * If any of the above are violated, and [[IllegalArgumentException]] is thrown.
- *
- * @return validated RasterFrame
- * @throws IllegalArgumentException when constraints are not met.
- */
- @throws[IllegalArgumentException]
- def asRF: RasterFrame = {
- val potentialRF = certifyRasterframe(self)
-
- require(
- potentialRF.findSpatialKeyField.nonEmpty,
- "A RasterFrame requires a column identified as a spatial key"
- )
-
- require(potentialRF.tileColumns.nonEmpty, "A RasterFrame requires at least one tile column")
-
- require(
- Try(potentialRF.tileLayerMetadata).isSuccess,
- "A RasterFrame requires embedded TileLayerMetadata"
- )
-
- potentialRF
- }
-
- /**
- * Convert DataFrame into a RasterFrame
- *
- * @param spatialKey The column where the spatial key is stored
- * @param tlm Metadata describing layout under which tiles were created. Note: no checking is
- * performed to ensure metadata, key-space, and tiles are coherent.
- * @throws IllegalArgumentException when constraints outlined in `asRF` are not met.
- * @return Encoded RasterFrame
- */
- @throws[IllegalArgumentException]
- def asRF(spatialKey: Column, tlm: TileLayerMetadata[SpatialKey]): RasterFrame =
- setSpatialColumnRole(spatialKey, tlm).asRF
-
- /**
- * Convert DataFrame into a RasterFrame
- *
- * @param spatialKey The column where the spatial key is stored
- * @param temporalKey The column tagged under the temporal role
- * @param tlm Metadata describing layout under which tiles were created. Note: no checking is
- * performed to ensure metadata, key-space, and tiles are coherent.
- * @throws IllegalArgumentException when constraints outlined in `asRF` are not met.
- * @return Encoded RasterFrame
- */
- @throws[IllegalArgumentException]
- def asRF(spatialKey: Column, temporalKey: Column, tlm: TileLayerMetadata[SpaceTimeKey]): RasterFrame =
- setSpatialColumnRole(spatialKey, tlm)
- .setTemporalColumnRole(temporalKey)
- .asRF
-
-// @throws[IllegalArgumentException]
-// def asRF(space: LayerSpace): RasterFrame = {
-// require(tileColumns.isEmpty, "This method doesn't yet support existing tile columns")
-// // We have two use cases to consider: This is already a rasterframe and we need to
-// // reproject it. If we have RasterRefs then we reproject those
-// val (refFields, otherFields) = self.schema.fields
-// .partition(_.dataType.typeName.equalsIgnoreCase(RasterRefUDT.typeName))
-//
-// val refCols = refFields.map(f ⇒ self(f.name).as[RasterRef])
-// val otherCols = otherFields.map(f ⇒ self(f.name))
-//
-// // Reproject tile into layer space
-// val projected = self.select(otherCols :+ ProjectIntoLayer(refCols, space): _*)
-//
-// // Lastly, convert cell type as desired
-// val tileCols = projected.tileColumns.map(c ⇒ convert_cell_type(c, space.cell_type).as(c.columnName))
-// val remCols = projected.notTileColumns
-//
-// val layer = projected.select(remCols ++ tileCols: _*)
-//
-// val tlm = space.asTileLayerMetadata
-// layer.setSpatialColumnRole(SPATIAL_KEY_COLUMN, tlm).asRF
-// }
-
- /**
- * Converts [[DataFrame]] to a RasterFrame if the following constraints are fulfilled:
- *
- *
- * - a space or space-time key column
- *
- one or more tile columns
- *
- tile layout metadata
- *
- *
- * @return Some[RasterFrame] if constraints fulfilled, [[None]] otherwise.
- */
- def asRFSafely: Option[RasterFrame] = Try(asRF).toOption
-
- /**
- * Tests for the following conditions on the [[DataFrame]]:
- *
- *
- * - a space or space-time key column
- *
- one or more tile columns
- *
- tile layout metadata
- *
- *
- * @return true if all constraints are fulfilled, false otherwise.
- */
- def isRF: Boolean = Try(asRF).isSuccess
-
- /** Internal method for slapping the RasterFreame seal of approval on a DataFrame.
- * Only call if if you are sure it has a spatial key and tile columns and TileLayerMetadata. */
- private[astraea] def certify = certifyRasterframe(self)
-}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/ref/HttpRangeReader.scala b/core/src/main/scala/astraea/spark/rasterframes/ref/HttpRangeReader.scala
deleted file mode 100644
index b0675e8f4..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/ref/HttpRangeReader.scala
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright 2016 Azavea
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// NB: Copied from
-// https://github.com/locationtech/geotrellis/blob/835200c0a6031ddc09fca218ff082cefc9b116c6/spark/src/main/scala/geotrellis/spark/io/http/util/HttpRangeReader.scala
-// TODO: Submit PR with changes/fixes.
-//package geotrellis.spark.io.http.util
-
-package astraea.spark.rasterframes.ref
-
-
-import geotrellis.util.{LazyLogging, RangeReader}
-import scalaj.http.{Http, HttpResponse}
-import java.net.{URI, URL}
-
-import scala.util.Try
-
-
-/**
- * This class extends [[RangeReader]] by reading chunks out of a GeoTiff at the
- * specified HTTP location.
- *
- * @param url: A [[URL]] pointing to the desired GeoTiff.
- */
-class HttpRangeReader(url: URL, useHeadRequest: Boolean) extends RangeReader with LazyLogging {
-
- val request = Http(url.toString)
-
- lazy val response: HttpResponse[String] = if(useHeadRequest) {
- request.method("HEAD").asString
- }
- else {
- request.method("GET").execute { is => "" }
- }
-
- lazy val totalLength: Long = {
- /**
- * "The Accept-Ranges response HTTP header is a marker used by the server
- * to advertise its support of partial requests. The value of this field
- * indicates the unit that can be used to define a range."
- * https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Ranges
- */
- require(response.header("Accept-Ranges").contains("bytes"), "Server doesn't support ranged byte reads")
-
- val contentLength = response
- .header("Content-Length")
- .flatMap({ cl => Try(cl.toLong).toOption }) match {
- case Some(num) => num
- case None => -1L
- }
-
- require(contentLength > 0,
- "Server didn't provide (required) \"Content-Length\" headers, unable to do range-based read")
-
- contentLength
- }
-
- def readClippedRange(start: Long, length: Int): Array[Byte] = {
- //println("Range read", s"$start-${start + length} ($length bytes)")
-
- val res = request
- .method("GET")
- .header("Range", s"bytes=${start}-${start + length}")
- .asBytes
-
- /**
- * "If the byte-range-set is unsatisfiable, the server SHOULD return
- * a response with a status of 416 (Requested range not satisfiable).
- * Otherwise, the server SHOULD return a response with a status of 206
- * (Partial Content) containing the satisfiable ranges of the entity-body."
- * https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html
- */
- require(res.code != 416,
- "Server unable to generate the byte range between ${start} and ${start + length}")
-
- if (res.code != 206) logger.info("Server responded to range request with HTTP code other than PARTIAL_RESPONSE (206)")
-
- res.body
- }
-
-}
-
-/** The companion object of [[HttpRangeReader]] */
-object HttpRangeReader {
-
- def apply(address: String): HttpRangeReader = apply(new URL(address))
-
- def apply(uri: URI): HttpRangeReader = apply(uri.toURL)
-
- /**
- * Returns a new instance of HttpRangeReader.
- *
- * @param url: A [[URL]] pointing to the desired GeoTiff.
- * @return A new instance of HttpRangeReader.
- */
- def apply(url: URL): HttpRangeReader = new HttpRangeReader(url, true)
-
- /**
- * Returns a new instance of HttpRangeReader which does not use HEAD
- * to determine the totalLength.
- *
- * @param url: A [[URL]] pointing to the desired GeoTiff.
- * @return A new instance of HttpRangeReader.
- */
- def withoutHeadRequest(url: URL): HttpRangeReader = new HttpRangeReader(url, false)
-
- def withoutHeadRequest(address: String): HttpRangeReader = withoutHeadRequest(new URL(address))
-
- def withoutHeadRequest(uri: URI): HttpRangeReader = withoutHeadRequest(uri.toURL)
-}
\ No newline at end of file
diff --git a/core/src/main/scala/astraea/spark/rasterframes/ref/LayerSpace.scala b/core/src/main/scala/astraea/spark/rasterframes/ref/LayerSpace.scala
deleted file mode 100644
index 25a9d050c..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/ref/LayerSpace.scala
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2018 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- */
-
-package astraea.spark.rasterframes.ref
-
-import astraea.spark.rasterframes.tiles.ProjectedRasterTile
-import geotrellis.proj4.CRS
-import geotrellis.raster._
-import geotrellis.raster.resample.ResampleMethod
-import geotrellis.spark.tiling.LayoutDefinition
-import geotrellis.spark.{SpatialKey, _}
-
-
-/**
- * NB: This package is only a temporary home for this.
- *
- * @since 9/5/18
- */
-case class LayerSpace(
- crs: CRS,
- cellType: CellType,
- layout: LayoutDefinition,
- resampleMethod: ResampleMethod = ResampleMethod.DEFAULT
-) {
-
- def reproject(dest: CRS): LayerSpace = {
- copy(
- crs = dest,
- layout = layout.copy(extent = layout.extent.reproject(crs, dest))
- )
- }
-
- def asTileLayerMetadata: TileLayerMetadata[SpatialKey] = {
- val bounds = KeyBounds(
- SpatialKey(0, 0),
- SpatialKey(layout.layoutCols - 1, layout.layoutRows - 1)
- )
- TileLayerMetadata(cellType, layout, layout.extent, crs, bounds)
- }
-}
-
-object LayerSpace {
-
- private[rasterframes]
- def defaultLayout(prt: ProjectedRasterTile): LayoutDefinition =
- LayoutDefinition(prt.extent, TileLayout(1, 1, prt.cols, prt.rows))
-
- def from(rs: RasterSource): LayerSpace = new LayerSpace(
- rs.crs, rs.cellType, LayoutDefinition(rs.extent, rs.nativeLayout
- .getOrElse(TileLayout(1, 1, rs.cols, rs.rows))
- )
- )
-
- def from(rr: RasterRef): LayerSpace = new LayerSpace(
- rr.crs, rr.cellType, RasterRef.defaultLayout(rr)
- )
-
- def from(prt: ProjectedRasterTile): LayerSpace = new LayerSpace(
- prt.crs, prt.cellType, defaultLayout(prt)
- )
-
-}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/ref/RasterSource.scala b/core/src/main/scala/astraea/spark/rasterframes/ref/RasterSource.scala
deleted file mode 100644
index 9dc9bd55e..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/ref/RasterSource.scala
+++ /dev/null
@@ -1,402 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2018 Astraea. Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- *
- */
-
-package astraea.spark.rasterframes.ref
-
-import java.net.URI
-import java.time.ZonedDateTime
-import java.time.format.DateTimeFormatter
-
-import astraea.spark.rasterframes.NOMINAL_TILE_SIZE
-import astraea.spark.rasterframes.model.TileContext
-import astraea.spark.rasterframes.ref.RasterRef.RasterRefTile
-import astraea.spark.rasterframes.tiles.ProjectedRasterTile
-import astraea.spark.rasterframes.util.GeoTiffInfoSupport
-import com.typesafe.scalalogging.LazyLogging
-import geotrellis.proj4.CRS
-import geotrellis.raster._
-import geotrellis.raster.io.geotiff.reader.GeoTiffReader
-import geotrellis.raster.io.geotiff.{GeoTiffSegmentLayout, MultibandGeoTiff, SinglebandGeoTiff, Tags}
-import geotrellis.raster.split.Split
-import geotrellis.spark.io.hadoop.HdfsRangeReader
-import geotrellis.spark.io.s3.S3Client
-import geotrellis.spark.io.s3.util.S3RangeReader
-import geotrellis.spark.tiling.LayoutDefinition
-import geotrellis.util.{FileRangeReader, RangeReader}
-import geotrellis.vector.Extent
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.Path
-import org.apache.spark.annotation.Experimental
-import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
-import org.apache.spark.sql.rf.RasterSourceUDT
-
-import scala.util.Try
-
-/**
- * Abstraction over fetching geospatial raster data.
- *
- * @since 8/21/18
- */
-@Experimental
-sealed trait RasterSource extends ProjectedRasterLike with Serializable {
- def crs: CRS
-
- def extent: Extent
-
- def timestamp: Option[ZonedDateTime]
-
- def cellType: CellType
-
- def bandCount: Int
-
- def tags: Option[Tags]
-
- def read(extent: Extent): Either[Raster[Tile], Raster[MultibandTile]]
-
- /** Reads the given extent as a single multiband raster. */
- def readMultiband(extent: Extent): Raster[MultibandTile] =
- read(extent).fold(r => {
- r.copy(tile = MultibandTile(r.tile))
- }, identity)
-
- def readAll(): Either[Seq[Raster[Tile]], Seq[Raster[MultibandTile]]]
- def readAllMultiband(): Seq[Raster[MultibandTile]] =
- readAll().fold(_.map(r => {
- r.copy(tile = MultibandTile(r.tile))
- }), identity)
-
- def readAllLazy(): Either[Seq[Raster[Tile]], Seq[Raster[MultibandTile]]] = {
- val extents = nativeTiling
- if (bandCount == 1) {
- val rasters = for {
- extent ← extents
- rr = RasterRef(this, Some(extent))
- tile: Tile = RasterRefTile(rr)
- } yield Raster(tile, extent)
- Left(rasters)
- }
- else {
- // Need to figure this out.
- RasterSource._logger.warn("Lazy reading is not available for multiband images. Performing eager read.")
- val rasters = for {
- extent ← extents
- raster = this.read(extent).right.get
- } yield raster
- Right(rasters)
- }
- }
-
- def nativeLayout: Option[TileLayout]
-
- def rasterExtent = RasterExtent(extent, cols, rows)
-
- def cellSize = CellSize(extent, cols, rows)
-
- def gridExtent = GridExtent(extent, cellSize)
-
- def tileContext: TileContext = TileContext(extent, crs)
-
- def nativeTiling: Seq[Extent] = {
- nativeLayout.map { tileLayout ⇒
- val layout = LayoutDefinition(extent, tileLayout)
- val transform = layout.mapTransform
- for {
- col ← 0 until tileLayout.layoutCols
- row ← 0 until tileLayout.layoutRows
- } yield transform(col, row)
- }
- .getOrElse(Seq(extent))
- }
-}
-
-object RasterSource extends LazyLogging {
- implicit def rsEncoder: ExpressionEncoder[RasterSource] = {
- RasterSourceUDT // Makes sure UDT is registered first
- ExpressionEncoder()
- }
-
- private def _logger = logger
-
- def apply(source: URI, callback: Option[ReadCallback] = None): RasterSource =
- source.getScheme match {
- case "http" | "https" ⇒ HttpGeoTiffRasterSource(source, callback)
- case "file" ⇒ FileGeoTiffRasterSource(source, callback)
- case "hdfs" | "s3n" | "s3a" | "wasb" | "wasbs" ⇒
- // TODO: How can we get the active hadoop configuration
- // TODO: without having to pass it through?
- val config = () ⇒ new Configuration()
- HadoopGeoTiffRasterSource(source, config, callback)
- case "s3" ⇒
- val client = () ⇒ S3Client.DEFAULT
- S3GeoTiffRasterSource(source, client, callback)
- case s ⇒ throw new UnsupportedOperationException(s"Scheme '$s' not supported")
- }
-
-
- case class SimpleGeoTiffInfo(
- cellType: CellType,
- extent: Extent,
- rasterExtent: RasterExtent,
- crs: CRS,
- tags: Tags,
- segmentLayout: GeoTiffSegmentLayout,
- bandCount: Int,
- noDataValue: Option[Double]
- )
-
- object SimpleGeoTiffInfo {
- def apply(info: GeoTiffReader.GeoTiffInfo): SimpleGeoTiffInfo =
- SimpleGeoTiffInfo(info.cellType, info.extent, info.rasterExtent, info.crs, info.tags, info.segmentLayout, info.bandCount, info.noDataValue)
- }
-
- // According to https://goo.gl/2z8xx9 the GeoTIFF date format is 'YYYY:MM:DD HH:MM:SS'
- private val dateFormat = DateTimeFormatter.ofPattern("yyyy:MM:dd HH:mm:ss")
-
- trait URIRasterSource { _: RasterSource ⇒
- def source: URI
-
- abstract override def toString: String = {
- s"${getClass.getSimpleName}(${source})"
- }
- }
-
- case class InMemoryRasterSource(tile: Tile, extent: Extent, crs: CRS) extends RasterSource {
- def this(prt: ProjectedRasterTile) = this(prt, prt.extent, prt.crs)
-
- override def rows: Int = tile.rows
-
- override def cols: Int = tile.cols
-
- override def timestamp: Option[ZonedDateTime] = None
-
- override def cellType: CellType = tile.cellType
-
- override def bandCount: Int = 1
-
- override def tags: Option[Tags] = None
-
- override def read(extent: Extent): Either[Raster[Tile], Raster[MultibandTile]] = Left(
- Raster(tile.crop(rasterExtent.gridBoundsFor(extent, false)), extent)
- )
-
- override def nativeLayout: Option[TileLayout] = Some(
- TileLayout(
- layoutCols = math.ceil(this.cols.toDouble / NOMINAL_TILE_SIZE).toInt,
- layoutRows = math.ceil(this.rows.toDouble / NOMINAL_TILE_SIZE).toInt,
- tileCols = NOMINAL_TILE_SIZE,
- tileRows = NOMINAL_TILE_SIZE)
- )
-
- def readAll(): Either[Seq[Raster[Tile]], Seq[Raster[MultibandTile]]] = {
- Left(Raster(tile, extent).split(nativeLayout.get, Split.Options(false, false)).toSeq)
- }
- }
-
- trait RangeReaderRasterSource extends RasterSource with GeoTiffInfoSupport with LazyLogging {
- protected def rangeReader: RangeReader
-
- private def realInfo =
- GeoTiffReader.readGeoTiffInfo(rangeReader, streaming = true, withOverviews = false)
-
- private lazy val tiffInfo = SimpleGeoTiffInfo(realInfo)
-
- def crs: CRS = tiffInfo.crs
-
- def extent: Extent = tiffInfo.extent
-
- def timestamp: Option[ZonedDateTime] = resolveDate
-
- override def cols: Int = tiffInfo.rasterExtent.cols
-
- override def rows: Int = tiffInfo.rasterExtent.rows
-
- def cellType: CellType = tiffInfo.cellType
-
- def bandCount: Int = tiffInfo.bandCount
-
- override def tags: Option[Tags] = Option(tiffInfo.tags)
-
- def nativeLayout: Option[TileLayout] = {
- if (tiffInfo.segmentLayout.isTiled)
- Some(tiffInfo.segmentLayout.tileLayout)
- else None
- }
-
- // TODO: Determine if this is the correct way to handle time.
- protected def resolveDate: Option[ZonedDateTime] = {
- tiffInfo.tags.headTags
- .get(Tags.TIFFTAG_DATETIME)
- .flatMap(ds ⇒ Try({
- logger.debug("Parsing header date: " + ds)
- ZonedDateTime.parse(ds, dateFormat)
- }).toOption)
- }
-
- def read(extent: Extent): Either[Raster[Tile], Raster[MultibandTile]] = {
- val info = realInfo
- if (bandCount == 1) {
- val geoTiffTile = GeoTiffReader.geoTiffSinglebandTile(info)
- val gt = new SinglebandGeoTiff(
- geoTiffTile,
- info.extent,
- info.crs,
- info.tags,
- info.options,
- List.empty
- )
- Left(gt.crop(extent).raster)
- }
- else {
- val geoTiffTile = GeoTiffReader.geoTiffMultibandTile(info)
- val gt = new MultibandGeoTiff(
- geoTiffTile,
- info.extent,
- info.crs,
- info.tags,
- info.options,
- List.empty
- )
- Right(gt.crop(extent).raster)
- }
- }
-
- def readAll(): Either[Seq[Raster[Tile]], Seq[Raster[MultibandTile]]] = {
- val info = realInfo
-
- // Thanks to @pomadchin for showing us how to do this :-)
- val windows = info.segmentLayout.listWindows(NOMINAL_TILE_SIZE)
- val re = info.rasterExtent
-
- if (info.bandCount == 1) {
- val geotile = GeoTiffReader.geoTiffSinglebandTile(info)
-
- val rows = windows.map(gb ⇒ {
- val tile = geotile.crop(gb)
- val extent = re.extentFor(gb, clamp = false)
- Raster(tile, extent)
- })
-
- Left(rows.toSeq)
- }
- else {
- val geotile = GeoTiffReader.geoTiffMultibandTile(info)
-
- val rows = windows.map(gb ⇒ {
- val tile = geotile.crop(gb)
- val extent = re.extentFor(gb, clamp = false)
- Raster(tile, extent)
- })
-
- Right(rows.toSeq)
- }
- }
- }
-
- case class FileGeoTiffRasterSource(source: URI, callback: Option[ReadCallback]) extends RangeReaderRasterSource
- with URIRasterSource with URIRasterSourceDebugString { self ⇒
- @transient
- protected lazy val rangeReader = {
- val base = FileRangeReader(source.getPath)
- // TODO: DRY
- callback.map(cb ⇒ ReportingRangeReader(base, cb, self)).getOrElse(base)
- }
- }
-
- case class HadoopGeoTiffRasterSource(source: URI, config: () ⇒ Configuration, callback: Option[ReadCallback]) extends RangeReaderRasterSource
- with URIRasterSource with URIRasterSourceDebugString { self ⇒
- @transient
- protected lazy val rangeReader = {
- val base = HdfsRangeReader(new Path(source.getPath), config())
- callback.map(cb ⇒ ReportingRangeReader(base, cb, self)).getOrElse(base)
- }
- }
-
- case class S3GeoTiffRasterSource(source: URI, client: () ⇒ S3Client, callback: Option[ReadCallback]) extends RangeReaderRasterSource
- with URIRasterSource with URIRasterSourceDebugString { self ⇒
- @transient
- protected lazy val rangeReader = {
- val base = S3RangeReader(source, client())
- callback.map(cb ⇒ ReportingRangeReader(base, cb, self)).getOrElse(base)
- }
- }
-
- case class HttpGeoTiffRasterSource(source: URI, callback: Option[ReadCallback]) extends RangeReaderRasterSource
- with URIRasterSource with URIRasterSourceDebugString { self ⇒
-
- @transient
- protected lazy val rangeReader = {
- val base = HttpRangeReader(source)
- callback.map(cb ⇒ ReportingRangeReader(base, cb, self)).getOrElse(base)
- }
-
- override protected def resolveDate: Option[ZonedDateTime] = {
- super.resolveDate
- .orElse {
- val hrr = rangeReader match {
- case h: HttpRangeReader ⇒ h
- case ReportingRangeReader(h: HttpRangeReader, _, _) ⇒ h
- }
- hrr.response.headers.get("Last-Modified")
- .flatMap(_.headOption)
- .flatMap(s ⇒ Try(ZonedDateTime.parse(s, DateTimeFormatter.RFC_1123_DATE_TIME)).toOption)
- }
- }
- }
-
- /** Trait for registering a callback for logging or monitoring range reads.
- * NB: the callback will be invoked from within a Spark task, and therefore
- * is serialized along with its closure to executors. */
- trait ReadCallback extends Serializable {
- def readRange(source: RasterSource, start: Long, length: Int): Unit
- }
-
- private case class ReportingRangeReader(delegate: RangeReader, callback: ReadCallback, parent: RasterSource) extends RangeReader {
- override def totalLength: Long = delegate.totalLength
-
- override protected def readClippedRange(start: Long, length: Int): Array[Byte] = {
- callback.readRange(parent, start, length)
- delegate.readRange(start, length)
- }
- }
-
- trait URIRasterSourceDebugString {
- _: RangeReaderRasterSource with URIRasterSource with Product ⇒
- def toDebugString: String = {
- val buf = new StringBuilder()
- buf.append(productPrefix)
- buf.append("(")
- buf.append("source=")
- buf.append(source.toASCIIString)
- buf.append(", size=")
- buf.append(size)
- buf.append(", dimensions=")
- buf.append(dimensions)
- buf.append(", crs=")
- buf.append(crs)
- buf.append(", extent=")
- buf.append(extent)
- buf.append(", timestamp=")
- buf.append(timestamp)
- buf.append(")")
- buf.toString
- }
- }
-
-}
\ No newline at end of file
diff --git a/core/src/main/scala/astraea/spark/rasterframes/rules/SpatialUDFSubstitutionRules.scala b/core/src/main/scala/astraea/spark/rasterframes/rules/SpatialUDFSubstitutionRules.scala
deleted file mode 100644
index c0f985b0b..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/rules/SpatialUDFSubstitutionRules.scala
+++ /dev/null
@@ -1,21 +0,0 @@
-package astraea.spark.rasterframes.rules
-
-import astraea.spark.rasterframes.expressions.SpatialRelation
-import org.apache.spark.sql.catalyst.expressions.ScalaUDF
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.rules.Rule
-
-/**
- * Swaps out spatial relation UDFs for expression forms.
- *
- * @since 2/19/18
- */
-object SpatialUDFSubstitutionRules extends Rule[LogicalPlan] {
- def apply(plan: LogicalPlan): LogicalPlan = {
- plan.transform {
- case q: LogicalPlan ⇒ q.transformExpressions {
- case s: ScalaUDF ⇒ SpatialRelation.fromUDF(s).getOrElse(s)
- }
- }
- }
-}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/tiles/DelegatingTile.scala b/core/src/main/scala/astraea/spark/rasterframes/tiles/DelegatingTile.scala
deleted file mode 100644
index 8f0a910c7..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/tiles/DelegatingTile.scala
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2018 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- */
-
-package astraea.spark.rasterframes.tiles
-
-import geotrellis.raster._
-
-/**
- * A tile that wraps another tile. Originally intended for delayed reading, but useful in other special use cases.
- *
- * @since 8/22/18
- */
-trait DelegatingTile extends Tile {
- protected def delegate: Tile
-
- def cellType: CellType =
- delegate.cellType
-
- def cols: Int =
- delegate.cols
-
- def rows: Int =
- delegate.rows
-
- def mutable: MutableArrayTile =
- delegate.mutable
-
- def convert(cellType: CellType): Tile =
- delegate.convert(cellType)
-
- override def withNoData(noDataValue: Option[Double]): Tile =
- delegate.withNoData(noDataValue)
-
- def interpretAs(newCellType: CellType): Tile =
- delegate.interpretAs(newCellType)
-
- def get(col: Int, row: Int): Int =
- delegate.get(col, row)
-
- def getDouble(col: Int, row: Int): Double =
- delegate.getDouble(col, row)
-
- def toArrayTile(): ArrayTile =
- delegate.toArrayTile()
-
- def toArray(): Array[Int] =
- delegate.toArray()
-
- def toArrayDouble(): Array[Double] =
- delegate.toArrayDouble()
-
- def toBytes(): Array[Byte] =
- delegate.toBytes()
-
- def foreach(f: Int ⇒ Unit): Unit =
- delegate.foreach(f)
-
- def foreachDouble(f: Double ⇒ Unit): Unit =
- delegate.foreachDouble(f)
-
- def map(f: Int ⇒ Int): Tile =
- delegate.map(f)
-
- def combine(r2: Tile)(f: (Int, Int) ⇒ Int): Tile = (delegate, r2) match {
- // Hack until https://github.com/locationtech/geotrellis/issues/2792
- case (del: ArrayTile, r2: DelegatingTile) ⇒ del.combine(r2.toArrayTile())(f)
- case _ ⇒ delegate.combine(r2)(f)
- }
-
- def combineDouble(r2: Tile)(f: (Double, Double) ⇒ Double): Tile = (delegate, r2) match {
- // Hack until https://github.com/locationtech/geotrellis/issues/2792
- case (del: ArrayTile, r2: DelegatingTile) ⇒ del.combineDouble(r2.toArrayTile())(f)
- case _ ⇒ delegate.combineDouble(r2)(f)
- }
-
- def mapDouble(f: Double ⇒ Double): Tile =
- delegate.mapDouble(f)
-
- def foreachIntVisitor(visitor: IntTileVisitor): Unit =
- delegate.foreachIntVisitor(visitor)
-
- def foreachDoubleVisitor(visitor: DoubleTileVisitor): Unit =
- delegate.foreachDoubleVisitor(visitor)
-
- def mapIntMapper(mapper: IntTileMapper): Tile =
- delegate.mapIntMapper(mapper)
-
- def mapDoubleMapper(mapper: DoubleTileMapper): Tile =
- delegate.mapDoubleMapper(mapper)
-
-}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/ReadAccumulator.scala b/core/src/main/scala/astraea/spark/rasterframes/util/ReadAccumulator.scala
deleted file mode 100644
index f9cdfb48e..000000000
--- a/core/src/main/scala/astraea/spark/rasterframes/util/ReadAccumulator.scala
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2018 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- */
-
-package astraea.spark.rasterframes.util
-
-import astraea.spark.rasterframes.ref.RasterSource
-import astraea.spark.rasterframes.ref.RasterSource.ReadCallback
-import com.typesafe.scalalogging.LazyLogging
-import org.apache.spark.SparkContext
-import org.apache.spark.util.LongAccumulator
-
-/**
- * Support for keeping counts of read operations from RasterSource-s
- *
- * @since 9/3/18
- */
-case class ReadAccumulator(reads: () ⇒ LongAccumulator, bytes: () ⇒ LongAccumulator) extends ReadCallback {
- override def readRange(source: RasterSource, start: Long, length: Int): Unit = {
- reads().add(1)
- bytes().add(length)
- }
- override def toString: String =
- s"${productPrefix}(reads=${reads().value}, bytes=${bytes().value})"
-}
-
-object ReadAccumulator extends LazyLogging {
- def apply(sc: SparkContext, prefix: String): ReadAccumulator = this.synchronized {
- val reads = sc.longAccumulator(prefix + ".reads")
- val bytes = sc.longAccumulator(prefix + ".bytes")
- new ReadAccumulator(() ⇒ reads, () ⇒ bytes)
- }
-}
\ No newline at end of file
diff --git a/core/src/main/scala/org/apache/spark/sql/rf/FilterTranslator.scala b/core/src/main/scala/org/apache/spark/sql/rf/FilterTranslator.scala
index edecc44b5..6433ef8d3 100644
--- a/core/src/main/scala/org/apache/spark/sql/rf/FilterTranslator.scala
+++ b/core/src/main/scala/org/apache/spark/sql/rf/FilterTranslator.scala
@@ -19,8 +19,8 @@ package org.apache.spark.sql.rf
import java.sql.{Date, Timestamp}
-import astraea.spark.rasterframes.expressions.SpatialRelation.{Contains, Intersects}
-import astraea.spark.rasterframes.rules._
+import org.locationtech.rasterframes.expressions.SpatialRelation.{Contains, Intersects}
+import org.locationtech.rasterframes.rules._
import org.apache.spark.sql.catalyst.CatalystTypeConverters.{convertToScala, createToScalaConverter}
import org.apache.spark.sql.catalyst.expressions
import org.apache.spark.sql.catalyst.expressions.{Attribute, EmptyRow, Expression, Literal}
@@ -30,6 +30,7 @@ import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.types.{DateType, StringType, TimestampType}
import org.apache.spark.unsafe.types.UTF8String
import org.locationtech.geomesa.spark.jts.rules.GeometryLiteral
+import org.locationtech.rasterframes.rules.{SpatialFilters, TemporalFilters}
/**
* This is a copy of [[org.apache.spark.sql.execution.datasources.DataSourceStrategy.translateFilter]], modified to add our spatial predicates.
diff --git a/core/src/main/scala/org/apache/spark/sql/rf/RasterSourceUDT.scala b/core/src/main/scala/org/apache/spark/sql/rf/RasterSourceUDT.scala
index 2dee38a6e..772bde6fe 100644
--- a/core/src/main/scala/org/apache/spark/sql/rf/RasterSourceUDT.scala
+++ b/core/src/main/scala/org/apache/spark/sql/rf/RasterSourceUDT.scala
@@ -23,12 +23,12 @@ package org.apache.spark.sql.rf
import java.nio.ByteBuffer
-import astraea.spark.rasterframes.encoders.CatalystSerializer
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.ref.RasterSource
-import astraea.spark.rasterframes.util.KryoSupport
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.types.{DataType, UDTRegistration, UserDefinedType, _}
+import org.locationtech.rasterframes.encoders.CatalystSerializer
+import org.locationtech.rasterframes.ref.RasterSource
+import org.locationtech.rasterframes.util.KryoSupport
/**
* Catalyst representation of a RasterSource.
@@ -40,11 +40,11 @@ class RasterSourceUDT extends UserDefinedType[RasterSource] {
import RasterSourceUDT._
override def typeName = "rf_rastersource"
- override def pyUDT: String = "pyrasterframes.RasterSourceUDT"
+ override def pyUDT: String = "pyrasterframes.rf_types.RasterSourceUDT"
def userClass: Class[RasterSource] = classOf[RasterSource]
- override def sqlType: DataType = CatalystSerializer[RasterSource].schema
+ override def sqlType: DataType = schemaOf[RasterSource]
override def serialize(obj: RasterSource): InternalRow =
Option(obj)
@@ -65,9 +65,12 @@ class RasterSourceUDT extends UserDefinedType[RasterSource] {
}
}
-object RasterSourceUDT extends RasterSourceUDT {
+object RasterSourceUDT {
UDTRegistration.register(classOf[RasterSource].getName, classOf[RasterSourceUDT].getName)
+ /** Deserialize a byte array, also used inside the Python API */
+ def from(byteArray: Array[Byte]): RasterSource = CatalystSerializer.CatalystIO.rowIO.create(byteArray).to[RasterSource]
+
implicit val rasterSourceSerializer: CatalystSerializer[RasterSource] = new CatalystSerializer[RasterSource] {
override def schema: StructType = StructType(Seq(
diff --git a/core/src/main/scala/org/apache/spark/sql/rf/TileUDT.scala b/core/src/main/scala/org/apache/spark/sql/rf/TileUDT.scala
index 75ac0f7cf..66c0d98a1 100644
--- a/core/src/main/scala/org/apache/spark/sql/rf/TileUDT.scala
+++ b/core/src/main/scala/org/apache/spark/sql/rf/TileUDT.scala
@@ -20,14 +20,13 @@
*/
package org.apache.spark.sql.rf
-
-import astraea.spark.rasterframes.encoders.CatalystSerializer
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.model.{Cells, TileDataContext}
-import astraea.spark.rasterframes.tiles.InternalRowTile
import geotrellis.raster._
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.types.{DataType, _}
+import org.locationtech.rasterframes.encoders.CatalystSerializer
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.model.{Cells, TileDataContext}
+import org.locationtech.rasterframes.tiles.InternalRowTile
/**
@@ -40,11 +39,11 @@ class TileUDT extends UserDefinedType[Tile] {
import TileUDT._
override def typeName = TileUDT.typeName
- override def pyUDT: String = "pyrasterframes.TileUDT"
+ override def pyUDT: String = "pyrasterframes.rf_types.TileUDT"
def userClass: Class[Tile] = classOf[Tile]
- def sqlType: StructType = CatalystSerializer[Tile].schema
+ def sqlType: StructType = schemaOf[Tile]
override def serialize(obj: Tile): InternalRow =
Option(obj)
@@ -57,7 +56,7 @@ class TileUDT extends UserDefinedType[Tile] {
case ir: InternalRow ⇒ ir.to[Tile]
}
.map {
- case realIRT: InternalRowTile ⇒ realIRT.toArrayTile()
+ case realIRT: InternalRowTile ⇒ realIRT.realizedTile
case other ⇒ other
}
.orNull
@@ -74,11 +73,10 @@ case object TileUDT {
final val typeName: String = "tile"
implicit def tileSerializer: CatalystSerializer[Tile] = new CatalystSerializer[Tile] {
- import scala.language.reflectiveCalls
override def schema: StructType = StructType(Seq(
- StructField("cell_context", CatalystSerializer[TileDataContext].schema, false),
- StructField("cell_data", CatalystSerializer[Cells].schema, false)
+ StructField("cell_context", schemaOf[TileDataContext], false),
+ StructField("cell_data", schemaOf[Cells], false)
))
override def to[R](t: Tile, io: CatalystIO[R]): R = io.create(
diff --git a/core/src/main/scala/org/apache/spark/sql/rf/VersionShims.scala b/core/src/main/scala/org/apache/spark/sql/rf/VersionShims.scala
index b9eb96981..81418d466 100644
--- a/core/src/main/scala/org/apache/spark/sql/rf/VersionShims.scala
+++ b/core/src/main/scala/org/apache/spark/sql/rf/VersionShims.scala
@@ -1,18 +1,18 @@
package org.apache.spark.sql.rf
-import java.lang.reflect.{Constructor, Method}
+import java.lang.reflect.Constructor
import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
-import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.{FunctionBuilder, expressionInfo}
+import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
import org.apache.spark.sql.catalyst.catalog.CatalogTable
-import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, SQLContext}
-import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BinaryExpression, Expression, ExpressionDescription, ExpressionInfo, RuntimeReplaceable, ScalaUDF}
import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, InvokeLike}
+import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, ExpressionDescription, ExpressionInfo}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.sources.BaseRelation
import org.apache.spark.sql.types.DataType
+import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, SQLContext}
import scala.reflect._
import scala.util.{Failure, Success, Try}
diff --git a/core/src/main/scala/org/locationtech/rasterframes/MetadataKeys.scala b/core/src/main/scala/org/locationtech/rasterframes/MetadataKeys.scala
new file mode 100644
index 000000000..9eb4000d9
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/MetadataKeys.scala
@@ -0,0 +1,34 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2018 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes
+
+/**
+ *
+ * @since 2/19/18
+ */
+trait MetadataKeys {
+ /** Key under which ContextRDD metadata is stored. */
+ private[rasterframes] val CONTEXT_METADATA_KEY = "_context"
+
+ /** Key under which RasterFrameLayer role a column plays. */
+ private[rasterframes] val SPATIAL_ROLE_KEY = "_stRole"
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/PairRDDConverter.scala b/core/src/main/scala/org/locationtech/rasterframes/PairRDDConverter.scala
similarity index 86%
rename from core/src/main/scala/astraea/spark/rasterframes/PairRDDConverter.scala
rename to core/src/main/scala/org/locationtech/rasterframes/PairRDDConverter.scala
index 83686dbfd..658c0d65d 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/PairRDDConverter.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/PairRDDConverter.scala
@@ -1,8 +1,29 @@
-package astraea.spark.rasterframes
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes
-import astraea.spark.rasterframes.util._
+import org.locationtech.rasterframes.util._
import geotrellis.raster.{MultibandTile, Tile, TileFeature}
-import geotrellis.spark.{SpaceTimeKey, SpatialKey, TemporalKey}
+import geotrellis.spark.{SpaceTimeKey, SpatialKey}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import org.apache.spark.sql.rf.TileUDT
@@ -15,7 +36,7 @@ import scala.annotation.implicitNotFound
*
* @since 4/8/18
*/
-@implicitNotFound("An RDD converter is required create a RasterFrame. " +
+@implicitNotFound("An RDD converter is required create a RasterFrameLayer. " +
"Please provide an implementation of PairRDDConverter[${K}, ${V}].")
trait PairRDDConverter[K, V] extends Serializable {
val schema: StructType
diff --git a/core/src/main/scala/org/locationtech/rasterframes/RasterFunctions.scala b/core/src/main/scala/org/locationtech/rasterframes/RasterFunctions.scala
new file mode 100644
index 000000000..20b11c679
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/RasterFunctions.scala
@@ -0,0 +1,430 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2017 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes
+import geotrellis.proj4.CRS
+import geotrellis.raster.mapalgebra.local.LocalTileBinaryOp
+import geotrellis.raster.{CellType, Tile}
+import geotrellis.vector.Extent
+import org.apache.spark.annotation.Experimental
+import org.apache.spark.sql.functions.{lit, udf}
+import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.jts.geom.Geometry
+import org.locationtech.rasterframes.expressions.TileAssembler
+import org.locationtech.rasterframes.expressions.accessors._
+import org.locationtech.rasterframes.expressions.aggregates._
+import org.locationtech.rasterframes.expressions.generators._
+import org.locationtech.rasterframes.expressions.localops._
+import org.locationtech.rasterframes.expressions.tilestats._
+import org.locationtech.rasterframes.expressions.transformers._
+import org.locationtech.rasterframes.model.TileDimensions
+import org.locationtech.rasterframes.stats._
+import org.locationtech.rasterframes.{functions => F}
+
+/**
+ * UDFs for working with Tiles in Spark DataFrames.
+ *
+ * @since 4/3/17
+ */
+trait RasterFunctions {
+ import util._
+
+ // format: off
+ /** Query the number of (cols, rows) in a Tile. */
+ def rf_dimensions(col: Column): TypedColumn[Any, TileDimensions] = GetDimensions(col)
+
+ /** Extracts the bounding box of a geometry as an Extent */
+ def st_extent(col: Column): TypedColumn[Any, Extent] = GeometryToExtent(col)
+
+ /** Extracts the bounding box from a RasterSource or ProjectedRasterTile */
+ def rf_extent(col: Column): TypedColumn[Any, Extent] = GetExtent(col)
+
+ /** Extracts the CRS from a RasterSource or ProjectedRasterTile */
+ def rf_crs(col: Column): TypedColumn[Any, CRS] = GetCRS(col)
+
+ /** Extracts the tile from a ProjectedRasterTile, or passes through a Tile. */
+ def rf_tile(col: Column): TypedColumn[Any, Tile] = RealizeTile(col)
+
+ /** Flattens Tile into a double array. */
+ def rf_tile_to_array_double(col: Column): TypedColumn[Any, Array[Double]] =
+ TileToArrayDouble(col)
+
+ /** Flattens Tile into an integer array. */
+ def rf_tile_to_array_int(col: Column): TypedColumn[Any, Array[Double]] =
+ TileToArrayDouble(col)
+
+ @Experimental
+ /** Convert array in `arrayCol` into a Tile of dimensions `cols` and `rows`*/
+ def rf_array_to_tile(arrayCol: Column, cols: Int, rows: Int) = withAlias("rf_array_to_tile", arrayCol)(
+ udf[Tile, AnyRef](F.arrayToTile(cols, rows)).apply(arrayCol)
+ )
+
+ /** Create a Tile from a column of cell data with location indexes and preform cell conversion. */
+ def rf_assemble_tile(columnIndex: Column, rowIndex: Column, cellData: Column, tileCols: Int, tileRows: Int, ct: CellType): TypedColumn[Any, Tile] =
+ rf_convert_cell_type(TileAssembler(columnIndex, rowIndex, cellData, lit(tileCols), lit(tileRows)), ct).as(cellData.columnName).as[Tile](singlebandTileEncoder)
+
+ /** Create a Tile from a column of cell data with location indexes. */
+ def rf_assemble_tile(columnIndex: Column, rowIndex: Column, cellData: Column, tileCols: Column, tileRows: Column): TypedColumn[Any, Tile] =
+ TileAssembler(columnIndex, rowIndex, cellData, tileCols, tileRows)
+
+ /** Extract the Tile's cell type */
+ def rf_cell_type(col: Column): TypedColumn[Any, CellType] = GetCellType(col)
+
+ /** Change the Tile's cell type */
+ def rf_convert_cell_type(col: Column, cellType: CellType): TypedColumn[Any, Tile] =
+ SetCellType(col, cellType)
+
+ /** Change the Tile's cell type */
+ def rf_convert_cell_type(col: Column, cellTypeName: String): TypedColumn[Any, Tile] =
+ SetCellType(col, cellTypeName)
+
+ /** Resample tile to different size based on scalar factor or tile whose dimension to match. Scalar less
+ * than one will downsample tile; greater than one will upsample. Uses nearest-neighbor. */
+ def rf_resample[T: Numeric](tileCol: Column, factorValue: T) = Resample(tileCol, factorValue)
+
+ /** Resample tile to different size based on scalar factor or tile whose dimension to match. Scalar less
+ * than one will downsample tile; greater than one will upsample. Uses nearest-neighbor. */
+ def rf_resample(tileCol: Column, factorCol: Column) = Resample(tileCol, factorCol)
+
+ /** Convert a bounding box structure to a Geometry type. Intented to support multiple schemas. */
+ def st_geometry(extent: Column): TypedColumn[Any, Geometry] = ExtentToGeometry(extent)
+
+ /** Extract the extent of a RasterSource or ProjectedRasterTile as a Geometry type. */
+ def rf_geometry(raster: Column): TypedColumn[Any, Geometry] = GetGeometry(raster)
+
+ /** Assign a `NoData` value to the Tiles. */
+ def rf_with_no_data(col: Column, nodata: Double): TypedColumn[Any, Tile] = withTypedAlias("rf_with_no_data", col)(
+ udf[Tile, Tile](F.withNoData(nodata)).apply(col)
+ )
+
+ /** Compute the full column aggregate floating point histogram. */
+ def rf_agg_approx_histogram(col: Column): TypedColumn[Any, CellHistogram] =
+ HistogramAggregate(col)
+
+ /** Compute the full column aggregate floating point statistics. */
+ def rf_agg_stats(col: Column): TypedColumn[Any, CellStatistics] =
+ CellStatsAggregate(col)
+
+ /** Computes the column aggregate mean. */
+ def rf_agg_mean(col: Column) = CellMeanAggregate(col)
+
+ /** Computes the number of non-NoData cells in a column. */
+ def rf_agg_data_cells(col: Column): TypedColumn[Any, Long] = CellCountAggregate.DataCells(col)
+
+ /** Computes the number of NoData cells in a column. */
+ def rf_agg_no_data_cells(col: Column): TypedColumn[Any, Long] = CellCountAggregate.NoDataCells(col)
+
+ /** Compute the Tile-wise mean */
+ def rf_tile_mean(col: Column): TypedColumn[Any, Double] =
+ TileMean(col)
+
+ /** Compute the Tile-wise sum */
+ def rf_tile_sum(col: Column): TypedColumn[Any, Double] =
+ Sum(col)
+
+ /** Compute the minimum cell value in tile. */
+ def rf_tile_min(col: Column): TypedColumn[Any, Double] =
+ TileMin(col)
+
+ /** Compute the maximum cell value in tile. */
+ def rf_tile_max(col: Column): TypedColumn[Any, Double] =
+ TileMax(col)
+
+ /** Compute TileHistogram of Tile values. */
+ def rf_tile_histogram(col: Column): TypedColumn[Any, CellHistogram] =
+ TileHistogram(col)
+
+ /** Compute statistics of Tile values. */
+ def rf_tile_stats(col: Column): TypedColumn[Any, CellStatistics] =
+ TileStats(col)
+
+ /** Counts the number of non-NoData cells per Tile. */
+ def rf_data_cells(tile: Column): TypedColumn[Any, Long] =
+ DataCells(tile)
+
+ /** Counts the number of NoData cells per Tile. */
+ def rf_no_data_cells(tile: Column): TypedColumn[Any, Long] =
+ NoDataCells(tile)
+
+ /** Returns true if all cells in the tile are NoData.*/
+ def rf_is_no_data_tile(tile: Column): TypedColumn[Any, Boolean] =
+ IsNoDataTile(tile)
+
+ /** Returns true if any cells in the tile are true (non-zero and not NoData). */
+ def rf_exists(tile: Column): TypedColumn[Any, Boolean] = Exists(tile)
+
+ /** Returns true if all cells in the tile are true (non-zero and not NoData). */
+ def rf_for_all(tile: Column): TypedColumn[Any, Boolean] = ForAll(tile)
+
+ /** Compute cell-local aggregate descriptive statistics for a column of Tiles. */
+ def rf_agg_local_stats(col: Column) =
+ LocalStatsAggregate(col)
+
+ /** Compute the cell-wise/local max operation between Tiles in a column. */
+ def rf_agg_local_max(col: Column): TypedColumn[Any, Tile] = LocalTileOpAggregate.LocalMaxUDAF(col)
+
+ /** Compute the cellwise/local min operation between Tiles in a column. */
+ def rf_agg_local_min(col: Column): TypedColumn[Any, Tile] = LocalTileOpAggregate.LocalMinUDAF(col)
+
+ /** Compute the cellwise/local mean operation between Tiles in a column. */
+ def rf_agg_local_mean(col: Column): TypedColumn[Any, Tile] = LocalMeanAggregate(col)
+
+ /** Compute the cellwise/local count of non-NoData cells for all Tiles in a column. */
+ def rf_agg_local_data_cells(col: Column): TypedColumn[Any, Tile] = LocalCountAggregate.LocalDataCellsUDAF(col)
+
+ /** Compute the cellwise/local count of NoData cells for all Tiles in a column. */
+ def rf_agg_local_no_data_cells(col: Column): TypedColumn[Any, Tile] = LocalCountAggregate.LocalNoDataCellsUDAF(col)
+
+ /** Cellwise addition between two Tiles or Tile and scalar column. */
+ def rf_local_add(left: Column, right: Column): TypedColumn[Any, Tile] = Add(left, right)
+
+ /** Cellwise addition of a scalar value to a tile. */
+ def rf_local_add[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Add(tileCol, value)
+
+ /** Cellwise subtraction between two Tiles. */
+ def rf_local_subtract(left: Column, right: Column): TypedColumn[Any, Tile] = Subtract(left, right)
+
+ /** Cellwise subtraction of a scalar value from a tile. */
+ def rf_local_subtract[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Subtract(tileCol, value)
+
+ /** Cellwise multiplication between two Tiles. */
+ def rf_local_multiply(left: Column, right: Column): TypedColumn[Any, Tile] = Multiply(left, right)
+
+ /** Cellwise multiplication of a tile by a scalar value. */
+ def rf_local_multiply[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Multiply(tileCol, value)
+
+ /** Cellwise division between two Tiles. */
+ def rf_local_divide(left: Column, right: Column): TypedColumn[Any, Tile] = Divide(left, right)
+
+ /** Cellwise division of a tile by a scalar value. */
+ def rf_local_divide[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Divide(tileCol, value)
+
+ /** Perform an arbitrary GeoTrellis `LocalTileBinaryOp` between two Tile columns. */
+ def rf_local_algebra(op: LocalTileBinaryOp, left: Column, right: Column): TypedColumn[Any, Tile] =
+ withTypedAlias(opName(op), left, right)(udf[Tile, Tile, Tile](op.apply).apply(left, right))
+
+ /** Compute the normalized difference of two tile columns */
+ def rf_normalized_difference(left: Column, right: Column) =
+ NormalizedDifference(left, right)
+
+ /** Constructor for tile column with a single cell value. */
+ def rf_make_constant_tile(value: Number, cols: Int, rows: Int, cellType: CellType): TypedColumn[Any, Tile] =
+ rf_make_constant_tile(value, cols, rows, cellType.name)
+
+ /** Constructor for tile column with a single cell value. */
+ def rf_make_constant_tile(value: Number, cols: Int, rows: Int, cellTypeName: String): TypedColumn[Any, Tile] = {
+ import org.apache.spark.sql.rf.TileUDT.tileSerializer
+ val constTile = encoders.serialized_literal(F.makeConstantTile(value, cols, rows, cellTypeName))
+ withTypedAlias(s"rf_make_constant_tile($value, $cols, $rows, $cellTypeName)")(constTile)
+ }
+
+ /** Create a column constant tiles of zero */
+ def rf_make_zeros_tile(cols: Int, rows: Int, cellType: CellType): TypedColumn[Any, Tile] =
+ rf_make_zeros_tile(cols, rows, cellType.name)
+
+ /** Create a column constant tiles of zero */
+ def rf_make_zeros_tile(cols: Int, rows: Int, cellTypeName: String): TypedColumn[Any, Tile] = {
+ import org.apache.spark.sql.rf.TileUDT.tileSerializer
+ val constTile = encoders.serialized_literal(F.tileZeros(cols, rows, cellTypeName))
+ withTypedAlias(s"rf_make_zeros_tile($cols, $rows, $cellTypeName)")(constTile)
+ }
+
+ /** Creates a column of tiles containing all ones */
+ def rf_make_ones_tile(cols: Int, rows: Int, cellType: CellType): TypedColumn[Any, Tile] =
+ rf_make_ones_tile(cols, rows, cellType.name)
+
+ /** Creates a column of tiles containing all ones */
+ def rf_make_ones_tile(cols: Int, rows: Int, cellTypeName: String): TypedColumn[Any, Tile] = {
+ import org.apache.spark.sql.rf.TileUDT.tileSerializer
+ val constTile = encoders.serialized_literal(F.tileOnes(cols, rows, cellTypeName))
+ withTypedAlias(s"rf_make_ones_tile($cols, $rows, $cellTypeName)")(constTile)
+ }
+
+ /** Where the rf_mask tile contains NODATA, replace values in the source tile with NODATA */
+ def rf_mask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] =
+ Mask.MaskByDefined(sourceTile, maskTile)
+
+ /** Where the `maskTile` equals `maskValue`, replace values in the source tile with `NoData` */
+ def rf_mask_by_value(sourceTile: Column, maskTile: Column, maskValue: Column): TypedColumn[Any, Tile] =
+ Mask.MaskByValue(sourceTile, maskTile, maskValue)
+
+ /** Where the `maskTile` does **not** contain `NoData`, replace values in the source tile with `NoData` */
+ def rf_inverse_mask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] =
+ Mask.InverseMaskByDefined(sourceTile, maskTile)
+
+ /** Where the `maskTile` does **not** equal `maskValue`, replace values in the source tile with `NoData` */
+ def rf_inverse_mask_by_value(sourceTile: Column, maskTile: Column, maskValue: Column): TypedColumn[Any, Tile] =
+ Mask.InverseMaskByValue(sourceTile, maskTile, maskValue)
+
+ /** Create a tile where cells in the grid defined by cols, rows, and bounds are filled with the given value. */
+ def rf_rasterize(geometry: Column, bounds: Column, value: Column, cols: Int, rows: Int): TypedColumn[Any, Tile] =
+ withTypedAlias("rf_rasterize", geometry)(
+ udf(F.rasterize(_: Geometry, _: Geometry, _: Int, cols, rows)).apply(geometry, bounds, value)
+ )
+
+ def rf_rasterize(geometry: Column, bounds: Column, value: Column, cols: Column, rows: Column): TypedColumn[Any, Tile] =
+ withTypedAlias("rf_rasterize", geometry)(
+ udf(F.rasterize).apply(geometry, bounds, value, cols, rows)
+ )
+
+ /** Reproject a column of geometry from one CRS to another.
+ * @param sourceGeom Geometry column to reproject
+ * @param srcCRS Native CRS of `sourceGeom` as a literal
+ * @param dstCRSCol Destination CRS as a column
+ */
+ def st_reproject(sourceGeom: Column, srcCRS: CRS, dstCRSCol: Column): TypedColumn[Any, Geometry] =
+ ReprojectGeometry(sourceGeom, srcCRS, dstCRSCol)
+
+ /** Reproject a column of geometry from one CRS to another.
+ * @param sourceGeom Geometry column to reproject
+ * @param srcCRSCol Native CRS of `sourceGeom` as a column
+ * @param dstCRS Destination CRS as a literal
+ */
+ def st_reproject(sourceGeom: Column, srcCRSCol: Column, dstCRS: CRS): TypedColumn[Any, Geometry] =
+ ReprojectGeometry(sourceGeom, srcCRSCol, dstCRS)
+
+ /** Reproject a column of geometry from one CRS to another.
+ * @param sourceGeom Geometry column to reproject
+ * @param srcCRS Native CRS of `sourceGeom` as a literal
+ * @param dstCRS Destination CRS as a literal
+ */
+ def st_reproject(sourceGeom: Column, srcCRS: CRS, dstCRS: CRS): TypedColumn[Any, Geometry] =
+ ReprojectGeometry(sourceGeom, srcCRS, dstCRS)
+
+ /** Reproject a column of geometry from one CRS to another.
+ * @param sourceGeom Geometry column to reproject
+ * @param srcCRSCol Native CRS of `sourceGeom` as a column
+ * @param dstCRSCol Destination CRS as a column
+ */
+ def st_reproject(sourceGeom: Column, srcCRSCol: Column, dstCRSCol: Column): TypedColumn[Any, Geometry] =
+ ReprojectGeometry(sourceGeom, srcCRSCol, dstCRSCol)
+
+ /** Render Tile as ASCII string, for debugging purposes. */
+ def rf_render_ascii(col: Column): TypedColumn[Any, String] =
+ DebugRender.RenderAscii(col)
+
+ /** Render Tile cell values as numeric values, for debugging purposes. */
+ def rf_render_matrix(col: Column): TypedColumn[Any, String] =
+ DebugRender.RenderMatrix(col)
+
+ /** Cellwise less than value comparison between two tiles. */
+ def rf_local_less(left: Column, right: Column): TypedColumn[Any, Tile] =
+ Less(left, right)
+
+ /** Cellwise less than value comparison between a tile and a scalar. */
+ def rf_local_less[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] =
+ Less(tileCol, value)
+
+ /** Cellwise less than or equal to value comparison between a tile and a scalar. */
+ def rf_local_less_equal(left: Column, right: Column): TypedColumn[Any, Tile] =
+ LessEqual(left, right)
+
+ /** Cellwise less than or equal to value comparison between a tile and a scalar. */
+ def rf_local_less_equal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] =
+ LessEqual(tileCol, value)
+
+ /** Cellwise greater than value comparison between two tiles. */
+ def rf_local_greater(left: Column, right: Column): TypedColumn[Any, Tile] =
+ Greater(left, right)
+
+ /** Cellwise greater than value comparison between a tile and a scalar. */
+ def rf_local_greater[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] =
+ Greater(tileCol, value)
+
+ /** Cellwise greater than or equal to value comparison between two tiles. */
+ def rf_local_greater_equal(left: Column, right: Column): TypedColumn[Any, Tile] =
+ GreaterEqual(left, right)
+
+ /** Cellwise greater than or equal to value comparison between a tile and a scalar. */
+ def rf_local_greater_equal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] =
+ GreaterEqual(tileCol, value)
+
+ /** Cellwise equal to value comparison between two tiles. */
+ def rf_local_equal(left: Column, right: Column): TypedColumn[Any, Tile] =
+ Equal(left, right)
+
+ /** Cellwise equal to value comparison between a tile and a scalar. */
+ def rf_local_equal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] =
+ Equal(tileCol, value)
+
+ /** Cellwise inequality comparison between two tiles. */
+ def rf_local_unequal(left: Column, right: Column): TypedColumn[Any, Tile] =
+ Unequal(left, right)
+
+ /** Cellwise inequality comparison between a tile and a scalar. */
+ def rf_local_unequal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] =
+ Unequal(tileCol, value)
+
+ /** Round cell values to nearest integer without changing cell type. */
+ def rf_round(tileCol: Column): TypedColumn[Any, Tile] =
+ Round(tileCol)
+
+ /** Compute the absolute value of each cell. */
+ def rf_abs(tileCol: Column): TypedColumn[Any, Tile] =
+ Abs(tileCol)
+
+ /** Take natural logarithm of cell values. */
+ def rf_log(tileCol: Column): TypedColumn[Any, Tile] =
+ Log(tileCol)
+
+ /** Take base 10 logarithm of cell values. */
+ def rf_log10(tileCol: Column): TypedColumn[Any, Tile] =
+ Log10(tileCol)
+
+ /** Take base 2 logarithm of cell values. */
+ def rf_log2(tileCol: Column): TypedColumn[Any, Tile] =
+ Log2(tileCol)
+
+ /** Natural logarithm of one plus cell values. */
+ def rf_log1p(tileCol: Column): TypedColumn[Any, Tile] =
+ Log1p(tileCol)
+
+ /** Exponential of cell values */
+ def rf_exp(tileCol: Column): TypedColumn[Any, Tile] =
+ Exp(tileCol)
+
+ /** Ten to the power of cell values */
+ def rf_exp10(tileCol: Column): TypedColumn[Any, Tile] =
+ Exp10(tileCol)
+
+ /** Two to the power of cell values */
+ def rf_exp2(tileCol: Column): TypedColumn[Any, Tile] =
+ Exp2(tileCol)
+
+ /** Exponential of cell values, less one. */
+ def rf_expm1(tileCol: Column): TypedColumn[Any, Tile] =
+ ExpM1(tileCol)
+
+ /** Return the incoming tile untouched. */
+ def rf_identity(tileCol: Column): TypedColumn[Any, Tile] =
+ Identity(tileCol)
+
+ /** Create a row for each cell in Tile. */
+ def rf_explode_tiles(cols: Column*): Column = rf_explode_tiles_sample(1.0, None, cols: _*)
+
+ /** Create a row for each cell in Tile with random sampling and optional seed. */
+ def rf_explode_tiles_sample(sampleFraction: Double, seed: Option[Long], cols: Column*): Column =
+ ExplodeTiles(sampleFraction, seed, cols)
+
+ /** Create a row for each cell in Tile with random sampling (no seed). */
+ def rf_explode_tiles_sample(sampleFraction: Double, cols: Column*): Column =
+ ExplodeTiles(sampleFraction, None, cols)
+}
diff --git a/core/src/main/scala/org/locationtech/rasterframes/StandardColumns.scala b/core/src/main/scala/org/locationtech/rasterframes/StandardColumns.scala
new file mode 100644
index 000000000..2e82ab356
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/StandardColumns.scala
@@ -0,0 +1,93 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes
+
+import java.sql.Timestamp
+
+import geotrellis.proj4.CRS
+import geotrellis.raster.Tile
+import geotrellis.spark.{SpatialKey, TemporalKey}
+import geotrellis.vector.{Extent, ProjectedExtent}
+import org.apache.spark.sql.functions.col
+import org.locationtech.jts.geom.{Point => jtsPoint, Polygon => jtsPolygon}
+import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders._
+import org.locationtech.rasterframes.tiles.ProjectedRasterTile
+
+/**
+ * Constants identifying column in most RasterFrames.
+ *
+ * @since 2/19/18
+ */
+trait StandardColumns {
+ /** Default RasterFrameLayer spatial column name. */
+ val SPATIAL_KEY_COLUMN = col("spatial_key").as[SpatialKey]
+
+ /** Default RasterFrameLayer temporal column name. */
+ val TEMPORAL_KEY_COLUMN = col("temporal_key").as[TemporalKey]
+
+ /** Default RasterFrameLayer timestamp column name */
+ val TIMESTAMP_COLUMN = col("timestamp").as[Timestamp]
+
+ /** Default RasterFrameLayer column name for a tile extent as geometry value. */
+ // This is a `def` because `PolygonUDT` needs to be initialized first.
+ def GEOMETRY_COLUMN = col("geometry").as[jtsPolygon]
+
+ /** Default RasterFrameLayer column name for the center coordinates of the tile's bounds. */
+ // This is a `def` because `PointUDT` needs to be initialized first.
+ def CENTER_COLUMN = col("center").as[jtsPoint]
+
+ /** Default Extent column name. */
+ def EXTENT_COLUMN = col("extent").as[Extent]
+
+ /** Default ProjectedExtent column name. */
+ def PROJECTED_EXTENT_COLUMN = col("proj_extent").as[ProjectedExtent]
+
+ /** Default CRS column name. */
+ def CRS_COLUMN = col("crs").as[CRS]
+
+ /** Default RasterFrameLayer column name for an added spatial index. */
+ val SPATIAL_INDEX_COLUMN = col("spatial_index").as[Long]
+
+ /** Default RasterFrameLayer tile column name. */
+ // This is a `def` because `TileUDT` needs to be initialized first.
+ def TILE_COLUMN = col("tile").as[Tile]
+
+ /** Default column name for a tile with its CRS and Extent. */
+ def PROJECTED_RASTER_COLUMN = col("proj_raster").as[ProjectedRasterTile]
+
+ /** Default RasterFrameLayer `TileFeature.data` column name. */
+ val TILE_FEATURE_DATA_COLUMN = col("tile_data")
+
+ /** Default GeoTiff tags column. */
+ val METADATA_COLUMN = col("metadata").as[Map[String, String]]
+
+ /** Default column index column for the cells of exploded tiles. */
+ val COLUMN_INDEX_COLUMN = col("column_index").as[Int]
+
+ /** Default row index column for the cells of exploded tiles. */
+ val ROW_INDEX_COLUMN = col("row_index").as[Int]
+
+ /** URI/URL/S3 path to raster. */
+ val PATH_COLUMN = col("path").as[String]
+}
+
+object StandardColumns extends StandardColumns
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/CRSEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/CRSEncoder.scala
similarity index 84%
rename from core/src/main/scala/astraea/spark/rasterframes/encoders/CRSEncoder.scala
rename to core/src/main/scala/org/locationtech/rasterframes/encoders/CRSEncoder.scala
index b6a188d71..39ed8d6f3 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/CRSEncoder.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/CRSEncoder.scala
@@ -15,13 +15,14 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.encoders
-
-import astraea.spark.rasterframes.util.CRSParser
+package org.locationtech.rasterframes.encoders
import geotrellis.proj4.CRS
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
+import org.locationtech.rasterframes.model.LazyCRS
/**
* Custom encoder for GT `CRS`.
@@ -33,5 +34,5 @@ object CRSEncoder {
"crsProj4", "toProj4String", (CRSEncoder.getClass, "fromString")
)
// Not sure why this delegate is necessary, but doGenCode fails without it.
- def fromString(str: String): CRS = CRSParser(str)
+ def fromString(str: String): CRS = LazyCRS(str)
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializer.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/CatalystSerializer.scala
similarity index 82%
rename from core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializer.scala
rename to core/src/main/scala/org/locationtech/rasterframes/encoders/CatalystSerializer.scala
index 3f09e1f38..831411557 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializer.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/CatalystSerializer.scala
@@ -19,9 +19,9 @@
*
*/
-package astraea.spark.rasterframes.encoders
+package org.locationtech.rasterframes.encoders
-import astraea.spark.rasterframes.encoders.CatalystSerializer.CatalystIO
+import CatalystSerializer.CatalystIO
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.util.ArrayData
@@ -50,6 +50,8 @@ trait CatalystSerializer[T] extends Serializable {
object CatalystSerializer extends StandardSerializers {
def apply[T: CatalystSerializer]: CatalystSerializer[T] = implicitly
+ def schemaOf[T: CatalystSerializer]: StructType = apply[T].schema
+
/**
* For some reason `Row` and `InternalRow` share no common base type. Instead of using
* structural types (which use reflection), this typeclass is used to normalize access
@@ -61,8 +63,8 @@ object CatalystSerializer extends StandardSerializers {
def create(values: Any*): R
def to[T: CatalystSerializer](t: T): R = CatalystSerializer[T].to(t, this)
def toSeq[T: CatalystSerializer](t: Seq[T]): AnyRef
- def get[T: CatalystSerializer](d: R, ordinal: Int): T
- def getSeq[T: CatalystSerializer](d: R, ordinal: Int): Seq[T]
+ def get[T >: Null: CatalystSerializer](d: R, ordinal: Int): T
+ def getSeq[T >: Null: CatalystSerializer](d: R, ordinal: Int): Seq[T]
def isNullAt(d: R, ordinal: Int): Boolean
def getBoolean(d: R, ordinal: Int): Boolean
def getByte(d: R, ordinal: Int): Byte
@@ -91,14 +93,14 @@ object CatalystSerializer extends StandardSerializers {
override def getString(d: R, ordinal: Int): String = d.getString(ordinal)
override def getByteArray(d: R, ordinal: Int): Array[Byte] =
d.get(ordinal).asInstanceOf[Array[Byte]]
- override def get[T: CatalystSerializer](d: R, ordinal: Int): T = {
+ override def get[T >: Null: CatalystSerializer](d: R, ordinal: Int): T = {
d.getAs[Any](ordinal) match {
case r: Row => r.to[T]
case o => o.asInstanceOf[T]
}
}
override def toSeq[T: CatalystSerializer](t: Seq[T]): AnyRef = t.map(_.toRow)
- override def getSeq[T: CatalystSerializer](d: R, ordinal: Int): Seq[T] =
+ override def getSeq[T >: Null: CatalystSerializer](d: R, ordinal: Int): Seq[T] =
d.getSeq[Row](ordinal).map(_.to[T])
override def encode(str: String): String = str
}
@@ -118,7 +120,7 @@ object CatalystSerializer extends StandardSerializers {
override def getDouble(d: InternalRow, ordinal: Int): Double = d.getDouble(ordinal)
override def getString(d: InternalRow, ordinal: Int): String = d.getString(ordinal)
override def getByteArray(d: InternalRow, ordinal: Int): Array[Byte] = d.getBinary(ordinal)
- override def get[T: CatalystSerializer](d: InternalRow, ordinal: Int): T = {
+ override def get[T >: Null: CatalystSerializer](d: InternalRow, ordinal: Int): T = {
val ser = CatalystSerializer[T]
val struct = d.getStruct(ordinal, ser.schema.size)
struct.to[T]
@@ -127,7 +129,7 @@ object CatalystSerializer extends StandardSerializers {
override def toSeq[T: CatalystSerializer](t: Seq[T]): ArrayData =
ArrayData.toArrayData(t.map(_.toInternalRow).toArray)
- override def getSeq[T: CatalystSerializer](d: InternalRow, ordinal: Int): Seq[T] = {
+ override def getSeq[T >: Null: CatalystSerializer](d: InternalRow, ordinal: Int): Seq[T] = {
val ad = d.getArray(ordinal)
val result = Array.ofDim[Any](ad.numElements()).asInstanceOf[Array[T]]
ad.foreach(
@@ -141,15 +143,20 @@ object CatalystSerializer extends StandardSerializers {
}
implicit class WithToRow[T: CatalystSerializer](t: T) {
- def toInternalRow: InternalRow = CatalystSerializer[T].toInternalRow(t)
- def toRow: Row = CatalystSerializer[T].toRow(t)
+ def toInternalRow: InternalRow = if (t == null) null else CatalystSerializer[T].toInternalRow(t)
+ def toRow: Row = if (t == null) null else CatalystSerializer[T].toRow(t)
}
implicit class WithFromInternalRow(val r: InternalRow) extends AnyVal {
- def to[T: CatalystSerializer]: T = CatalystSerializer[T].fromInternalRow(r)
+ def to[T >: Null: CatalystSerializer]: T = if (r == null) null else CatalystSerializer[T].fromInternalRow(r)
}
implicit class WithFromRow(val r: Row) extends AnyVal {
- def to[T: CatalystSerializer]: T = CatalystSerializer[T].fromRow(r)
+ def to[T >: Null: CatalystSerializer]: T = if (r == null) null else CatalystSerializer[T].fromRow(r)
+ }
+
+ implicit class WithTypeConformity(val left: DataType) extends AnyVal {
+ def conformsTo[T >: Null: CatalystSerializer]: Boolean =
+ org.apache.spark.sql.rf.WithTypeConformity(left).conformsTo(schemaOf[T])
}
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializerEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/CatalystSerializerEncoder.scala
similarity index 98%
rename from core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializerEncoder.scala
rename to core/src/main/scala/org/locationtech/rasterframes/encoders/CatalystSerializerEncoder.scala
index 27e452329..792b74165 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializerEncoder.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/CatalystSerializerEncoder.scala
@@ -19,7 +19,8 @@
*
*/
-package astraea.spark.rasterframes.encoders
+package org.locationtech.rasterframes.encoders
+
import org.apache.spark.sql.catalyst.analysis.GetColumnByOrdinal
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.expressions._
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/CellTypeEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/CellTypeEncoder.scala
similarity index 93%
rename from core/src/main/scala/astraea/spark/rasterframes/encoders/CellTypeEncoder.scala
rename to core/src/main/scala/org/locationtech/rasterframes/encoders/CellTypeEncoder.scala
index 953c2ed65..ea01d4143 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/CellTypeEncoder.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/CellTypeEncoder.scala
@@ -15,9 +15,11 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.encoders
+package org.locationtech.rasterframes.encoders
import geotrellis.raster.{CellType, DataType}
import org.apache.spark.sql.catalyst.ScalaReflection
@@ -26,7 +28,7 @@ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.rf.VersionShims.InvokeSafely
import org.apache.spark.sql.types.{ObjectType, StringType}
import org.apache.spark.unsafe.types.UTF8String
-
+import CatalystSerializer._
import scala.reflect.classTag
/**
@@ -41,7 +43,7 @@ object CellTypeEncoder {
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.objects._
val ctType = ScalaReflection.dataTypeFor[DataType]
- val schema = CatalystSerializer[CellType].schema
+ val schema = schemaOf[CellType]
val inputObject = BoundReference(0, ctType, nullable = false)
val intermediateType = ObjectType(classOf[String])
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/DelegatingSubfieldEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/DelegatingSubfieldEncoder.scala
similarity index 96%
rename from core/src/main/scala/astraea/spark/rasterframes/encoders/DelegatingSubfieldEncoder.scala
rename to core/src/main/scala/org/locationtech/rasterframes/encoders/DelegatingSubfieldEncoder.scala
index 9b984b8ad..cf4c2e5ac 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/DelegatingSubfieldEncoder.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/DelegatingSubfieldEncoder.scala
@@ -15,9 +15,11 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.encoders
+package org.locationtech.rasterframes.encoders
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.catalyst.analysis.{GetColumnByOrdinal, UnresolvedAttribute, UnresolvedExtractValue}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/EnvelopeEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/EnvelopeEncoder.scala
similarity index 59%
rename from core/src/main/scala/astraea/spark/rasterframes/encoders/EnvelopeEncoder.scala
rename to core/src/main/scala/org/locationtech/rasterframes/encoders/EnvelopeEncoder.scala
index 5888a1974..50d66f3e0 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/EnvelopeEncoder.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/EnvelopeEncoder.scala
@@ -1,6 +1,27 @@
-package astraea.spark.rasterframes.encoders
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
-import com.vividsolutions.jts.geom.Envelope
+package org.locationtech.rasterframes.encoders
+
+import org.locationtech.jts.geom.Envelope
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.catalyst.analysis.GetColumnByOrdinal
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
@@ -8,7 +29,7 @@ import org.apache.spark.sql.catalyst.expressions.objects.NewInstance
import org.apache.spark.sql.catalyst.expressions.{BoundReference, CreateNamedStruct, Literal}
import org.apache.spark.sql.rf.VersionShims.InvokeSafely
import org.apache.spark.sql.types._
-
+import CatalystSerializer._
import scala.reflect.classTag
/**
@@ -18,7 +39,7 @@ import scala.reflect.classTag
*/
object EnvelopeEncoder {
- val schema = CatalystSerializer[Envelope].schema
+ val schema = schemaOf[Envelope]
val dataType: DataType = ScalaReflection.dataTypeFor[Envelope]
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/ProjectedExtentEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/ProjectedExtentEncoder.scala
similarity index 89%
rename from core/src/main/scala/astraea/spark/rasterframes/encoders/ProjectedExtentEncoder.scala
rename to core/src/main/scala/org/locationtech/rasterframes/encoders/ProjectedExtentEncoder.scala
index 0599f9848..f5b078159 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/ProjectedExtentEncoder.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/ProjectedExtentEncoder.scala
@@ -15,11 +15,13 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.encoders
+package org.locationtech.rasterframes.encoders
-import astraea.spark.rasterframes._
+import org.locationtech.rasterframes._
import geotrellis.vector.ProjectedExtent
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/SparkBasicEncoders.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/SparkBasicEncoders.scala
similarity index 90%
rename from core/src/main/scala/astraea/spark/rasterframes/encoders/SparkBasicEncoders.scala
rename to core/src/main/scala/org/locationtech/rasterframes/encoders/SparkBasicEncoders.scala
index 670d2e217..e2830f7f1 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/SparkBasicEncoders.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/SparkBasicEncoders.scala
@@ -15,9 +15,11 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.encoders
+package org.locationtech.rasterframes.encoders
import org.apache.spark.sql.{Encoder, Encoders}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
@@ -38,3 +40,4 @@ private[rasterframes] trait SparkBasicEncoders {
implicit val boolEnc: Encoder[Boolean] = Encoders.scalaBoolean
}
+object SparkBasicEncoders extends SparkBasicEncoders
\ No newline at end of file
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/StandardEncoders.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/StandardEncoders.scala
similarity index 83%
rename from core/src/main/scala/astraea/spark/rasterframes/encoders/StandardEncoders.scala
rename to core/src/main/scala/org/locationtech/rasterframes/encoders/StandardEncoders.scala
index 625eea1cd..256da58d8 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/StandardEncoders.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/StandardEncoders.scala
@@ -15,28 +15,31 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.encoders
+package org.locationtech.rasterframes.encoders
import java.net.URI
import java.sql.Timestamp
-import astraea.spark.rasterframes.model._
-import astraea.spark.rasterframes.stats.{CellHistogram, CellStatistics, LocalCellStatistics}
-import com.vividsolutions.jts.geom.Envelope
+import org.locationtech.rasterframes.stats.{CellHistogram, CellStatistics, LocalCellStatistics}
+import org.locationtech.jts.geom.Envelope
import geotrellis.proj4.CRS
-import geotrellis.raster.{CellSize, CellType, Tile, TileLayout}
+import geotrellis.raster.{CellSize, CellType, Raster, Tile, TileLayout}
import geotrellis.spark.tiling.LayoutDefinition
import geotrellis.spark.{KeyBounds, SpaceTimeKey, SpatialKey, TemporalKey, TemporalProjectedExtent, TileLayerMetadata}
import geotrellis.vector.{Extent, ProjectedExtent}
+import org.apache.spark.sql.{Encoder, Encoders}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.locationtech.geomesa.spark.jts.encoders.SpatialEncoders
+import org.locationtech.rasterframes.model.{CellContext, Cells, TileContext, TileDataContext}
import scala.reflect.runtime.universe._
/**
- * Implicit encoder definitions for RasterFrame types.
+ * Implicit encoder definitions for RasterFrameLayer types.
*/
trait StandardEncoders extends SpatialEncoders {
object PrimitiveEncoders extends SparkBasicEncoders
@@ -48,6 +51,7 @@ trait StandardEncoders extends SpatialEncoders {
implicit def stkBoundsEncoder: ExpressionEncoder[KeyBounds[SpaceTimeKey]] = ExpressionEncoder()
implicit def extentEncoder: ExpressionEncoder[Extent] = ExpressionEncoder[Extent]()
implicit def singlebandTileEncoder: ExpressionEncoder[Tile] = ExpressionEncoder()
+ implicit def rasterEncoder: ExpressionEncoder[Raster[Tile]] = ExpressionEncoder()
implicit def tileLayerMetadataEncoder[K: TypeTag]: ExpressionEncoder[TileLayerMetadata[K]] = TileLayerMetadataEncoder()
implicit def crsEncoder: ExpressionEncoder[CRS] = CRSEncoder()
implicit def projectedExtentEncoder: ExpressionEncoder[ProjectedExtent] = ProjectedExtentEncoder()
@@ -66,6 +70,7 @@ trait StandardEncoders extends SpatialEncoders {
implicit def cellsEncoder: ExpressionEncoder[Cells] = Cells.encoder
implicit def tileContextEncoder: ExpressionEncoder[TileContext] = TileContext.encoder
implicit def tileDataContextEncoder: ExpressionEncoder[TileDataContext] = TileDataContext.encoder
+ implicit def extentTilePairEncoder: Encoder[(ProjectedExtent, Tile)] = Encoders.tuple(projectedExtentEncoder, singlebandTileEncoder)
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/StandardSerializers.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/StandardSerializers.scala
similarity index 81%
rename from core/src/main/scala/astraea/spark/rasterframes/encoders/StandardSerializers.scala
rename to core/src/main/scala/org/locationtech/rasterframes/encoders/StandardSerializers.scala
index aaff5c534..affe545b8 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/StandardSerializers.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/StandardSerializers.scala
@@ -19,16 +19,18 @@
*
*/
-package astraea.spark.rasterframes.encoders
-import astraea.spark.rasterframes.encoders.CatalystSerializer.CatalystIO
-import astraea.spark.rasterframes.util.CRSParser
-import com.vividsolutions.jts.geom.Envelope
+package org.locationtech.rasterframes.encoders
+
import geotrellis.proj4.CRS
import geotrellis.raster._
import geotrellis.spark._
import geotrellis.spark.tiling.LayoutDefinition
import geotrellis.vector._
import org.apache.spark.sql.types._
+import org.locationtech.jts.geom.Envelope
+import org.locationtech.rasterframes.TileType
+import org.locationtech.rasterframes.encoders.CatalystSerializer.{CatalystIO, _}
+import org.locationtech.rasterframes.model.LazyCRS
/** Collection of CatalystSerializers for third-party types. */
trait StandardSerializers {
@@ -77,7 +79,7 @@ trait StandardSerializers {
)
)
override def from[R](row: R, io: CatalystIO[R]): CRS =
- CRSParser(io.getString(row, 0))
+ LazyCRS(io.getString(row, 0))
}
implicit val cellTypeSerializer: CatalystSerializer[CellType] = new CatalystSerializer[CellType] {
@@ -93,8 +95,8 @@ trait StandardSerializers {
implicit val projectedExtentSerializer: CatalystSerializer[ProjectedExtent] = new CatalystSerializer[ProjectedExtent] {
override def schema: StructType = StructType(Seq(
- StructField("extent", CatalystSerializer[Extent].schema, false),
- StructField("crs", CatalystSerializer[CRS].schema, false)
+ StructField("extent", schemaOf[Extent], false),
+ StructField("crs", schemaOf[CRS], false)
))
override protected def to[R](t: ProjectedExtent, io: CatalystSerializer.CatalystIO[R]): R = io.create(
@@ -187,8 +189,8 @@ trait StandardSerializers {
implicit val layoutDefinitionSerializer = new CatalystSerializer[LayoutDefinition] {
override def schema: StructType = StructType(Seq(
- StructField("extent", CatalystSerializer[Extent].schema, true),
- StructField("tileLayout", CatalystSerializer[TileLayout].schema, true)
+ StructField("extent", schemaOf[Extent], true),
+ StructField("tileLayout", schemaOf[TileLayout], true)
))
override protected def to[R](t: LayoutDefinition, io: CatalystIO[R]): R = io.create(
@@ -202,10 +204,10 @@ trait StandardSerializers {
)
}
- implicit def boundsSerializer[T: CatalystSerializer]: CatalystSerializer[KeyBounds[T]] = new CatalystSerializer[KeyBounds[T]] {
+ implicit def boundsSerializer[T >: Null: CatalystSerializer]: CatalystSerializer[KeyBounds[T]] = new CatalystSerializer[KeyBounds[T]] {
override def schema: StructType = StructType(Seq(
- StructField("minKey", CatalystSerializer[T].schema, true),
- StructField("maxKey", CatalystSerializer[T].schema, true)
+ StructField("minKey", schemaOf[T], true),
+ StructField("maxKey", schemaOf[T], true)
))
override protected def to[R](t: KeyBounds[T], io: CatalystIO[R]): R = io.create(
@@ -219,13 +221,13 @@ trait StandardSerializers {
)
}
- def tileLayerMetadataSerializer[T: CatalystSerializer]: CatalystSerializer[TileLayerMetadata[T]] = new CatalystSerializer[TileLayerMetadata[T]] {
+ def tileLayerMetadataSerializer[T >: Null: CatalystSerializer]: CatalystSerializer[TileLayerMetadata[T]] = new CatalystSerializer[TileLayerMetadata[T]] {
override def schema: StructType = StructType(Seq(
- StructField("cellType", CatalystSerializer[CellType].schema, false),
- StructField("layout", CatalystSerializer[LayoutDefinition].schema, false),
- StructField("extent", CatalystSerializer[Extent].schema, false),
- StructField("crs", CatalystSerializer[CRS].schema, false),
- StructField("bounds", CatalystSerializer[KeyBounds[T]].schema, false)
+ StructField("cellType", schemaOf[CellType], false),
+ StructField("layout", schemaOf[LayoutDefinition], false),
+ StructField("extent", schemaOf[Extent], false),
+ StructField("crs", schemaOf[CRS], false),
+ StructField("bounds", schemaOf[KeyBounds[T]], false)
))
override protected def to[R](t: TileLayerMetadata[T], io: CatalystIO[R]): R = io.create(
@@ -245,6 +247,25 @@ trait StandardSerializers {
)
}
+ implicit def rasterSerializer: CatalystSerializer[Raster[Tile]] = new CatalystSerializer[Raster[Tile]] {
+ import org.apache.spark.sql.rf.TileUDT.tileSerializer
+
+ override def schema: StructType = StructType(Seq(
+ StructField("tile", TileType, false),
+ StructField("extent", schemaOf[Extent], false)
+ ))
+
+ override protected def to[R](t: Raster[Tile], io: CatalystIO[R]): R = io.create(
+ io.to(t.tile),
+ io.to(t.extent)
+ )
+
+ override protected def from[R](t: R, io: CatalystIO[R]): Raster[Tile] = Raster(
+ io.get[Tile](t, 0),
+ io.get[Extent](t, 1)
+ )
+ }
+
implicit val spatialKeyTLMSerializer = tileLayerMetadataSerializer[SpatialKey]
implicit val spaceTimeKeyTLMSerializer = tileLayerMetadataSerializer[SpaceTimeKey]
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/StringBackedEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/StringBackedEncoder.scala
similarity index 96%
rename from core/src/main/scala/astraea/spark/rasterframes/encoders/StringBackedEncoder.scala
rename to core/src/main/scala/org/locationtech/rasterframes/encoders/StringBackedEncoder.scala
index 8dc950b4b..2ec265ccc 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/StringBackedEncoder.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/StringBackedEncoder.scala
@@ -15,9 +15,11 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.encoders
+package org.locationtech.rasterframes.encoders
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.catalyst.analysis.GetColumnByOrdinal
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/TemporalProjectedExtentEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/TemporalProjectedExtentEncoder.scala
similarity index 76%
rename from core/src/main/scala/astraea/spark/rasterframes/encoders/TemporalProjectedExtentEncoder.scala
rename to core/src/main/scala/org/locationtech/rasterframes/encoders/TemporalProjectedExtentEncoder.scala
index 5e44bd7fe..f69f7f160 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/TemporalProjectedExtentEncoder.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/TemporalProjectedExtentEncoder.scala
@@ -15,20 +15,20 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.encoders
-
-import java.time.ZonedDateTime
+package org.locationtech.rasterframes.encoders
-import astraea.spark.rasterframes._
+import org.locationtech.rasterframes._
import geotrellis.spark.TemporalProjectedExtent
-import geotrellis.vector.ProjectedExtent
import org.apache.spark.sql.Encoders
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
/**
- * Custom encoder for [[ProjectedExtent]]. Necessary because [[geotrellis.proj4.CRS]] within [[ProjectedExtent]] isn't a case class, and [[ZonedDateTime]] doesn't have a natural encoder.
+ * Custom encoder for `TemporalProjectedExtent`. Necessary because `geotrellis.proj4.CRS` within
+ * `ProjectedExtent` isn't a case class, and `ZonedDateTime` doesn't have a natural encoder.
*
* @since 8/2/17
*/
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/TileLayerMetadataEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/TileLayerMetadataEncoder.scala
similarity index 91%
rename from core/src/main/scala/astraea/spark/rasterframes/encoders/TileLayerMetadataEncoder.scala
rename to core/src/main/scala/org/locationtech/rasterframes/encoders/TileLayerMetadataEncoder.scala
index c2ed1bbd4..2f59ea451 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/TileLayerMetadataEncoder.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/TileLayerMetadataEncoder.scala
@@ -15,9 +15,11 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.encoders
+package org.locationtech.rasterframes.encoders
import geotrellis.spark.{KeyBounds, TileLayerMetadata}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
@@ -31,7 +33,7 @@ import scala.reflect.runtime.universe._
* @since 7/21/17
*/
object TileLayerMetadataEncoder {
- import astraea.spark.rasterframes._
+ import org.locationtech.rasterframes._
private def fieldEncoders = Seq[(String, ExpressionEncoder[_])](
"cellType" -> cellTypeEncoder,
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/URIEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/URIEncoder.scala
similarity index 92%
rename from core/src/main/scala/astraea/spark/rasterframes/encoders/URIEncoder.scala
rename to core/src/main/scala/org/locationtech/rasterframes/encoders/URIEncoder.scala
index d50cd7803..bbbcf25ea 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/URIEncoder.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/URIEncoder.scala
@@ -15,9 +15,11 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.encoders
+package org.locationtech.rasterframes.encoders
import java.net.URI
diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/package.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/package.scala
similarity index 81%
rename from core/src/main/scala/astraea/spark/rasterframes/encoders/package.scala
rename to core/src/main/scala/org/locationtech/rasterframes/encoders/package.scala
index 678bbfcd1..8cb5a6f85 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/encoders/package.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/package.scala
@@ -15,10 +15,13 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes
+package org.locationtech.rasterframes
+import org.apache.spark.sql.rf._
import org.apache.spark.sql.Column
import org.apache.spark.sql.catalyst.expressions.Literal
@@ -41,7 +44,12 @@ package object encoders {
/** Constructs a catalyst literal expression from anything with a serializer. */
def SerializedLiteral[T >: Null: CatalystSerializer](t: T): Literal = {
val ser = CatalystSerializer[T]
- Literal.create(ser.toInternalRow(t), ser.schema)
+ val schema = ser.schema match {
+ case s if s.conformsTo(TileType.sqlType) => TileType
+ case s if s.conformsTo(RasterSourceType.sqlType) => RasterSourceType
+ case s => s
+ }
+ Literal.create(ser.toInternalRow(t), schema)
}
/** Constructs a Dataframe literal column from anything with a serializer. */
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/BinaryLocalRasterOp.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/BinaryLocalRasterOp.scala
similarity index 94%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/BinaryLocalRasterOp.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/BinaryLocalRasterOp.scala
index 3fac44c65..bd55345fa 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/BinaryLocalRasterOp.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/BinaryLocalRasterOp.scala
@@ -19,10 +19,10 @@
*
*/
-package astraea.spark.rasterframes.expressions
+package org.locationtech.rasterframes.expressions
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.expressions.DynamicExtractors._
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.expressions.DynamicExtractors._
import com.typesafe.scalalogging.LazyLogging
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/BinaryRasterOp.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/BinaryRasterOp.scala
similarity index 93%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/BinaryRasterOp.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/BinaryRasterOp.scala
index 02f8fc29e..690658064 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/BinaryRasterOp.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/BinaryRasterOp.scala
@@ -19,9 +19,10 @@
*
*/
-package astraea.spark.rasterframes.expressions
-import astraea.spark.rasterframes.expressions.DynamicExtractors.tileExtractor
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
+package org.locationtech.rasterframes.expressions
+
+import org.locationtech.rasterframes.expressions.DynamicExtractors.tileExtractor
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
import com.typesafe.scalalogging.LazyLogging
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/DynamicExtractors.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/DynamicExtractors.scala
similarity index 68%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/DynamicExtractors.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/DynamicExtractors.scala
index 1dabc8201..6a7e6e421 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/DynamicExtractors.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/DynamicExtractors.scala
@@ -19,26 +19,28 @@
*
*/
-package astraea.spark.rasterframes.expressions
-import astraea.spark.rasterframes.encoders.CatalystSerializer
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.model.TileContext
-import astraea.spark.rasterframes.ref.{ProjectedRasterLike, RasterRef, RasterSource}
-import astraea.spark.rasterframes.tiles.ProjectedRasterTile
+package org.locationtech.rasterframes.expressions
+
+import geotrellis.proj4.CRS
import geotrellis.raster.{CellGrid, Tile}
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.rf.{TileUDT, _}
+import org.apache.spark.sql.rf.{RasterSourceUDT, TileUDT}
import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.model.{LazyCRS, TileContext}
+import org.locationtech.rasterframes.ref.{ProjectedRasterLike, RasterRef, RasterSource}
+import org.locationtech.rasterframes.tiles.ProjectedRasterTile
-private[expressions]
+private[rasterframes]
object DynamicExtractors {
/** Partial function for pulling a tile and its contesxt from an input row. */
lazy val tileExtractor: PartialFunction[DataType, InternalRow => (Tile, Option[TileContext])] = {
case _: TileUDT =>
(row: InternalRow) =>
(row.to[Tile](TileUDT.tileSerializer), None)
- case t if t.conformsTo(CatalystSerializer[ProjectedRasterTile].schema) =>
+ case t if t.conformsTo[ProjectedRasterTile] =>
(row: InternalRow) => {
val prt = row.to[ProjectedRasterTile]
(prt, Some(TileContext(prt)))
@@ -48,7 +50,7 @@ object DynamicExtractors {
lazy val rowTileExtractor: PartialFunction[DataType, Row => (Tile, Option[TileContext])] = {
case _: TileUDT =>
(row: Row) => (row.to[Tile](TileUDT.tileSerializer), None)
- case t if t.conformsTo(CatalystSerializer[ProjectedRasterTile].schema) =>
+ case t if t.conformsTo[ProjectedRasterTile] =>
(row: Row) => {
val prt = row.to[ProjectedRasterTile]
(prt, Some(TileContext(prt)))
@@ -58,21 +60,30 @@ object DynamicExtractors {
/** Partial function for pulling a ProjectedRasterLike an input row. */
lazy val projectedRasterLikeExtractor: PartialFunction[DataType, InternalRow ⇒ ProjectedRasterLike] = {
case _: RasterSourceUDT ⇒
- (row: InternalRow) ⇒ row.to[RasterSource](RasterSourceUDT.rasterSourceSerializer)
- case t if t.conformsTo(CatalystSerializer[ProjectedRasterTile].schema) =>
+ (row: InternalRow) => row.to[RasterSource](RasterSourceUDT.rasterSourceSerializer)
+ case t if t.conformsTo[ProjectedRasterTile] =>
(row: InternalRow) => row.to[ProjectedRasterTile]
- case t if t.conformsTo(CatalystSerializer[RasterRef].schema) =>
- (row: InternalRow) ⇒ row.to[RasterRef]
+ case t if t.conformsTo[RasterRef] =>
+ (row: InternalRow) => row.to[RasterRef]
}
/** Partial function for pulling a CellGrid from an input row. */
lazy val gridExtractor: PartialFunction[DataType, InternalRow ⇒ CellGrid] = {
- case _: TileUDT ⇒
- (row: InternalRow) ⇒ row.to[Tile](TileUDT.tileSerializer)
- case _: RasterSourceUDT ⇒
- (row: InternalRow) ⇒ row.to[RasterSource](RasterSourceUDT.rasterSourceSerializer)
- case t if t.conformsTo(CatalystSerializer[RasterRef].schema) ⇒
- (row: InternalRow) ⇒ row.to[RasterRef]
+ case _: TileUDT =>
+ (row: InternalRow) => row.to[Tile](TileUDT.tileSerializer)
+ case _: RasterSourceUDT =>
+ (row: InternalRow) => row.to[RasterSource](RasterSourceUDT.rasterSourceSerializer)
+ case t if t.conformsTo[RasterRef] ⇒
+ (row: InternalRow) => row.to[RasterRef]
+ case t if t.conformsTo[ProjectedRasterTile] =>
+ (row: InternalRow) => row.to[ProjectedRasterTile]
+ }
+
+ lazy val crsExtractor: PartialFunction[DataType, Any => CRS] = {
+ case _: StringType =>
+ (v: Any) => LazyCRS(v.asInstanceOf[UTF8String].toString)
+ case t if t.conformsTo[CRS] =>
+ (v: Any) => v.asInstanceOf[InternalRow].to[CRS]
}
sealed trait TileOrNumberArg
@@ -106,9 +117,10 @@ object DynamicExtractors {
lazy val intArgExtractor: PartialFunction[DataType, Any => IntegerArg] = {
case _: IntegerType | _: ByteType | _: ShortType => {
case i: Int => IntegerArg(i)
- case b: Byte => IntegerArg(b)
+ case b: Byte => IntegerArg(b.toInt)
case s: Short => IntegerArg(s.toInt)
case c: Char => IntegerArg(c.toInt)
}
}
+
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/NullToValue.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/NullToValue.scala
similarity index 95%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/NullToValue.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/NullToValue.scala
index edc52fcf7..8bc98c1e2 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/NullToValue.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/NullToValue.scala
@@ -19,7 +19,8 @@
*
*/
-package astraea.spark.rasterframes.expressions
+package org.locationtech.rasterframes.expressions
+
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.UnaryExpression
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/OnCellGridExpression.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/OnCellGridExpression.scala
similarity index 93%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/OnCellGridExpression.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/OnCellGridExpression.scala
index b856ae2be..05d56f7d1 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/OnCellGridExpression.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/OnCellGridExpression.scala
@@ -19,9 +19,9 @@
*
*/
-package astraea.spark.rasterframes.expressions
+package org.locationtech.rasterframes.expressions
-import astraea.spark.rasterframes.expressions.DynamicExtractors._
+import org.locationtech.rasterframes.expressions.DynamicExtractors._
import geotrellis.raster.CellGrid
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/OnTileContextExpression.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/OnTileContextExpression.scala
similarity index 91%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/OnTileContextExpression.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/OnTileContextExpression.scala
index a8797ae49..78ebd1f5b 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/OnTileContextExpression.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/OnTileContextExpression.scala
@@ -19,14 +19,14 @@
*
*/
-package astraea.spark.rasterframes.expressions
+package org.locationtech.rasterframes.expressions
-import astraea.spark.rasterframes.expressions.DynamicExtractors._
-import astraea.spark.rasterframes.model.TileContext
+import org.locationtech.rasterframes.expressions.DynamicExtractors._
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
import org.apache.spark.sql.catalyst.expressions.UnaryExpression
+import org.locationtech.rasterframes.model.TileContext
/**
* Implements boilerplate for subtype expressions processing TileUDT (when ProjectedRasterTile), RasterSourceUDT, and
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/SpatialRelation.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/SpatialRelation.scala
similarity index 90%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/SpatialRelation.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/SpatialRelation.scala
index e994c8a64..1d6697048 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/SpatialRelation.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/SpatialRelation.scala
@@ -15,12 +15,16 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.expressions
+package org.locationtech.rasterframes.expressions
-import astraea.spark.rasterframes.expressions.SpatialRelation.RelationPredicate
-import com.vividsolutions.jts.geom._
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.expressions.SpatialRelation.RelationPredicate
+import geotrellis.vector.Extent
+import org.locationtech.jts.geom._
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
@@ -43,6 +47,9 @@ abstract class SpatialRelation extends BinaryExpression
case r: InternalRow ⇒
expr.dataType match {
case udt: AbstractGeometryUDT[_] ⇒ udt.deserialize(r)
+ case dt if dt.conformsTo[Extent] =>
+ val extent = r.to[Extent]
+ extent.jtsGeom
}
}
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/TileAssembler.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/TileAssembler.scala
similarity index 77%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/TileAssembler.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/TileAssembler.scala
index c3a32267f..c3fe0e17b 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/TileAssembler.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/TileAssembler.scala
@@ -19,26 +19,44 @@
*
*/
-package astraea.spark.rasterframes.expressions
+package org.locationtech.rasterframes.expressions
import java.nio.ByteBuffer
-import astraea.spark.rasterframes.expressions.TileAssembler.TileBuffer
-import astraea.spark.rasterframes.util._
+import org.locationtech.rasterframes.expressions.TileAssembler.TileBuffer
+import org.locationtech.rasterframes.util._
import geotrellis.raster.{DataType => _, _}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate}
-import org.apache.spark.sql.catalyst.expressions.{Expression, ImplicitCastInputTypes}
-import org.apache.spark.sql.rf.TileUDT
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription, ImplicitCastInputTypes}
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Column, TypedColumn}
import spire.syntax.cfor._
+import org.locationtech.rasterframes.TileType
/**
* Aggregator for reassembling tiles from from exploded form
*
* @since 9/24/17
*/
+@ExpressionDescription(
+ usage = "_FUNC_(colIndex, rowIndex, cellValue, tileCols, tileRows) - Assemble tiles from set of column and row indices and cell values.",
+ arguments = """
+ Arguments:
+ * colIndex - column to place the cellValue in the generated tile
+ * rowIndex - row to place the cellValue in the generated tile
+ * cellValue - numeric value to place in the generated tile at colIndex and rowIndex
+ * tileCols - number of columns in the generated tile
+ * tileRows - number of rows in the generated tile""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(column_index, row_index, cell0, 10, 10) as tile;
+ ...
+ > SELECT _FUNC_(column_index, row_index, tile, 10, 10) as tile2
+ FROM (SELECT rf_explode_tiles(rf_make_constant_tile(4, 10, 10, 'int8raw')) as tile)
+ ...
+ """
+)
case class TileAssembler(
colIndex: Expression,
rowIndex: Expression,
@@ -49,13 +67,17 @@ case class TileAssembler(
inputAggBufferOffset: Int = 0)
extends TypedImperativeAggregate[TileBuffer] with ImplicitCastInputTypes {
+ def this(colIndex: Expression,
+ rowIndex: Expression,
+ cellValue: Expression,
+ tileCols: Expression,
+ tileRows: Expression) = this(colIndex, rowIndex, cellValue, tileCols, tileRows, 0, 0)
+
override def children: Seq[Expression] = Seq(colIndex, rowIndex, cellValue, tileCols, tileRows)
override def inputTypes = Seq(ShortType, ShortType, DoubleType, ShortType, ShortType)
- private val TileType = new TileUDT()
-
- override def prettyName: String = "assemble_tiles"
+ override def prettyName: String = "rf_assemble_tiles"
override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate =
copy(mutableAggBufferOffset = newMutableAggBufferOffset)
@@ -118,7 +140,7 @@ case class TileAssembler(
val cells = Array.ofDim[Double](length)
result.get(cells)
val (tileCols, tileRows) = buffer.tileSize
- val tile = ArrayTile(cells, tileCols, tileRows)
+ val tile = ArrayTile(cells, tileCols.toInt, tileRows.toInt)
TileType.serialize(tile)
}
@@ -127,7 +149,7 @@ case class TileAssembler(
}
object TileAssembler {
- import astraea.spark.rasterframes.encoders.StandardEncoders._
+ import org.locationtech.rasterframes.encoders.StandardEncoders._
def apply(
columnIndex: Column,
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryLocalRasterOp.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryLocalRasterOp.scala
similarity index 91%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryLocalRasterOp.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryLocalRasterOp.scala
index 049e6d9a1..46969c226 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryLocalRasterOp.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryLocalRasterOp.scala
@@ -19,10 +19,10 @@
*
*/
-package astraea.spark.rasterframes.expressions
+package org.locationtech.rasterframes.expressions
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.expressions.DynamicExtractors._
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.expressions.DynamicExtractors._
import com.typesafe.scalalogging.LazyLogging
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryRasterAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryRasterAggregate.scala
similarity index 91%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryRasterAggregate.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryRasterAggregate.scala
index a28ae6753..cfea46ebe 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryRasterAggregate.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryRasterAggregate.scala
@@ -19,8 +19,9 @@
*
*/
-package astraea.spark.rasterframes.expressions
-import astraea.spark.rasterframes.expressions.DynamicExtractors.rowTileExtractor
+package org.locationtech.rasterframes.expressions
+
+import org.locationtech.rasterframes.expressions.DynamicExtractors.rowTileExtractor
import geotrellis.raster.Tile
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.expressions.{Expression, ScalaUDF}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryRasterOp.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryRasterOp.scala
similarity index 90%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryRasterOp.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryRasterOp.scala
index f21dc4bb5..8d2b532c8 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryRasterOp.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryRasterOp.scala
@@ -19,13 +19,14 @@
*
*/
-package astraea.spark.rasterframes.expressions
-import astraea.spark.rasterframes.expressions.DynamicExtractors._
-import astraea.spark.rasterframes.model.TileContext
+package org.locationtech.rasterframes.expressions
+
+import org.locationtech.rasterframes.expressions.DynamicExtractors._
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
import org.apache.spark.sql.catalyst.expressions.UnaryExpression
+import org.locationtech.rasterframes.model.TileContext
/** Boilerplate for expressions operating on a single Tile-like . */
trait UnaryRasterOp extends UnaryExpression {
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/ExtractTile.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/ExtractTile.scala
similarity index 72%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/ExtractTile.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/ExtractTile.scala
index 7cb7ba3b1..4fc0a0374 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/ExtractTile.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/ExtractTile.scala
@@ -19,25 +19,26 @@
*
*/
-package astraea.spark.rasterframes.expressions.accessors
+package org.locationtech.rasterframes.expressions.accessors
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.expressions.UnaryRasterOp
-import astraea.spark.rasterframes.model.TileContext
-import astraea.spark.rasterframes.tiles.InternalRowTile
-import astraea.spark.rasterframes.tiles.ProjectedRasterTile.ConcreteProjectedRasterTile
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.expressions.UnaryRasterOp
+import org.locationtech.rasterframes.tiles.ProjectedRasterTile.ConcreteProjectedRasterTile
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.rf.TileUDT
import org.apache.spark.sql.types.DataType
import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes.model.TileContext
+import org.locationtech.rasterframes.tiles.InternalRowTile
+import org.locationtech.rasterframes._
/** Expression to extract at tile from several types that contain tiles.*/
case class ExtractTile(child: Expression) extends UnaryRasterOp with CodegenFallback {
- override def dataType: DataType = new TileUDT()
+ override def dataType: DataType = TileType
- override def nodeName: String = "extract_tile"
+ override def nodeName: String = "rf_extract_tile"
implicit val tileSer = TileUDT.tileSerializer
override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = tile match {
case irt: InternalRowTile => irt.mem
@@ -47,7 +48,7 @@ case class ExtractTile(child: Expression) extends UnaryRasterOp with CodegenFall
}
object ExtractTile {
- import astraea.spark.rasterframes.encoders.StandardEncoders.singlebandTileEncoder
+ import org.locationtech.rasterframes.encoders.StandardEncoders.singlebandTileEncoder
def apply(input: Column): TypedColumn[Any, Tile] =
new Column(new ExtractTile(input.expr)).as[Tile]
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetCRS.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetCRS.scala
similarity index 69%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetCRS.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetCRS.scala
index 1a6d29df0..10efc40b7 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetCRS.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetCRS.scala
@@ -19,28 +19,34 @@
*
*/
-package astraea.spark.rasterframes.expressions.accessors
+package org.locationtech.rasterframes.expressions.accessors
-import astraea.spark.rasterframes.encoders.CatalystSerializer
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.encoders.StandardEncoders.crsEncoder
-import astraea.spark.rasterframes.expressions.OnTileContextExpression
-import astraea.spark.rasterframes.model.TileContext
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.encoders.StandardEncoders.crsEncoder
+import org.locationtech.rasterframes.expressions.OnTileContextExpression
import geotrellis.proj4.CRS
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.types.DataType
import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes.model.TileContext
/**
* Expression to extract the CRS out of a RasterRef or ProjectedRasterTile column.
*
* @since 9/9/18
*/
+@ExpressionDescription(
+ usage = "_FUNC_(raster) - Fetches the CRS of a ProjectedRasterTile or RasterSource.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(raster);
+ ....
+ """)
case class GetCRS(child: Expression) extends OnTileContextExpression with CodegenFallback {
- override def dataType: DataType = CatalystSerializer[CRS].schema
- override def nodeName: String = "crs"
+ override def dataType: DataType = schemaOf[CRS]
+ override def nodeName: String = "rf_crs"
override def eval(ctx: TileContext): InternalRow = ctx.crs.toInternalRow
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetCellType.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetCellType.scala
similarity index 77%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetCellType.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetCellType.scala
index eeb521e4b..869835c5f 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetCellType.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetCellType.scala
@@ -19,11 +19,10 @@
*
*/
-package astraea.spark.rasterframes.expressions.accessors
+package org.locationtech.rasterframes.expressions.accessors
-import astraea.spark.rasterframes.encoders.CatalystSerializer
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.expressions.OnCellGridExpression
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.expressions.OnCellGridExpression
import geotrellis.raster.{CellGrid, CellType}
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
@@ -36,15 +35,15 @@ import org.apache.spark.sql.{Column, TypedColumn}
*/
case class GetCellType(child: Expression) extends OnCellGridExpression with CodegenFallback {
- override def nodeName: String = "cell_type"
+ override def nodeName: String = "rf_cell_type"
- def dataType: DataType = CatalystSerializer[CellType].schema
+ def dataType: DataType = schemaOf[CellType]
/** Implemented by subtypes to process incoming ProjectedRasterLike entity. */
override def eval(cg: CellGrid): Any = cg.cellType.toInternalRow
}
object GetCellType {
- import astraea.spark.rasterframes.encoders.StandardEncoders._
+ import org.locationtech.rasterframes.encoders.StandardEncoders._
def apply(col: Column): TypedColumn[Any, CellType] =
new Column(new GetCellType(col.expr)).as[CellType]
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetDimensions.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetDimensions.scala
similarity index 59%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetDimensions.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetDimensions.scala
index 3589dbc1b..dffdfdecb 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetDimensions.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetDimensions.scala
@@ -19,31 +19,36 @@
*
*/
-package astraea.spark.rasterframes.expressions.accessors
+package org.locationtech.rasterframes.expressions.accessors
-import astraea.spark.rasterframes.encoders.CatalystSerializer
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.expressions.OnCellGridExpression
-import astraea.spark.rasterframes.model.TileDimensions
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.expressions.OnCellGridExpression
import geotrellis.raster.CellGrid
import org.apache.spark.sql._
-import org.apache.spark.sql.catalyst.expressions.Expression
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.locationtech.rasterframes.model.TileDimensions
/**
- * Extract a Tile's dimensions
+ * Extract a raster's dimensions
* @since 12/21/17
*/
-case class GetDimensions(child: Expression) extends OnCellGridExpression
- with CodegenFallback {
- override def nodeName: String = "tile_dimensions"
+@ExpressionDescription(
+ usage = "_FUNC_(raster) - Fetches the dimensions (columns & rows) of a Tile, ProjectedRasterTile or RasterSource.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(raster);
+ ....
+ """)
+case class GetDimensions(child: Expression) extends OnCellGridExpression with CodegenFallback {
+ override def nodeName: String = "rf_dimensions"
- def dataType = CatalystSerializer[TileDimensions].schema
+ def dataType = schemaOf[TileDimensions]
override def eval(grid: CellGrid): Any = TileDimensions(grid.cols, grid.rows).toInternalRow
}
object GetDimensions {
- def apply(col: Column): Column =
+ def apply(col: Column): TypedColumn[Any, TileDimensions] =
new Column(new GetDimensions(col.expr)).as[TileDimensions]
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetEnvelope.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetEnvelope.scala
similarity index 86%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetEnvelope.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetEnvelope.scala
index 551f64eb0..d0c14491b 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetEnvelope.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetEnvelope.scala
@@ -19,10 +19,9 @@
*
*/
-package astraea.spark.rasterframes.expressions.accessors
+package org.locationtech.rasterframes.expressions.accessors
-import astraea.spark.rasterframes.encoders.EnvelopeEncoder
-import com.vividsolutions.jts.geom.{Envelope, Geometry}
+import org.locationtech.jts.geom.{Envelope, Geometry}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{Expression, UnaryExpression}
@@ -30,13 +29,14 @@ import org.apache.spark.sql.jts.AbstractGeometryUDT
import org.apache.spark.sql.rf._
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes.encoders.EnvelopeEncoder
/**
* Extracts the bounding box (envelope) of arbitrary JTS Geometry.
*
* @since 2/22/18
*/
-@deprecated("Replace usages of this with GeometryToBounds", "11/4/2018")
+@deprecated("Replace usages of this with GeometryToExtent", "11/4/2018")
case class GetEnvelope(child: Expression) extends UnaryExpression with CodegenFallback {
override def nodeName: String = "envelope"
@@ -60,7 +60,7 @@ case class GetEnvelope(child: Expression) extends UnaryExpression with CodegenFa
}
object GetEnvelope {
- import astraea.spark.rasterframes.encoders.StandardEncoders._
+ import org.locationtech.rasterframes.encoders.StandardEncoders._
def apply(col: Column): TypedColumn[Any, Envelope] =
new GetEnvelope(col.expr).asColumn.as[Envelope]
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetExtent.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetExtent.scala
similarity index 64%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetExtent.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetExtent.scala
index c3e664887..2266c69b5 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetExtent.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetExtent.scala
@@ -19,28 +19,34 @@
*
*/
-package astraea.spark.rasterframes.expressions.accessors
+package org.locationtech.rasterframes.expressions.accessors
-import astraea.spark.rasterframes.encoders.CatalystSerializer
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.encoders.StandardEncoders.extentEncoder
-import astraea.spark.rasterframes.expressions.OnTileContextExpression
-import astraea.spark.rasterframes.model.TileContext
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.encoders.StandardEncoders.extentEncoder
+import org.locationtech.rasterframes.expressions.OnTileContextExpression
import geotrellis.vector.Extent
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes.model.TileContext
/**
- * Expression to extract the Extent out of a RasterRef or ProjectedRasterTile column.
+ * Expression to extract the Extent out of a RasterSource or ProjectedRasterTile column.
*
* @since 9/10/18
*/
+@ExpressionDescription(
+ usage = "_FUNC_(raster) - Fetches the extent (bounding box or envelope) of a ProjectedRasterTile or RasterSource.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(raster);
+ ....
+ """)
case class GetExtent(child: Expression) extends OnTileContextExpression with CodegenFallback {
- override def dataType: DataType = CatalystSerializer[Extent].schema
- override def nodeName: String = "extent"
+ override def dataType: DataType = schemaOf[Extent]
+ override def nodeName: String = "rf_extent"
override def eval(ctx: TileContext): InternalRow = ctx.extent.toInternalRow
}
diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetGeometry.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetGeometry.scala
new file mode 100644
index 000000000..7ff3bcfc7
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetGeometry.scala
@@ -0,0 +1,57 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.expressions.accessors
+
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.jts.JTSTypes
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.jts.geom.Geometry
+import org.locationtech.rasterframes.encoders.StandardEncoders.jtsGeometryEncoder
+import org.locationtech.rasterframes.expressions.OnTileContextExpression
+import org.locationtech.rasterframes.model.TileContext
+
+/**
+ * Expression to extract the Extent out of a RasterSource or ProjectedRasterTile column.
+ *
+ * @since 9/10/18
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(raster) - Fetches the extent (bounding box or envelope) of a ProjectedRasterTile or RasterSource.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(raster);
+ ....
+ """)
+case class GetGeometry(child: Expression) extends OnTileContextExpression with CodegenFallback {
+ override def dataType: DataType = JTSTypes.GeometryTypeInstance
+ override def nodeName: String = "rf_geometry"
+ override def eval(ctx: TileContext): InternalRow =
+ JTSTypes.GeometryTypeInstance.serialize(ctx.extent.jtsGeom)
+}
+
+object GetGeometry {
+ def apply(col: Column): TypedColumn[Any, Geometry] =
+ new Column(GetGeometry(col.expr)).as[Geometry]
+}
\ No newline at end of file
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetTileContext.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetTileContext.scala
similarity index 75%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetTileContext.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetTileContext.scala
index 98b7eb401..6c9a3538a 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetTileContext.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetTileContext.scala
@@ -19,19 +19,20 @@
*
*/
-package astraea.spark.rasterframes.expressions.accessors
-import astraea.spark.rasterframes.encoders.CatalystSerializer
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.expressions.UnaryRasterOp
-import astraea.spark.rasterframes.model.TileContext
+package org.locationtech.rasterframes.expressions.accessors
+
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.expressions.UnaryRasterOp
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.types.DataType
import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes.expressions.UnaryRasterOp
+import org.locationtech.rasterframes.model.TileContext
case class GetTileContext(child: Expression) extends UnaryRasterOp with CodegenFallback {
- override def dataType: DataType = CatalystSerializer[TileContext].schema
+ override def dataType: DataType = schemaOf[TileContext]
override def nodeName: String = "get_tile_context"
override protected def eval(tile: Tile, ctx: Option[TileContext]): Any =
@@ -39,7 +40,7 @@ case class GetTileContext(child: Expression) extends UnaryRasterOp with CodegenF
}
object GetTileContext {
- import astraea.spark.rasterframes.encoders.StandardEncoders.tileContextEncoder
+ import org.locationtech.rasterframes.encoders.StandardEncoders.tileContextEncoder
def apply(input: Column): TypedColumn[Any, TileContext] =
new Column(new GetTileContext(input.expr)).as[TileContext]
diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/RealizeTile.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/RealizeTile.scala
new file mode 100644
index 000000000..d8c9f0ba6
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/RealizeTile.scala
@@ -0,0 +1,55 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.expressions.accessors
+
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.rf.TileUDT
+import org.apache.spark.sql.types.DataType
+import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.expressions.UnaryRasterOp
+import org.locationtech.rasterframes.model.TileContext
+
+@ExpressionDescription(
+ usage = "_FUNC_(raster) - Extracts the Tile component of a RasterSource, ProjectedRasterTile (or Tile) and ensures the cells are fully fetched.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(raster);
+ ....
+ """)
+case class RealizeTile(child: Expression) extends UnaryRasterOp with CodegenFallback {
+ override def dataType: DataType = TileType
+
+ override def nodeName: String = "rf_tile"
+ implicit val tileSer = TileUDT.tileSerializer
+
+ override protected def eval(tile: Tile, ctx: Option[TileContext]): Any =
+ (tile.toArrayTile(): Tile).toInternalRow
+}
+
+object RealizeTile {
+ def apply(col: Column): TypedColumn[Any, Tile] =
+ new Column(new RealizeTile(col.expr)).as[Tile]
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellCountAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/CellCountAggregate.scala
similarity index 83%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellCountAggregate.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/CellCountAggregate.scala
index 0a4424665..82c2d3f93 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellCountAggregate.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/CellCountAggregate.scala
@@ -19,10 +19,10 @@
*
*/
-package astraea.spark.rasterframes.expressions.aggstats
+package org.locationtech.rasterframes.expressions.aggregates
-import astraea.spark.rasterframes.expressions.UnaryRasterAggregate
-import astraea.spark.rasterframes.expressions.tilestats.{DataCells, NoDataCells}
+import org.locationtech.rasterframes.expressions.UnaryRasterAggregate
+import org.locationtech.rasterframes.expressions.tilestats.{DataCells, NoDataCells}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, _}
import org.apache.spark.sql.types.{LongType, Metadata}
@@ -47,8 +47,8 @@ abstract class CellCountAggregate(isData: Boolean) extends UnaryRasterAggregate
)
private def CellTest =
- if (isData) tileOpAsExpression("data_cells", DataCells.op)
- else tileOpAsExpression("no_data_cells", NoDataCells.op)
+ if (isData) tileOpAsExpression("rf_data_cells", DataCells.op)
+ else tileOpAsExpression("rf_no_data_cells", NoDataCells.op)
val updateExpressions = Seq(
If(IsNull(child), count, Add(count, CellTest(child)))
@@ -64,7 +64,7 @@ abstract class CellCountAggregate(isData: Boolean) extends UnaryRasterAggregate
}
object CellCountAggregate {
- import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.longEnc
+ import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.longEnc
@ExpressionDescription(
usage = "_FUNC_(tile) - Count the total data (non-no-data) cells in a tile column.",
@@ -77,7 +77,7 @@ object CellCountAggregate {
92384753"""
)
case class DataCells(child: Expression) extends CellCountAggregate(true) {
- override def nodeName: String = "agg_data_cells"
+ override def nodeName: String = "rf_agg_data_cells"
}
object DataCells {
def apply(tile: Column): TypedColumn[Any, Long] =
@@ -94,7 +94,7 @@ object CellCountAggregate {
23584"""
)
case class NoDataCells(child: Expression) extends CellCountAggregate(false) {
- override def nodeName: String = "agg_no_data_cells"
+ override def nodeName: String = "rf_agg_no_data_cells"
}
object NoDataCells {
def apply(tile: Column): TypedColumn[Any, Long] =
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellMeanAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/CellMeanAggregate.scala
similarity index 85%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellMeanAggregate.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/CellMeanAggregate.scala
index 846f169cb..009a46cf3 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellMeanAggregate.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/CellMeanAggregate.scala
@@ -19,10 +19,10 @@
*
*/
-package astraea.spark.rasterframes.expressions.aggstats
+package org.locationtech.rasterframes.expressions.aggregates
-import astraea.spark.rasterframes.expressions.UnaryRasterAggregate
-import astraea.spark.rasterframes.expressions.tilestats.{DataCells, Sum}
+import org.locationtech.rasterframes.expressions.UnaryRasterAggregate
+import org.locationtech.rasterframes.expressions.tilestats.{DataCells, Sum}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, _}
import org.apache.spark.sql.types.{DoubleType, LongType, Metadata}
@@ -41,7 +41,7 @@ import org.apache.spark.sql.{Column, TypedColumn}
....
""")
case class CellMeanAggregate(child: Expression) extends UnaryRasterAggregate {
- override def nodeName: String = "agg_mean"
+ override def nodeName: String = "rf_agg_mean"
private lazy val sum =
AttributeReference("sum", DoubleType, false, Metadata.empty)()
@@ -58,7 +58,7 @@ case class CellMeanAggregate(child: Expression) extends UnaryRasterAggregate {
// Cant' figure out why we can't just use the Expression directly
// this is necessary to properly handle null rows. For example,
// if we use `tilestats.Sum` directly, we get an NPE when the stage is executed.
- private val DataCellCounts = tileOpAsExpression("data_cells", DataCells.op)
+ private val DataCellCounts = tileOpAsExpression("rf_data_cells", DataCells.op)
private val SumCells = tileOpAsExpression("sum_cells", Sum.op)
override val updateExpressions = Seq(
@@ -79,7 +79,7 @@ case class CellMeanAggregate(child: Expression) extends UnaryRasterAggregate {
}
object CellMeanAggregate {
- import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc
+ import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc
/** Computes the column aggregate mean. */
def apply(tile: Column): TypedColumn[Any, Double] =
new Column(new CellMeanAggregate(tile.expr).toAggregateExpression()).as[Double]
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellStatsAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/CellStatsAggregate.scala
similarity index 92%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellStatsAggregate.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/CellStatsAggregate.scala
index cfcde38a5..95c0bd837 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellStatsAggregate.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/CellStatsAggregate.scala
@@ -19,16 +19,16 @@
*
*/
-package astraea.spark.rasterframes.expressions.aggstats
+package org.locationtech.rasterframes.expressions.aggregates
-import astraea.spark.rasterframes.expressions.accessors.ExtractTile
-import astraea.spark.rasterframes.stats.CellStatistics
+import org.locationtech.rasterframes.expressions.accessors.ExtractTile
+import org.locationtech.rasterframes.stats.CellStatistics
+import org.locationtech.rasterframes.TileType
import geotrellis.raster.{Tile, _}
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, AggregateMode, Complete}
import org.apache.spark.sql.catalyst.expressions.{ExprId, Expression, ExpressionDescription, NamedExpression}
import org.apache.spark.sql.execution.aggregate.ScalaUDAF
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
-import org.apache.spark.sql.rf.TileUDT
import org.apache.spark.sql.types.{DataType, _}
import org.apache.spark.sql.{Column, Row, TypedColumn}
@@ -40,8 +40,6 @@ import org.apache.spark.sql.{Column, Row, TypedColumn}
case class CellStatsAggregate() extends UserDefinedAggregateFunction {
import CellStatsAggregate.C
// TODO: rewrite as a DeclarativeAggregate
- private val TileType = new TileUDT()
-
override def inputSchema: StructType = StructType(StructField("value", TileType) :: Nil)
override def dataType: DataType = StructType(Seq(
@@ -122,11 +120,11 @@ case class CellStatsAggregate() extends UserDefinedAggregateFunction {
}
object CellStatsAggregate {
- import astraea.spark.rasterframes.encoders.StandardEncoders.cellStatsEncoder
+ import org.locationtech.rasterframes.encoders.StandardEncoders.cellStatsEncoder
def apply(col: Column): TypedColumn[Any, CellStatistics] =
new Column(new CellStatsAggregateUDAF(col.expr))
- .as(s"agg_stats($col)") // node renaming in class doesn't seem to propogate
+ .as(s"rf_agg_stats($col)") // node renaming in class doesn't seem to propogate
.as[CellStatistics]
/** Adapter hack to allow UserDefinedAggregateFunction to be referenced as an expression. */
@@ -147,7 +145,7 @@ object CellStatsAggregate {
class CellStatsAggregateUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId)
extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) {
def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new CellStatsAggregate()), Complete, false, NamedExpression.newExprId)
- override def nodeName: String = "agg_stats"
+ override def nodeName: String = "rf_agg_stats"
}
object CellStatsAggregateUDAF {
def apply(child: Expression): CellStatsAggregateUDAF = new CellStatsAggregateUDAF(child)
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/HistogramAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/HistogramAggregate.scala
similarity index 88%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/HistogramAggregate.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/HistogramAggregate.scala
index 7920415da..44cc1324b 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/HistogramAggregate.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/HistogramAggregate.scala
@@ -19,13 +19,13 @@
*
*/
-package astraea.spark.rasterframes.expressions.aggstats
+package org.locationtech.rasterframes.expressions.aggregates
import java.nio.ByteBuffer
-import astraea.spark.rasterframes.expressions.accessors.ExtractTile
-import astraea.spark.rasterframes.functions.safeEval
-import astraea.spark.rasterframes.stats.CellHistogram
+import org.locationtech.rasterframes.expressions.accessors.ExtractTile
+import org.locationtech.rasterframes.functions.safeEval
+import org.locationtech.rasterframes.stats.CellHistogram
import geotrellis.raster.Tile
import geotrellis.raster.histogram.{Histogram, StreamingHistogram}
import geotrellis.spark.util.KryoSerializer
@@ -33,9 +33,9 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression,
import org.apache.spark.sql.catalyst.expressions.{ExprId, Expression, ExpressionDescription, NamedExpression}
import org.apache.spark.sql.execution.aggregate.ScalaUDAF
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
-import org.apache.spark.sql.rf.TileUDT
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Column, Row, TypedColumn}
+import org.locationtech.rasterframes.TileType
/**
* Histogram aggregation function for a full column of tiles.
@@ -45,7 +45,6 @@ import org.apache.spark.sql.{Column, Row, TypedColumn}
case class HistogramAggregate(numBuckets: Int) extends UserDefinedAggregateFunction {
def this() = this(StreamingHistogram.DEFAULT_NUM_BUCKETS)
// TODO: rewrite as TypedAggregateExpression or similar.
- private val TileType = new TileUDT()
override def inputSchema: StructType = StructType(StructField("value", TileType) :: Nil)
@@ -96,11 +95,11 @@ case class HistogramAggregate(numBuckets: Int) extends UserDefinedAggregateFunct
}
object HistogramAggregate {
- import astraea.spark.rasterframes.encoders.StandardEncoders.cellHistEncoder
+ import org.locationtech.rasterframes.encoders.StandardEncoders.cellHistEncoder
def apply(col: Column): TypedColumn[Any, CellHistogram] =
new Column(new HistogramAggregateUDAF(col.expr))
- .as(s"agg_approx_histogram($col)") // node renaming in class doesn't seem to propogate
+ .as(s"rf_agg_approx_histogram($col)") // node renaming in class doesn't seem to propogate
.as[CellHistogram]
/** Adapter hack to allow UserDefinedAggregateFunction to be referenced as an expression. */
@@ -117,7 +116,7 @@ object HistogramAggregate {
class HistogramAggregateUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId)
extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) {
def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new HistogramAggregate()), Complete, false, NamedExpression.newExprId)
- override def nodeName: String = "agg_approx_histogram"
+ override def nodeName: String = "rf_agg_approx_histogram"
}
object HistogramAggregateUDAF {
def apply(child: Expression): HistogramAggregateUDAF = new HistogramAggregateUDAF(child)
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalCountAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalCountAggregate.scala
similarity index 88%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalCountAggregate.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalCountAggregate.scala
index f427d9ee3..256cd63dd 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalCountAggregate.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalCountAggregate.scala
@@ -19,19 +19,19 @@
*
*/
-package astraea.spark.rasterframes.expressions.aggstats
+package org.locationtech.rasterframes.expressions.aggregates
-import astraea.spark.rasterframes.expressions.accessors.ExtractTile
-import astraea.spark.rasterframes.functions.safeBinaryOp
+import org.locationtech.rasterframes.expressions.accessors.ExtractTile
+import org.locationtech.rasterframes.functions.safeBinaryOp
import geotrellis.raster.mapalgebra.local.{Add, Defined, Undefined}
import geotrellis.raster.{IntConstantNoDataCellType, Tile}
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, AggregateMode, Complete}
import org.apache.spark.sql.catalyst.expressions.{ExprId, Expression, ExpressionDescription, NamedExpression}
import org.apache.spark.sql.execution.aggregate.ScalaUDAF
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
-import org.apache.spark.sql.rf.TileUDT
import org.apache.spark.sql.types.{DataType, StructField, StructType}
import org.apache.spark.sql.{Column, Row, TypedColumn}
+import org.locationtech.rasterframes.TileType
/**
* Catalyst aggregate function that counts `NoData` values in a cell-wise fashion.
@@ -47,8 +47,6 @@ class LocalCountAggregate(isData: Boolean) extends UserDefinedAggregateFunction
private val add = safeBinaryOp(Add.apply(_: Tile, _: Tile))
- private val TileType = new TileUDT()
-
override def dataType: DataType = TileType
override def inputSchema: StructType = StructType(Seq(
@@ -83,19 +81,19 @@ class LocalCountAggregate(isData: Boolean) extends UserDefinedAggregateFunction
override def evaluate(buffer: Row): Tile = buffer.getAs[Tile](0)
}
object LocalCountAggregate {
- import astraea.spark.rasterframes.encoders.StandardEncoders.singlebandTileEncoder
+ import org.locationtech.rasterframes.encoders.StandardEncoders.singlebandTileEncoder
@ExpressionDescription(
usage = "_FUNC_(tile) - Compute cell-wise count of non-no-data values."
)
class LocalDataCellsUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId) extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) {
def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new LocalCountAggregate(true)), Complete, false, NamedExpression.newExprId)
- override def nodeName: String = "agg_local_data_cells"
+ override def nodeName: String = "rf_agg_local_data_cells"
}
object LocalDataCellsUDAF {
def apply(child: Expression): LocalDataCellsUDAF = new LocalDataCellsUDAF(child)
def apply(tile: Column): TypedColumn[Any, Tile] =
new Column(new LocalDataCellsUDAF(tile.expr))
- .as(s"agg_local_data_cells($tile)")
+ .as(s"rf_agg_local_data_cells($tile)")
.as[Tile]
}
@@ -104,13 +102,13 @@ object LocalCountAggregate {
)
class LocalNoDataCellsUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId) extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) {
def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new LocalCountAggregate(false)), Complete, false, NamedExpression.newExprId)
- override def nodeName: String = "agg_local_no_data_cells"
+ override def nodeName: String = "rf_agg_local_no_data_cells"
}
object LocalNoDataCellsUDAF {
def apply(child: Expression): LocalNoDataCellsUDAF = new LocalNoDataCellsUDAF(child)
def apply(tile: Column): TypedColumn[Any, Tile] =
new Column(new LocalNoDataCellsUDAF(tile.expr))
- .as(s"agg_local_no_data_cells($tile)")
+ .as(s"rf_agg_local_no_data_cells($tile)")
.as[Tile]
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalMeanAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalMeanAggregate.scala
similarity index 77%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalMeanAggregate.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalMeanAggregate.scala
index bab1eba20..06741a98c 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalMeanAggregate.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalMeanAggregate.scala
@@ -19,27 +19,26 @@
*
*/
-package astraea.spark.rasterframes.expressions.aggstats
+package org.locationtech.rasterframes.expressions.aggregates
-import astraea.spark.rasterframes.expressions.UnaryRasterAggregate
-import astraea.spark.rasterframes.expressions.localops.{Add => AddTiles, Divide => DivideTiles}
-import astraea.spark.rasterframes.expressions.transformers.SetCellType
+import org.locationtech.rasterframes.expressions.UnaryRasterAggregate
+import org.locationtech.rasterframes.expressions.localops.{BiasedAdd, Divide => DivideTiles}
+import org.locationtech.rasterframes.expressions.transformers.SetCellType
import geotrellis.raster.Tile
import geotrellis.raster.mapalgebra.local
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, ExpressionDescription, If, IsNull, Literal}
-import org.apache.spark.sql.rf.TileUDT
import org.apache.spark.sql.types.DataType
import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes.TileType
@ExpressionDescription(
usage = "_FUNC_(tile) - Computes a new tile contining the mean cell values across all tiles in column.",
note = "All tiles in the column must be the same size."
)
case class LocalMeanAggregate(child: Expression) extends UnaryRasterAggregate {
- private val TileType = new TileUDT()
override def dataType: DataType = TileType
- override def nodeName: String = "agg_local_mean"
+ override def nodeName: String = "rf_agg_local_mean"
private lazy val count =
AttributeReference("count", TileType, true)()
@@ -60,21 +59,21 @@ case class LocalMeanAggregate(child: Expression) extends UnaryRasterAggregate {
override lazy val updateExpressions: Seq[Expression] = Seq(
If(IsNull(count),
SetCellType(Defined(child), Literal("int32")),
- If(IsNull(child), count, AddTiles(count, Defined(child)))
+ If(IsNull(child), count, BiasedAdd(count, Defined(child)))
),
If(IsNull(sum),
SetCellType(child, Literal("float64")),
- If(IsNull(child), sum, AddTiles(sum, child))
+ If(IsNull(child), sum, BiasedAdd(sum, child))
)
)
override val mergeExpressions: Seq[Expression] = Seq(
- AddTiles(count.left, count.right),
- AddTiles(sum.left, sum.right)
+ BiasedAdd(count.left, count.right),
+ BiasedAdd(sum.left, sum.right)
)
override lazy val evaluateExpression: Expression = DivideTiles(sum, count)
}
object LocalMeanAggregate {
- import astraea.spark.rasterframes.encoders.StandardEncoders.singlebandTileEncoder
+ import org.locationtech.rasterframes.encoders.StandardEncoders.singlebandTileEncoder
def apply(tile: Column): TypedColumn[Any, Tile] =
new Column(new LocalMeanAggregate(tile.expr).toAggregateExpression()).as[Tile]
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalStatsAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalStatsAggregate.scala
similarity index 92%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalStatsAggregate.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalStatsAggregate.scala
index 8df684a25..86b360dea 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalStatsAggregate.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalStatsAggregate.scala
@@ -19,21 +19,21 @@
*
*/
-package astraea.spark.rasterframes.expressions.aggstats
+package org.locationtech.rasterframes.expressions.aggregates
-import astraea.spark.rasterframes.expressions.accessors.ExtractTile
-import astraea.spark.rasterframes.functions.safeBinaryOp
-import astraea.spark.rasterframes.stats.LocalCellStatistics
-import astraea.spark.rasterframes.util.DataBiasedOp.{BiasedAdd, BiasedMax, BiasedMin}
+import org.locationtech.rasterframes.expressions.accessors.ExtractTile
+import org.locationtech.rasterframes.functions.safeBinaryOp
+import org.locationtech.rasterframes.stats.LocalCellStatistics
+import org.locationtech.rasterframes.util.DataBiasedOp.{BiasedAdd, BiasedMax, BiasedMin}
import geotrellis.raster.mapalgebra.local._
import geotrellis.raster.{DoubleConstantNoDataCellType, IntConstantNoDataCellType, IntUserDefinedNoDataCellType, Tile}
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, AggregateMode, Complete}
import org.apache.spark.sql.catalyst.expressions.{ExprId, Expression, ExpressionDescription, NamedExpression}
import org.apache.spark.sql.execution.aggregate.ScalaUDAF
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
-import org.apache.spark.sql.rf.TileUDT
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Column, Row, TypedColumn}
+import org.locationtech.rasterframes.TileType
/**
@@ -44,8 +44,6 @@ import org.apache.spark.sql.{Column, Row, TypedColumn}
class LocalStatsAggregate() extends UserDefinedAggregateFunction {
import LocalStatsAggregate.C
- private val TileType = new TileUDT()
-
override def inputSchema: StructType = StructType(Seq(
StructField("value", TileType, true)
))
@@ -149,7 +147,7 @@ object LocalStatsAggregate {
def apply(col: Column): TypedColumn[Any, LocalCellStatistics] =
new Column(LocalStatsAggregateUDAF(col.expr))
- .as(s"agg_local_stats($col)")
+ .as(s"rf_agg_local_stats($col)")
.as[LocalCellStatistics]
/** Adapter hack to allow UserDefinedAggregateFunction to be referenced as an expression. */
@@ -166,7 +164,7 @@ object LocalStatsAggregate {
class LocalStatsAggregateUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId)
extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) {
def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new LocalStatsAggregate()), Complete, false, NamedExpression.newExprId)
- override def nodeName: String = "agg_local_stats"
+ override def nodeName: String = "rf_agg_local_stats"
}
object LocalStatsAggregateUDAF {
def apply(child: Expression): LocalStatsAggregateUDAF = new LocalStatsAggregateUDAF(child)
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalTileOpAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalTileOpAggregate.scala
similarity index 86%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalTileOpAggregate.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalTileOpAggregate.scala
index 7a5032176..b739961c1 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalTileOpAggregate.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalTileOpAggregate.scala
@@ -19,21 +19,20 @@
*
*/
-package astraea.spark.rasterframes.expressions.aggstats
+package org.locationtech.rasterframes.expressions.aggregates
-import astraea.spark.rasterframes.expressions.accessors.ExtractTile
-import astraea.spark.rasterframes.functions.safeBinaryOp
-import astraea.spark.rasterframes.util.DataBiasedOp.{BiasedMax, BiasedMin}
+import org.locationtech.rasterframes.TileType
+import org.locationtech.rasterframes.expressions.accessors.ExtractTile
+import org.locationtech.rasterframes.functions.safeBinaryOp
+import org.locationtech.rasterframes.util.DataBiasedOp.{BiasedMax, BiasedMin}
import geotrellis.raster.Tile
-import geotrellis.raster.mapalgebra.local
import geotrellis.raster.mapalgebra.local.LocalTileBinaryOp
-import org.apache.spark.sql.{Column, Row, TypedColumn}
-import org.apache.spark.sql.catalyst.expressions.{ExprId, Expression, ExpressionDescription, NamedExpression}
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, AggregateMode, Complete}
+import org.apache.spark.sql.catalyst.expressions.{ExprId, Expression, ExpressionDescription, NamedExpression}
import org.apache.spark.sql.execution.aggregate.ScalaUDAF
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
-import org.apache.spark.sql.rf.TileUDT
import org.apache.spark.sql.types._
+import org.apache.spark.sql.{Column, Row, TypedColumn}
/**
* Aggregation function for applying a [[LocalTileBinaryOp]] pairwise across all tiles. Assumes Monoid algebra.
@@ -44,8 +43,6 @@ class LocalTileOpAggregate(op: LocalTileBinaryOp) extends UserDefinedAggregateFu
private val safeOp = safeBinaryOp(op.apply(_: Tile, _: Tile))
- private val TileType = new TileUDT()
-
override def inputSchema: StructType = StructType(Seq(
StructField("value", TileType, true)
))
@@ -75,14 +72,14 @@ class LocalTileOpAggregate(op: LocalTileBinaryOp) extends UserDefinedAggregateFu
}
object LocalTileOpAggregate {
- import astraea.spark.rasterframes.encoders.StandardEncoders.singlebandTileEncoder
+ import org.locationtech.rasterframes.encoders.StandardEncoders.singlebandTileEncoder
@ExpressionDescription(
usage = "_FUNC_(tile) - Compute cell-wise minimum value from a tile column."
)
class LocalMinUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId) extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) {
def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new LocalTileOpAggregate(BiasedMin)), Complete, false, NamedExpression.newExprId)
- override def nodeName: String = "agg_local_min"
+ override def nodeName: String = "rf_agg_local_min"
}
object LocalMinUDAF {
def apply(child: Expression): LocalMinUDAF = new LocalMinUDAF(child)
@@ -94,7 +91,7 @@ object LocalTileOpAggregate {
)
class LocalMaxUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId) extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) {
def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new LocalTileOpAggregate(BiasedMax)), Complete, false, NamedExpression.newExprId)
- override def nodeName: String = "agg_local_max"
+ override def nodeName: String = "rf_agg_local_max"
}
object LocalMaxUDAF {
def apply(child: Expression): LocalMaxUDAF = new LocalMaxUDAF(child)
diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/ProjectedLayerMetadataAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/ProjectedLayerMetadataAggregate.scala
new file mode 100644
index 000000000..0f1b4727a
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/ProjectedLayerMetadataAggregate.scala
@@ -0,0 +1,179 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.expressions.aggregates
+
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.encoders.CatalystSerializer
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.model.TileDimensions
+import geotrellis.proj4.{CRS, Transform}
+import geotrellis.raster._
+import geotrellis.raster.reproject.{Reproject, ReprojectRasterExtent}
+import geotrellis.spark.tiling.LayoutDefinition
+import geotrellis.spark.{KeyBounds, SpatialKey, TileLayerMetadata}
+import geotrellis.vector.Extent
+import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
+import org.apache.spark.sql.types.{DataType, StructField, StructType}
+import org.apache.spark.sql.{Column, Row, TypedColumn}
+
+class ProjectedLayerMetadataAggregate(destCRS: CRS, destDims: TileDimensions) extends UserDefinedAggregateFunction {
+ import ProjectedLayerMetadataAggregate._
+
+ override def inputSchema: StructType = CatalystSerializer[InputRecord].schema
+
+ override def bufferSchema: StructType = CatalystSerializer[BufferRecord].schema
+
+ override def dataType: DataType = CatalystSerializer[TileLayerMetadata[SpatialKey]].schema
+
+ override def deterministic: Boolean = true
+
+ override def initialize(buffer: MutableAggregationBuffer): Unit = ()
+
+ override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
+ if(!input.isNullAt(0)) {
+ val in = input.to[InputRecord]
+
+ if(buffer.isNullAt(0)) {
+ in.toBufferRecord(destCRS).write(buffer)
+ }
+ else {
+ val br = buffer.to[BufferRecord]
+ br.merge(in.toBufferRecord(destCRS)).write(buffer)
+ }
+ }
+ }
+
+ override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
+ (buffer1.isNullAt(0), buffer2.isNullAt(0)) match {
+ case (false, false) ⇒
+ val left = buffer1.to[BufferRecord]
+ val right = buffer2.to[BufferRecord]
+ left.merge(right).write(buffer1)
+ case (true, false) ⇒ buffer2.to[BufferRecord].write(buffer1)
+ case _ ⇒ ()
+ }
+ }
+
+ override def evaluate(buffer: Row): Any = {
+ import org.locationtech.rasterframes.encoders.CatalystSerializer._
+ val buf = buffer.to[BufferRecord]
+
+ val re = RasterExtent(buf.extent, buf.cellSize)
+ val layout = LayoutDefinition(re, destDims.cols, destDims.rows)
+
+ val kb = KeyBounds(layout.mapTransform(buf.extent))
+ TileLayerMetadata(buf.cellType, layout, buf.extent, destCRS, kb).toRow
+ }
+}
+
+object ProjectedLayerMetadataAggregate {
+ import org.locationtech.rasterframes.encoders.StandardEncoders._
+
+ /** Primary user facing constructor */
+ def apply(destCRS: CRS, extent: Column, crs: Column, cellType: Column, tileSize: Column): TypedColumn[Any, TileLayerMetadata[SpatialKey]] =
+ // Ordering must match InputRecord schema
+ new ProjectedLayerMetadataAggregate(destCRS, TileDimensions(NOMINAL_TILE_SIZE, NOMINAL_TILE_SIZE))(extent, crs, cellType, tileSize).as[TileLayerMetadata[SpatialKey]]
+
+ def apply(destCRS: CRS, destDims: TileDimensions, extent: Column, crs: Column, cellType: Column, tileSize: Column): TypedColumn[Any, TileLayerMetadata[SpatialKey]] =
+ // Ordering must match InputRecord schema
+ new ProjectedLayerMetadataAggregate(destCRS, destDims)(extent, crs, cellType, tileSize).as[TileLayerMetadata[SpatialKey]]
+
+ private[expressions]
+ case class InputRecord(extent: Extent, crs: CRS, cellType: CellType, tileSize: TileDimensions) {
+ def toBufferRecord(destCRS: CRS): BufferRecord = {
+ val transform = Transform(crs, destCRS)
+
+ val re = ReprojectRasterExtent(
+ RasterExtent(extent, tileSize.cols, tileSize.rows),
+ transform, Reproject.Options.DEFAULT
+ )
+
+ BufferRecord(
+ re.extent,
+ cellType,
+ re.cellSize
+ )
+ }
+ }
+
+ private[expressions]
+ object InputRecord {
+ implicit val serializer: CatalystSerializer[InputRecord] = new CatalystSerializer[InputRecord]{
+ override def schema: StructType = StructType(Seq(
+ StructField("extent", CatalystSerializer[Extent].schema, false),
+ StructField("crs", CatalystSerializer[CRS].schema, false),
+ StructField("cellType", CatalystSerializer[CellType].schema, false),
+ StructField("tileSize", CatalystSerializer[TileDimensions].schema, false)
+ ))
+
+ override protected def to[R](t: InputRecord, io: CatalystIO[R]): R =
+ throw new IllegalStateException("InputRecord is input only.")
+
+ override protected def from[R](t: R, io: CatalystIO[R]): InputRecord = InputRecord(
+ io.get[Extent](t, 0),
+ io.get[CRS](t, 1),
+ io.get[CellType](t, 2),
+ io.get[TileDimensions](t, 3)
+ )
+ }
+ }
+
+ private[expressions]
+ case class BufferRecord(extent: Extent, cellType: CellType, cellSize: CellSize) {
+ def merge(that: BufferRecord): BufferRecord = {
+ val ext = this.extent.combine(that.extent)
+ val ct = this.cellType.union(that.cellType)
+ val cs = if (this.cellSize.resolution < that.cellSize.resolution) this.cellSize else that.cellSize
+ BufferRecord(ext, ct, cs)
+ }
+
+ def write(buffer: MutableAggregationBuffer): Unit = {
+ val encoded = (this).toRow
+ for(i <- 0 until encoded.size) {
+ buffer(i) = encoded(i)
+ }
+ }
+ }
+
+ private[expressions]
+ object BufferRecord {
+ implicit val serializer: CatalystSerializer[BufferRecord] = new CatalystSerializer[BufferRecord] {
+ override def schema: StructType = StructType(Seq(
+ StructField("extent", CatalystSerializer[Extent].schema, true),
+ StructField("cellType", CatalystSerializer[CellType].schema, true),
+ StructField("cellSize", CatalystSerializer[CellSize].schema, true)
+ ))
+
+ override protected def to[R](t: BufferRecord, io: CatalystIO[R]): R = io.create(
+ io.to(t.extent),
+ io.to(t.cellType),
+ io.to(t.cellSize)
+ )
+
+ override protected def from[R](t: R, io: CatalystIO[R]): BufferRecord = BufferRecord(
+ io.get[Extent](t, 0),
+ io.get[CellType](t, 1),
+ io.get[CellSize](t, 2)
+ )
+ }
+ }
+}
\ No newline at end of file
diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/TileRasterizerAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/TileRasterizerAggregate.scala
new file mode 100644
index 000000000..e1b11ae3b
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/TileRasterizerAggregate.scala
@@ -0,0 +1,109 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.expressions.aggregates
+
+import geotrellis.proj4.CRS
+import geotrellis.raster.reproject.Reproject
+import geotrellis.raster.resample.ResampleMethod
+import geotrellis.raster.{ArrayTile, CellType, Raster, Tile}
+import geotrellis.spark.TileLayerMetadata
+import geotrellis.vector.Extent
+import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
+import org.apache.spark.sql.types.{DataType, StructField, StructType}
+import org.apache.spark.sql.{Column, Row, TypedColumn}
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.expressions.aggregates.TileRasterizerAggregate.ProjectedRasterDefinition
+
+/**
+ * Aggregation function for creating a single `geotrellis.raster.Raster[Tile]` from
+ * `Tile`, `CRS` and `Extent` columns.
+ * @param prd aggregation settings
+ */
+class TileRasterizerAggregate(prd: ProjectedRasterDefinition) extends UserDefinedAggregateFunction {
+
+ val projOpts = Reproject.Options.DEFAULT.copy(method = prd.sampler)
+
+ override def deterministic: Boolean = true
+
+ override def inputSchema: StructType = StructType(Seq(
+ StructField("crs", schemaOf[CRS], false),
+ StructField("extent", schemaOf[Extent], false),
+ StructField("tile", TileType)
+ ))
+
+ override def bufferSchema: StructType = StructType(Seq(
+ StructField("tile_buffer", TileType)
+ ))
+
+ override def dataType: DataType = schemaOf[Raster[Tile]]
+
+ override def initialize(buffer: MutableAggregationBuffer): Unit = {
+ buffer(0) = ArrayTile.empty(prd.cellType, prd.totalCols, prd.totalRows)
+ }
+
+ override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
+ val crs = input.getAs[Row](0).to[CRS]
+ val extent = input.getAs[Row](1).to[Extent]
+
+ val localExtent = extent.reproject(crs, prd.crs)
+
+ if (prd.extent.intersects(localExtent)) {
+ val localTile = input.getAs[Tile](2).reproject(extent, crs, prd.crs, projOpts)
+ val bt = buffer.getAs[Tile](0)
+ val merged = bt.merge(prd.extent, localExtent, localTile.tile, prd.sampler)
+ buffer(0) = merged
+ }
+ }
+
+ override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
+ val leftTile = buffer1.getAs[Tile](0)
+ val rightTile = buffer2.getAs[Tile](0)
+ buffer1(0) = leftTile.merge(rightTile)
+ }
+
+ override def evaluate(buffer: Row): Raster[Tile] = {
+ val t = buffer.getAs[Tile](0)
+ Raster(t, prd.extent)
+ }
+}
+
+object TileRasterizerAggregate {
+ val nodeName = "rf_tile_rasterizer_aggregate"
+ /** Convenience grouping of parameters needed for running aggregate. */
+ case class ProjectedRasterDefinition(totalCols: Int, totalRows: Int, cellType: CellType, crs: CRS, extent: Extent, sampler: ResampleMethod = ResampleMethod.DEFAULT)
+
+ object ProjectedRasterDefinition {
+ def apply(tlm: TileLayerMetadata[_]): ProjectedRasterDefinition = apply(tlm, ResampleMethod.DEFAULT)
+
+ def apply(tlm: TileLayerMetadata[_], sampler: ResampleMethod): ProjectedRasterDefinition = {
+ // Try to determine the actual dimensions of our data coverage
+ val actualSize = tlm.layout.toRasterExtent().gridBoundsFor(tlm.extent) // <--- Do we have the math right here?
+ val cols = actualSize.width
+ val rows = actualSize.height
+ new ProjectedRasterDefinition(cols, rows, tlm.cellType, tlm.crs, tlm.extent, sampler)
+ }
+}
+
+ def apply(prd: ProjectedRasterDefinition, crsCol: Column, extentCol: Column, tileCol: Column): TypedColumn[Any, Raster[Tile]] =
+ new TileRasterizerAggregate(prd)(crsCol, extentCol, tileCol).as(nodeName).as[Raster[Tile]]
+}
\ No newline at end of file
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/generators/ExplodeTiles.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/ExplodeTiles.scala
similarity index 93%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/generators/ExplodeTiles.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/generators/ExplodeTiles.scala
index e39ca1814..bd2a4689a 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/generators/ExplodeTiles.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/ExplodeTiles.scala
@@ -19,11 +19,11 @@
*
*/
-package astraea.spark.rasterframes.expressions.generators
+package org.locationtech.rasterframes.expressions.generators
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.util._
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.util._
import geotrellis.raster._
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.InternalRow
@@ -43,7 +43,7 @@ case class ExplodeTiles(
extends Expression with Generator with CodegenFallback {
def this(children: Seq[Expression]) = this(1.0, None, children)
- override def nodeName: String = "explode_tiles"
+ override def nodeName: String = "rf_explode_tiles"
override def elementSchema: StructType = {
val names =
diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToRasterRefs.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToRasterRefs.scala
new file mode 100644
index 000000000..68c7209e5
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToRasterRefs.scala
@@ -0,0 +1,93 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.expressions.generators
+
+import com.typesafe.scalalogging.LazyLogging
+import geotrellis.vector.Extent
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.types.{DataType, StructField, StructType}
+import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs.bandNames
+import org.locationtech.rasterframes.model.TileDimensions
+import org.locationtech.rasterframes.ref.{RasterRef, RasterSource}
+import org.locationtech.rasterframes.util._
+import org.locationtech.rasterframes.RasterSourceType
+
+import scala.util.Try
+import scala.util.control.NonFatal
+
+/**
+ * Accepts RasterSource and generates one or more RasterRef instances representing
+ *
+ * @since 9/6/18
+ */
+case class RasterSourceToRasterRefs(children: Seq[Expression], bandIndexes: Seq[Int], subtileDims: Option[TileDimensions] = None) extends Expression
+ with Generator with CodegenFallback with ExpectsInputTypes with LazyLogging {
+
+ override def inputTypes: Seq[DataType] = Seq.fill(children.size)(RasterSourceType)
+ override def nodeName: String = "rf_raster_source_to_raster_ref"
+
+ override def elementSchema: StructType = StructType(for {
+ child <- children
+ basename = child.name + "_ref"
+ name <- bandNames(basename, bandIndexes)
+ } yield StructField(name, schemaOf[RasterRef], true))
+
+ private def band2ref(src: RasterSource, e: Option[Extent])(b: Int): RasterRef =
+ if (b < src.bandCount) RasterRef(src, b, e) else null
+
+ override def eval(input: InternalRow): TraversableOnce[InternalRow] = {
+ try {
+ val refs = children.map { child ⇒
+ val src = RasterSourceType.deserialize(child.eval(input))
+ subtileDims.map(dims =>
+ src
+ .layoutExtents(dims)
+ .map(e ⇒ bandIndexes.map(band2ref(src, Some(e))))
+ )
+ .getOrElse(Seq(bandIndexes.map(band2ref(src, None))))
+ }
+ refs.transpose.map(ts ⇒ InternalRow(ts.flatMap(_.map(_.toInternalRow)): _*))
+ }
+ catch {
+ case NonFatal(ex) ⇒
+ val payload = Try(children.map(c => RasterSourceType.deserialize(c.eval(input)))).toOption.toSeq.flatten
+ logger.error("Error fetching data for one of: " + payload.mkString(", "), ex)
+ Traversable.empty
+ }
+ }
+}
+
+object RasterSourceToRasterRefs {
+ def apply(rrs: Column*): TypedColumn[Any, RasterRef] = apply(None, Seq(0), rrs: _*)
+ def apply(subtileDims: Option[TileDimensions], bandIndexes: Seq[Int], rrs: Column*): TypedColumn[Any, RasterRef] =
+ new Column(new RasterSourceToRasterRefs(rrs.map(_.expr), bandIndexes, subtileDims)).as[RasterRef]
+
+ private[rasterframes] def bandNames(basename: String, bandIndexes: Seq[Int]): Seq[String] = bandIndexes match {
+ case Seq() => Seq.empty
+ case Seq(0) => Seq(basename)
+ case s => s.map(n => basename + "_b" + n)
+ }
+}
diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToTiles.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToTiles.scala
new file mode 100644
index 000000000..32b3f4b11
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToTiles.scala
@@ -0,0 +1,88 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.expressions.generators
+
+import com.typesafe.scalalogging.LazyLogging
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.types.{DataType, StructField, StructType}
+import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs.bandNames
+import org.locationtech.rasterframes.model.TileDimensions
+import org.locationtech.rasterframes.tiles.ProjectedRasterTile
+import org.locationtech.rasterframes.util._
+import org.locationtech.rasterframes.RasterSourceType
+
+import scala.util.Try
+import scala.util.control.NonFatal
+
+/**
+ * Accepts RasterRef and generates one or more RasterRef instances representing the
+ * native internal sub-tiling, if any (and requested).
+ *
+ * @since 9/6/18
+ */
+case class RasterSourceToTiles(children: Seq[Expression], bandIndexes: Seq[Int], subtileDims: Option[TileDimensions] = None) extends Expression
+ with Generator with CodegenFallback with ExpectsInputTypes with LazyLogging {
+
+ override def inputTypes: Seq[DataType] = Seq.fill(children.size)(RasterSourceType)
+ override def nodeName: String = "rf_raster_source_to_tiles"
+
+ override def elementSchema: StructType = StructType(for {
+ child <- children
+ basename = child.name
+ name <- bandNames(basename, bandIndexes)
+ } yield StructField(name, schemaOf[ProjectedRasterTile], true))
+
+ override def eval(input: InternalRow): TraversableOnce[InternalRow] = {
+ try {
+ val tiles = children.map { child ⇒
+ val src = RasterSourceType.deserialize(child.eval(input))
+ val maxBands = src.bandCount
+ val allowedBands = bandIndexes.filter(_ < maxBands)
+ src.readAll(subtileDims.getOrElse(rasterframes.NOMINAL_TILE_DIMS), allowedBands)
+ .map(r => bandIndexes.map {
+ case i if i < maxBands => ProjectedRasterTile(r.tile.band(i), r.extent, src.crs)
+ case _ => null
+ })
+ }
+ tiles.transpose.map(ts ⇒ InternalRow(ts.flatMap(_.map(_.toInternalRow)): _*))
+ }
+ catch {
+ case NonFatal(ex) ⇒
+ val payload = Try(children.map(c => RasterSourceType.deserialize(c.eval(input)))).toOption.toSeq.flatten
+ logger.error("Error fetching data for one of: " + payload.mkString(", "), ex)
+ Traversable.empty
+ }
+ }
+}
+
+object RasterSourceToTiles {
+ def apply(rrs: Column*): TypedColumn[Any, ProjectedRasterTile] = apply(None, Seq(0), rrs: _*)
+ def apply(subtileDims: Option[TileDimensions], bandIndexes: Seq[Int], rrs: Column*): TypedColumn[Any, ProjectedRasterTile] =
+ new Column(new RasterSourceToTiles(rrs.map(_.expr), bandIndexes, subtileDims)).as[ProjectedRasterTile]
+}
+
+
diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Abs.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Abs.scala
new file mode 100644
index 000000000..0fe6cac87
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Abs.scala
@@ -0,0 +1,50 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.expressions.localops
+
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.{NullToValue, UnaryLocalRasterOp}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Compute the absolute value of each cell.",
+ arguments = """
+ Arguments:
+ * tile - tile column to apply abs""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ ..."""
+)
+case class Abs(child: Expression) extends UnaryLocalRasterOp with NullToValue with CodegenFallback {
+ override def nodeName: String = "rf_abs"
+ override def na: Any = null
+ override protected def op(t: Tile): Tile = t.localAbs()
+}
+
+object Abs {
+ def apply(tile: Column): TypedColumn[Any, Tile] =
+ new Column(Abs(tile.expr)).as[Tile]
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Add.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Add.scala
similarity index 76%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Add.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Add.scala
index d7f1a7867..b7a3c8946 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Add.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Add.scala
@@ -19,19 +19,17 @@
*
*/
-package astraea.spark.rasterframes.expressions.localops
+package org.locationtech.rasterframes.expressions.localops
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.expressions.DynamicExtractors.tileExtractor
-import astraea.spark.rasterframes.expressions.{BinaryLocalRasterOp, DynamicExtractors}
-import astraea.spark.rasterframes.util.DataBiasedOp.BiasedAdd
import geotrellis.raster.Tile
-import org.apache.spark.sql.rf._
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.functions.lit
import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp
+import org.locationtech.rasterframes.expressions.DynamicExtractors.tileExtractor
@ExpressionDescription(
usage = "_FUNC_(tile, rhs) - Performs cell-wise addition between two tiles or a tile and a scalar.",
@@ -48,10 +46,10 @@ import org.apache.spark.sql.{Column, TypedColumn}
)
case class Add(left: Expression, right: Expression) extends BinaryLocalRasterOp
with CodegenFallback {
- override val nodeName: String = "local_add"
- override protected def op(left: Tile, right: Tile): Tile = BiasedAdd(left, right)
- override protected def op(left: Tile, right: Double): Tile = BiasedAdd(left, right)
- override protected def op(left: Tile, right: Int): Tile = BiasedAdd(left, right)
+ override val nodeName: String = "rf_local_add"
+ override protected def op(left: Tile, right: Tile): Tile = left.localAdd(right)
+ override protected def op(left: Tile, right: Double): Tile = left.localAdd(right)
+ override protected def op(left: Tile, right: Int): Tile = left.localAdd(right)
override def eval(input: InternalRow): Any = {
if(input == null) null
diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/BiasedAdd.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/BiasedAdd.scala
new file mode 100644
index 000000000..10bd82e62
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/BiasedAdd.scala
@@ -0,0 +1,74 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.expressions.localops
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.functions.lit
+import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp
+import org.locationtech.rasterframes.expressions.DynamicExtractors.tileExtractor
+import org.locationtech.rasterframes.util.DataBiasedOp
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile, rhs) - Performs cell-wise addition between two tiles or a tile and a scalar. " +
+    "Unlike a regular 'add', this considers `NoData` + `x` = `x`.",
+ arguments = """
+ Arguments:
+ * tile - left-hand-side tile
+ * rhs - a tile or scalar value to add to each cell""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile, 1.5);
+ ...
+ > SELECT _FUNC_(tile1, tile2);
+ ..."""
+)
+case class BiasedAdd(left: Expression, right: Expression) extends BinaryLocalRasterOp
+ with CodegenFallback {
+ override val nodeName: String = "rf_local_biased_add"
+ override protected def op(left: Tile, right: Tile): Tile = DataBiasedOp.BiasedAdd(left, right)
+ override protected def op(left: Tile, right: Double): Tile = DataBiasedOp.BiasedAdd(left, right)
+ override protected def op(left: Tile, right: Int): Tile = DataBiasedOp.BiasedAdd(left, right)
+
+ override def eval(input: InternalRow): Any = {
+ if(input == null) null
+ else {
+ val l = left.eval(input)
+ val r = right.eval(input)
+ if (l == null && r == null) null
+ else if (l == null) r
+ else if (r == null && tileExtractor.isDefinedAt(right.dataType)) l
+ else if (r == null) null
+ else nullSafeEval(l, r)
+ }
+ }
+}
+object BiasedAdd {
+ def apply(left: Column, right: Column): TypedColumn[Any, Tile] =
+ new Column(BiasedAdd(left.expr, right.expr)).as[Tile]
+
+ def apply[N: Numeric](tile: Column, value: N): TypedColumn[Any, Tile] =
+ new Column(BiasedAdd(tile.expr, lit(value).expr)).as[Tile]
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Divide.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Divide.scala
similarity index 90%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Divide.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Divide.scala
index 37aa4ab6c..2b5f7d112 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Divide.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Divide.scala
@@ -19,10 +19,10 @@
*
*/
-package astraea.spark.rasterframes.expressions.localops
+package org.locationtech.rasterframes.expressions.localops
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
@@ -43,7 +43,7 @@ import org.apache.spark.sql.{Column, TypedColumn}
..."""
)
case class Divide(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback {
- override val nodeName: String = "local_divide"
+ override val nodeName: String = "rf_local_divide"
override protected def op(left: Tile, right: Tile): Tile = left.localDivide(right)
override protected def op(left: Tile, right: Double): Tile = left.localDivide(right)
override protected def op(left: Tile, right: Int): Tile = left.localDivide(right)
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Equal.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Equal.scala
similarity index 89%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Equal.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Equal.scala
index 610b8beff..a9e809b47 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Equal.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Equal.scala
@@ -19,10 +19,10 @@
*
*/
-package astraea.spark.rasterframes.expressions.localops
+package org.locationtech.rasterframes.expressions.localops
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
@@ -41,7 +41,7 @@ import org.apache.spark.sql.{Column, TypedColumn}
..."""
)
case class Equal(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback {
- override val nodeName: String = "local_equal"
+ override val nodeName: String = "rf_local_equal"
override protected def op(left: Tile, right: Tile): Tile = left.localEqual(right)
override protected def op(left: Tile, right: Double): Tile = left.localEqual(right)
override protected def op(left: Tile, right: Int): Tile = left.localEqual(right)
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Exp.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Exp.scala
similarity index 90%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Exp.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Exp.scala
index 40d34ee06..ca9905e29 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Exp.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Exp.scala
@@ -19,10 +19,10 @@
*
*/
-package astraea.spark.rasterframes.expressions.localops
+package org.locationtech.rasterframes.expressions.localops
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.expressions.{UnaryLocalRasterOp, fpTile}
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.{UnaryLocalRasterOp, fpTile}
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
@@ -41,7 +41,7 @@ import org.apache.spark.sql.{Column, TypedColumn}
..."""
)
case class Exp(child: Expression) extends UnaryLocalRasterOp with CodegenFallback {
- override val nodeName: String = "exp"
+ override val nodeName: String = "rf_exp"
override protected def op(tile: Tile): Tile = fpTile(tile).localPowValue(math.E)
@@ -63,7 +63,7 @@ object Exp {
..."""
)
case class Exp10(child: Expression) extends UnaryLocalRasterOp with CodegenFallback {
- override val nodeName: String = "log10"
+ override val nodeName: String = "rf_log10"
override protected def op(tile: Tile): Tile = fpTile(tile).localPowValue(10.0)
@@ -84,7 +84,7 @@ object Exp10 {
..."""
)
case class Exp2(child: Expression) extends UnaryLocalRasterOp with CodegenFallback {
- override val nodeName: String = "exp2"
+ override val nodeName: String = "rf_exp2"
override protected def op(tile: Tile): Tile = fpTile(tile).localPowValue(2.0)
@@ -105,7 +105,7 @@ object Exp2{
..."""
)
case class ExpM1(child: Expression) extends UnaryLocalRasterOp with CodegenFallback {
- override val nodeName: String = "expm1"
+ override val nodeName: String = "rf_expm1"
override protected def op(tile: Tile): Tile = fpTile(tile).localPowValue(math.E).localSubtract(1.0)
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Greater.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Greater.scala
similarity index 89%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Greater.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Greater.scala
index f78022972..ad9b8bf03 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Greater.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Greater.scala
@@ -18,10 +18,10 @@
* SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes.expressions.localops
+package org.locationtech.rasterframes.expressions.localops
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
@@ -40,7 +40,7 @@ import org.apache.spark.sql.{Column, TypedColumn}
..."""
)
case class Greater(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback {
- override val nodeName: String = "local_greater"
+ override val nodeName: String = "rf_local_greater"
override protected def op(left: Tile, right: Tile): Tile = left.localGreater(right)
override protected def op(left: Tile, right: Double): Tile = left.localGreater(right)
override protected def op(left: Tile, right: Int): Tile = left.localGreater(right)
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/GreaterEqual.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/GreaterEqual.scala
similarity index 89%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/GreaterEqual.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/GreaterEqual.scala
index bf43ceca5..725898ca5 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/GreaterEqual.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/GreaterEqual.scala
@@ -19,10 +19,10 @@
*
*/
-package astraea.spark.rasterframes.expressions.localops
+package org.locationtech.rasterframes.expressions.localops
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
@@ -41,7 +41,7 @@ import org.apache.spark.sql.{Column, TypedColumn}
..."""
)
case class GreaterEqual(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback {
- override val nodeName: String = "local_greater_equal"
+ override val nodeName: String = "rf_local_greater_equal"
override protected def op(left: Tile, right: Tile): Tile = left.localGreaterOrEqual(right)
override protected def op(left: Tile, right: Double): Tile = left.localGreaterOrEqual(right)
override protected def op(left: Tile, right: Int): Tile = left.localGreaterOrEqual(right)
diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Identity.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Identity.scala
new file mode 100644
index 000000000..60e607f8b
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Identity.scala
@@ -0,0 +1,50 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.expressions.localops
+
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.{NullToValue, UnaryLocalRasterOp}
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Return the given tile or projected raster unchanged. Useful in debugging round-trip serialization across various language and memory boundaries.",
+ arguments = """
+ Arguments:
+ * tile - tile column to pass through""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(tile);
+ ..."""
+)
+case class Identity(child: Expression) extends UnaryLocalRasterOp with NullToValue with CodegenFallback {
+ override def nodeName: String = "rf_identity"
+ override def na: Any = null
+ override protected def op(t: Tile): Tile = t
+}
+
+object Identity {
+ def apply(tile: Column): TypedColumn[Any, Tile] =
+ new Column(Identity(tile.expr)).as[Tile]
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Less.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Less.scala
similarity index 89%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Less.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Less.scala
index 4f8d4ad7b..a80d628f7 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Less.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Less.scala
@@ -18,10 +18,10 @@
* SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes.expressions.localops
+package org.locationtech.rasterframes.expressions.localops
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
@@ -40,7 +40,7 @@ import org.apache.spark.sql.{Column, TypedColumn}
..."""
)
case class Less(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback {
- override val nodeName: String = "local_less"
+ override val nodeName: String = "rf_local_less"
override protected def op(left: Tile, right: Tile): Tile = left.localLess(right)
override protected def op(left: Tile, right: Double): Tile = left.localLess(right)
override protected def op(left: Tile, right: Int): Tile = left.localLess(right)
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/LessEqual.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/LessEqual.scala
similarity index 89%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/LessEqual.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/LessEqual.scala
index 983ac7c0d..b9361610b 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/LessEqual.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/LessEqual.scala
@@ -19,10 +19,10 @@
*
*/
-package astraea.spark.rasterframes.expressions.localops
+package org.locationtech.rasterframes.expressions.localops
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
@@ -41,7 +41,7 @@ import org.apache.spark.sql.{Column, TypedColumn}
..."""
)
case class LessEqual(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback {
- override val nodeName: String = "local_less_equal"
+ override val nodeName: String = "rf_local_less_equal"
override protected def op(left: Tile, right: Tile): Tile = left.localLessOrEqual(right)
override protected def op(left: Tile, right: Double): Tile = left.localLessOrEqual(right)
override protected def op(left: Tile, right: Int): Tile = left.localLessOrEqual(right)
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Log.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Log.scala
similarity index 91%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Log.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Log.scala
index e2da78ce1..8e8d8a011 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Log.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Log.scala
@@ -19,10 +19,10 @@
*
*/
-package astraea.spark.rasterframes.expressions.localops
+package org.locationtech.rasterframes.expressions.localops
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.expressions.{UnaryLocalRasterOp, fpTile}
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.{UnaryLocalRasterOp, fpTile}
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
@@ -63,7 +63,7 @@ object Log {
..."""
)
case class Log10(child: Expression) extends UnaryLocalRasterOp with CodegenFallback {
- override val nodeName: String = "log10"
+ override val nodeName: String = "rf_log10"
override protected def op(tile: Tile): Tile = fpTile(tile).localLog10()
@@ -84,7 +84,7 @@ object Log10 {
..."""
)
case class Log2(child: Expression) extends UnaryLocalRasterOp with CodegenFallback {
- override val nodeName: String = "log2"
+ override val nodeName: String = "rf_log2"
override protected def op(tile: Tile): Tile = fpTile(tile).localLog() / math.log(2.0)
@@ -105,7 +105,7 @@ object Log2{
..."""
)
case class Log1p(child: Expression) extends UnaryLocalRasterOp with CodegenFallback {
- override val nodeName: String = "log1p"
+ override val nodeName: String = "rf_log1p"
override protected def op(tile: Tile): Tile = fpTile(tile).localAdd(1.0).localLog()
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Multiply.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Multiply.scala
similarity index 90%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Multiply.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Multiply.scala
index 7ed7c76b8..784771906 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Multiply.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Multiply.scala
@@ -19,10 +19,10 @@
*
*/
-package astraea.spark.rasterframes.expressions.localops
+package org.locationtech.rasterframes.expressions.localops
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
@@ -43,7 +43,7 @@ import org.apache.spark.sql.{Column, TypedColumn}
..."""
)
case class Multiply(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback {
- override val nodeName: String = "local_multiply"
+ override val nodeName: String = "rf_local_multiply"
override protected def op(left: Tile, right: Tile): Tile = left.localMultiply(right)
override protected def op(left: Tile, right: Double): Tile = left.localMultiply(right)
override protected def op(left: Tile, right: Int): Tile = left.localMultiply(right)
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/NormalizedDifference.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/NormalizedDifference.scala
similarity index 84%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/NormalizedDifference.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/NormalizedDifference.scala
index 5760582d6..e62ccfc37 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/NormalizedDifference.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/NormalizedDifference.scala
@@ -19,10 +19,11 @@
*
*/
-package astraea.spark.rasterframes.expressions.localops
-import astraea.spark.rasterframes.expressions.fpTile
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.expressions.BinaryRasterOp
+package org.locationtech.rasterframes.expressions.localops
+
+import org.locationtech.rasterframes.expressions.fpTile
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.BinaryRasterOp
import geotrellis.raster.Tile
import org.apache.spark.sql.{Column, TypedColumn}
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
@@ -37,11 +38,11 @@ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
* right - second tile argument""",
examples = """
Examples:
- > SELECT _FUNC_(nir, red);
+ > SELECT _FUNC_(nir, red) as ndvi;
..."""
)
case class NormalizedDifference(left: Expression, right: Expression) extends BinaryRasterOp with CodegenFallback {
- override val nodeName: String = "normalized_difference"
+ override val nodeName: String = "rf_normalized_difference"
override protected def op(left: Tile, right: Tile): Tile = {
val diff = fpTile(left.localSubtract(right))
val sum = fpTile(left.localAdd(right))
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Resample.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Resample.scala
similarity index 89%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Resample.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Resample.scala
index fd2ae2f29..6e752dfbc 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Resample.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Resample.scala
@@ -19,14 +19,13 @@
*
*/
-package astraea.spark.rasterframes.expressions.localops
+package org.locationtech.rasterframes.expressions.localops
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.expressions.DynamicExtractors.tileExtractor
-import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp
+import org.locationtech.rasterframes.expressions.DynamicExtractors.tileExtractor
import geotrellis.raster.Tile
import geotrellis.raster.resample.NearestNeighbor
-import org.apache.spark.sql.rf._
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
@@ -48,7 +47,7 @@ import org.apache.spark.sql.{Column, TypedColumn}
)
case class Resample(left: Expression, right: Expression) extends BinaryLocalRasterOp
with CodegenFallback {
- override val nodeName: String = "resample"
+ override val nodeName: String = "rf_resample"
override protected def op(left: Tile, right: Tile): Tile = left.resample(right.cols, right.rows, NearestNeighbor)
override protected def op(left: Tile, right: Double): Tile = left.resample((left.cols * right).toInt,
(left.rows * right).toInt, NearestNeighbor)
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Round.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Round.scala
similarity index 87%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Round.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Round.scala
index 010666e17..92f1c2f89 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Round.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Round.scala
@@ -19,10 +19,10 @@
*
*/
-package astraea.spark.rasterframes.expressions.localops
+package org.locationtech.rasterframes.expressions.localops
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.expressions.{NullToValue, UnaryLocalRasterOp}
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.{NullToValue, UnaryLocalRasterOp}
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
@@ -40,7 +40,7 @@ import org.apache.spark.sql.{Column, TypedColumn}
)
case class Round(child: Expression) extends UnaryLocalRasterOp
with NullToValue with CodegenFallback {
- override def nodeName: String = "round"
+ override def nodeName: String = "rf_round"
override def na: Any = null
override protected def op(child: Tile): Tile = child.localRound()
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Subtract.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Subtract.scala
similarity index 90%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Subtract.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Subtract.scala
index 203bb578d..c09a7ea47 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Subtract.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Subtract.scala
@@ -19,9 +19,10 @@
*
*/
-package astraea.spark.rasterframes.expressions.localops
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+package org.locationtech.rasterframes.expressions.localops
+
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
@@ -42,7 +43,7 @@ import org.apache.spark.sql.{Column, TypedColumn}
..."""
)
case class Subtract(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback {
- override val nodeName: String = "local_subtract"
+ override val nodeName: String = "rf_local_subtract"
override protected def op(left: Tile, right: Tile): Tile = left.localSubtract(right)
override protected def op(left: Tile, right: Double): Tile = left.localSubtract(right)
override protected def op(left: Tile, right: Int): Tile = left.localSubtract(right)
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Unequal.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Unequal.scala
similarity index 89%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Unequal.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Unequal.scala
index f3342b9c6..48a1e3963 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Unequal.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Unequal.scala
@@ -19,10 +19,10 @@
*
*/
-package astraea.spark.rasterframes.expressions.localops
+package org.locationtech.rasterframes.expressions.localops
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
@@ -41,7 +41,7 @@ import org.apache.spark.sql.{Column, TypedColumn}
..."""
)
case class Unequal(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback {
- override val nodeName: String = "local_unequal"
+ override val nodeName: String = "rf_local_unequal"
override protected def op(left: Tile, right: Tile): Tile = left.localUnequal(right)
override protected def op(left: Tile, right: Double): Tile = left.localUnequal(right)
override protected def op(left: Tile, right: Int): Tile = left.localUnequal(right)
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/package.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/package.scala
similarity index 80%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/package.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/package.scala
index e4c0bcc00..8a8b70e00 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/package.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/package.scala
@@ -15,25 +15,28 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes
+package org.locationtech.rasterframes
-import astraea.spark.rasterframes.expressions.accessors._
-import astraea.spark.rasterframes.expressions.aggstats._
-import astraea.spark.rasterframes.expressions.generators._
-import astraea.spark.rasterframes.expressions.localops._
-import astraea.spark.rasterframes.expressions.tilestats._
-import astraea.spark.rasterframes.expressions.transformers._
import geotrellis.raster.{DoubleConstantNoDataCellType, Tile}
-import org.apache.spark.sql.catalyst.{InternalRow, ScalaReflection}
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.expressions.{Expression, ScalaUDF}
+import org.apache.spark.sql.catalyst.{InternalRow, ScalaReflection}
import org.apache.spark.sql.rf.VersionShims._
import org.apache.spark.sql.{SQLContext, rf}
+import org.locationtech.rasterframes.expressions.accessors._
+import org.locationtech.rasterframes.expressions.aggregates.CellCountAggregate.DataCells
+import org.locationtech.rasterframes.expressions.aggregates._
+import org.locationtech.rasterframes.expressions.generators._
+import org.locationtech.rasterframes.expressions.localops._
+import org.locationtech.rasterframes.expressions.tilestats._
+import org.locationtech.rasterframes.expressions.transformers._
-import scala.util.Try
import scala.reflect.runtime.universe._
+import scala.util.Try
/**
* Module of Catalyst expressions for efficiently working with tiles.
*
@@ -61,11 +64,17 @@ package object expressions {
registry.registerExpression[Add]("rf_local_add")
registry.registerExpression[Subtract]("rf_local_subtract")
+ registry.registerExpression[TileAssembler]("rf_assemble_tile")
registry.registerExpression[ExplodeTiles]("rf_explode_tiles")
registry.registerExpression[GetCellType]("rf_cell_type")
registry.registerExpression[SetCellType]("rf_convert_cell_type")
- registry.registerExpression[GetDimensions]("rf_tile_dimensions")
- registry.registerExpression[BoundsToGeometry]("rf_bounds_geometry")
+ registry.registerExpression[GetDimensions]("rf_dimensions")
+ registry.registerExpression[ExtentToGeometry]("st_geometry")
+ registry.registerExpression[GetGeometry]("rf_geometry")
+ registry.registerExpression[GeometryToExtent]("st_extent")
+ registry.registerExpression[GetExtent]("rf_extent")
+ registry.registerExpression[GetCRS]("rf_crs")
+ registry.registerExpression[RealizeTile]("rf_tile")
registry.registerExpression[Subtract]("rf_local_subtract")
registry.registerExpression[Multiply]("rf_local_multiply")
registry.registerExpression[Divide]("rf_local_divide")
@@ -78,6 +87,7 @@ package object expressions {
registry.registerExpression[Unequal]("rf_local_unequal")
registry.registerExpression[Sum]("rf_tile_sum")
registry.registerExpression[Round]("rf_round")
+ registry.registerExpression[Abs]("rf_abs")
registry.registerExpression[Log]("rf_log")
registry.registerExpression[Log10]("rf_log10")
registry.registerExpression[Log2]("rf_log2")
@@ -92,12 +102,14 @@ package object expressions {
registry.registerExpression[DataCells]("rf_data_cells")
registry.registerExpression[NoDataCells]("rf_no_data_cells")
registry.registerExpression[IsNoDataTile]("rf_is_no_data_tile")
+ registry.registerExpression[Exists]("rf_exists")
+ registry.registerExpression[ForAll]("rf_for_all")
registry.registerExpression[TileMin]("rf_tile_min")
registry.registerExpression[TileMax]("rf_tile_max")
registry.registerExpression[TileMean]("rf_tile_mean")
registry.registerExpression[TileStats]("rf_tile_stats")
registry.registerExpression[TileHistogram]("rf_tile_histogram")
- registry.registerExpression[CellCountAggregate.DataCells]("rf_agg_data_cells")
+ registry.registerExpression[DataCells]("rf_agg_data_cells")
registry.registerExpression[CellCountAggregate.NoDataCells]("rf_agg_no_data_cells")
registry.registerExpression[CellStatsAggregate.CellStatsAggregateUDAF]("rf_agg_stats")
registry.registerExpression[HistogramAggregate.HistogramAggregateUDAF]("rf_agg_approx_histogram")
@@ -110,9 +122,11 @@ package object expressions {
registry.registerExpression[Mask.MaskByDefined]("rf_mask")
registry.registerExpression[Mask.MaskByValue]("rf_mask_by_value")
+ registry.registerExpression[Mask.InverseMaskByValue]("rf_inverse_mask_by_value")
registry.registerExpression[Mask.InverseMaskByDefined]("rf_inverse_mask")
registry.registerExpression[DebugRender.RenderAscii]("rf_render_ascii")
registry.registerExpression[DebugRender.RenderMatrix]("rf_render_matrix")
+ registry.registerExpression[transformers.ReprojectGeometry]("st_reproject")
}
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/DataCells.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/DataCells.scala
similarity index 84%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/DataCells.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/DataCells.scala
index a7d49c4ae..a18148db3 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/DataCells.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/DataCells.scala
@@ -19,14 +19,15 @@
*
*/
-package astraea.spark.rasterframes.expressions.tilestats
-import astraea.spark.rasterframes.expressions.{UnaryRasterOp, NullToValue}
-import astraea.spark.rasterframes.model.TileContext
+package org.locationtech.rasterframes.expressions.tilestats
+
+import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp}
import geotrellis.raster._
import org.apache.spark.sql.{Column, TypedColumn}
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.types.{DataType, LongType}
+import org.locationtech.rasterframes.model.TileContext
@ExpressionDescription(
usage = "_FUNC_(tile) - Counts the number of non-no-data cells in a tile",
@@ -40,13 +41,13 @@ import org.apache.spark.sql.types.{DataType, LongType}
)
case class DataCells(child: Expression) extends UnaryRasterOp
with CodegenFallback with NullToValue {
- override def nodeName: String = "data_cells"
+ override def nodeName: String = "rf_data_cells"
override def dataType: DataType = LongType
override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = DataCells.op(tile)
override def na: Any = 0L
}
object DataCells {
- import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.longEnc
+ import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.longEnc
def apply(tile: Column): TypedColumn[Any, Long] =
new Column(DataCells(tile.expr)).as[Long]
diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Exists.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Exists.scala
new file mode 100644
index 000000000..cd04b1467
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Exists.scala
@@ -0,0 +1,47 @@
+package org.locationtech.rasterframes.expressions.tilestats
+
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes.isCellTrue
+import org.locationtech.rasterframes.expressions.UnaryRasterOp
+import org.locationtech.rasterframes.model.TileContext
+import spire.syntax.cfor.cfor
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Returns true if any cells in the tile are true (non-zero and not nodata).",
+ arguments =
+ """
+ Arguments:
+ * tile - tile to check
+ """,
+ examples =
+ """
+ > SELECT _FUNC_(tile);
+ true
+ """
+)
+case class Exists(child: Expression) extends UnaryRasterOp with CodegenFallback {
+ override def nodeName: String = "exists"
+ override def dataType: DataType = BooleanType
+ override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = Exists.op(tile)
+
+}
+
+object Exists{
+ import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.boolEnc
+
+ def apply(tile: Column): TypedColumn[Any, Boolean] = new Column(Exists(tile.expr)).as[Boolean]
+
+ def op(tile: Tile): Boolean = {
+ cfor(0)(_ < tile.rows, _ + 1) { r ⇒
+ cfor(0)(_ < tile.cols, _ + 1) { c ⇒
+ if(tile.cellType.isFloatingPoint) { if(isCellTrue(tile.getDouble(c, r))) return true }
+ else { if(isCellTrue(tile.get(c, r))) return true }
+ }
+ }
+ false
+ }
+}
\ No newline at end of file
diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/ForAll.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/ForAll.scala
new file mode 100644
index 000000000..a912a8a0b
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/ForAll.scala
@@ -0,0 +1,51 @@
+package org.locationtech.rasterframes.expressions.tilestats
+
+import geotrellis.raster.Tile
+import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes.isCellTrue
+import org.locationtech.rasterframes.expressions.UnaryRasterOp
+import org.locationtech.rasterframes.model.TileContext
+import spire.syntax.cfor.cfor
+
+@ExpressionDescription(
+ usage = "_FUNC_(tile) - Returns true if all cells in the tile are true (non-zero and not nodata).",
+ arguments =
+ """
+ Arguments:
+ * tile - tile to check
+ """,
+ examples =
+ """
+ > SELECT _FUNC_(tile);
+ true
+ """
+)
+case class ForAll(child: Expression) extends UnaryRasterOp with CodegenFallback {
+ override def nodeName: String = "for_all"
+ override def dataType: DataType = BooleanType
+ override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = ForAll.op(tile)
+
+}
+
+object ForAll {
+ import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.boolEnc
+
+ def apply(tile: Column): TypedColumn[Any, Boolean] = new Column(ForAll(tile.expr)).as[Boolean]
+
+ def op(tile: Tile): Boolean = {
+ cfor(0)(_ < tile.rows, _ + 1) { r ⇒
+ cfor(0)(_ < tile.cols, _ + 1) { c ⇒
+ if (tile.cellType.isFloatingPoint) {
+ if (!isCellTrue(tile.getDouble(c, r))) return false
+ }
+ else {
+ if (!isCellTrue(tile.get(c, r))) return false
+ }
+ }
+ }
+ true
+ }
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/IsNoDataTile.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/IsNoDataTile.scala
similarity index 83%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/IsNoDataTile.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/IsNoDataTile.scala
index 7b360a07c..fd855cd39 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/IsNoDataTile.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/IsNoDataTile.scala
@@ -19,14 +19,15 @@
*
*/
-package astraea.spark.rasterframes.expressions.tilestats
-import astraea.spark.rasterframes.expressions.{NullToValue, UnaryRasterOp}
-import astraea.spark.rasterframes.model.TileContext
+package org.locationtech.rasterframes.expressions.tilestats
+
+import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp}
import geotrellis.raster._
import org.apache.spark.sql.{Column, TypedColumn}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.types.{BooleanType, DataType}
+import org.locationtech.rasterframes.model.TileContext
@ExpressionDescription(
usage = "_FUNC_(tile) - Produces `true` if all the cells in a given tile are no-data",
@@ -40,13 +41,13 @@ import org.apache.spark.sql.types.{BooleanType, DataType}
)
case class IsNoDataTile(child: Expression) extends UnaryRasterOp
with CodegenFallback with NullToValue {
- override def nodeName: String = "is_no_data_tile"
+ override def nodeName: String = "rf_is_no_data_tile"
override def na: Any = true
override def dataType: DataType = BooleanType
override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = tile.isNoDataTile
}
object IsNoDataTile {
- import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.boolEnc
+ import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.boolEnc
def apply(tile: Column): TypedColumn[Any, Boolean] =
new Column(IsNoDataTile(tile.expr)).as[Boolean]
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/NoDataCells.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/NoDataCells.scala
similarity index 84%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/NoDataCells.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/NoDataCells.scala
index 89c2ae10b..cf47ba14e 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/NoDataCells.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/NoDataCells.scala
@@ -19,15 +19,15 @@
*
*/
-package astraea.spark.rasterframes.expressions.tilestats
+package org.locationtech.rasterframes.expressions.tilestats
-import astraea.spark.rasterframes.expressions.{UnaryRasterOp, NullToValue}
-import astraea.spark.rasterframes.model.TileContext
+import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp}
import geotrellis.raster._
import org.apache.spark.sql.{Column, TypedColumn}
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.types.{DataType, LongType}
+import org.locationtech.rasterframes.model.TileContext
@ExpressionDescription(
usage = "_FUNC_(tile) - Counts the number of no-data cells in a tile",
@@ -41,13 +41,13 @@ import org.apache.spark.sql.types.{DataType, LongType}
)
case class NoDataCells(child: Expression) extends UnaryRasterOp
with CodegenFallback with NullToValue {
- override def nodeName: String = "no_data_cells"
+ override def nodeName: String = "rf_no_data_cells"
override def dataType: DataType = LongType
override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = NoDataCells.op(tile)
override def na: Any = 0L
}
object NoDataCells {
- import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.longEnc
+ import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.longEnc
def apply(tile: Column): TypedColumn[Any, Long] =
new Column(NoDataCells(tile.expr)).as[Long]
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/Sum.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Sum.scala
similarity index 83%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/Sum.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Sum.scala
index cfa10666b..096acdab6 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/Sum.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Sum.scala
@@ -19,17 +19,18 @@
*
*/
-package astraea.spark.rasterframes.expressions.tilestats
-import astraea.spark.rasterframes.expressions.UnaryRasterOp
-import astraea.spark.rasterframes.model.TileContext
+package org.locationtech.rasterframes.expressions.tilestats
+
+import org.locationtech.rasterframes.expressions.UnaryRasterOp
import geotrellis.raster._
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.types.{DataType, DoubleType}
import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes.model.TileContext
@ExpressionDescription(
- usage = "_FUNC_(tile) - Computes the sum of all the cells in a tile..",
+ usage = "_FUNC_(tile) - Computes the sum of all the cells in a tile.",
arguments = """
Arguments:
* tile - tile to sum up""",
@@ -39,13 +40,13 @@ import org.apache.spark.sql.{Column, TypedColumn}
2135.34"""
)
case class Sum(child: Expression) extends UnaryRasterOp with CodegenFallback {
- override def nodeName: String = "tile_sum"
+ override def nodeName: String = "rf_tile_sum"
override def dataType: DataType = DoubleType
override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = Sum.op(tile)
}
object Sum {
- import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc
+ import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc
def apply(tile: Column): TypedColumn[Any, Double] =
new Column(Sum(tile.expr)).as[Double]
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileHistogram.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileHistogram.scala
similarity index 84%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileHistogram.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileHistogram.scala
index d7fe7d0c1..96e3d3dcc 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileHistogram.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileHistogram.scala
@@ -19,17 +19,18 @@
*
*/
-package astraea.spark.rasterframes.expressions.tilestats
+package org.locationtech.rasterframes.expressions.tilestats
-import astraea.spark.rasterframes.expressions.UnaryRasterOp
-import astraea.spark.rasterframes.model.TileContext
-import astraea.spark.rasterframes.stats.CellHistogram
+import org.locationtech.rasterframes.expressions.UnaryRasterOp
+import org.locationtech.rasterframes.stats.CellHistogram
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.CatalystTypeConverters
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.types.DataType
import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes.expressions.UnaryRasterOp
+import org.locationtech.rasterframes.model.TileContext
@ExpressionDescription(
usage = "_FUNC_(tile) - Computes per-tile histogram.",
@@ -43,7 +44,7 @@ import org.apache.spark.sql.{Column, TypedColumn}
)
case class TileHistogram(child: Expression) extends UnaryRasterOp
with CodegenFallback {
- override def nodeName: String = "tile_histogram"
+ override def nodeName: String = "rf_tile_histogram"
override protected def eval(tile: Tile, ctx: Option[TileContext]): Any =
TileHistogram.converter(TileHistogram.op(tile))
override def dataType: DataType = CellHistogram.schema
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMax.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMax.scala
similarity index 84%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMax.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMax.scala
index 0e2595b2a..3204f4aaf 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMax.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMax.scala
@@ -19,15 +19,15 @@
*
*/
-package astraea.spark.rasterframes.expressions.tilestats
+package org.locationtech.rasterframes.expressions.tilestats
-import astraea.spark.rasterframes.expressions.{NullToValue, UnaryRasterOp}
-import astraea.spark.rasterframes.model.TileContext
+import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp}
import geotrellis.raster.{Tile, isData}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.types.{DataType, DoubleType}
import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes.model.TileContext
@ExpressionDescription(
usage = "_FUNC_(tile) - Determines the maximum cell value.",
@@ -41,13 +41,13 @@ import org.apache.spark.sql.{Column, TypedColumn}
)
case class TileMax(child: Expression) extends UnaryRasterOp
with NullToValue with CodegenFallback {
- override def nodeName: String = "tile_max"
+ override def nodeName: String = "rf_tile_max"
override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = TileMax.op(tile)
override def dataType: DataType = DoubleType
override def na: Any = Double.MinValue
}
object TileMax {
- import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc
+ import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc
def apply(tile: Column): TypedColumn[Any, Double] =
new Column(TileMax(tile.expr)).as[Double]
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMean.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMean.scala
similarity index 83%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMean.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMean.scala
index e23e68c08..92c833f98 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMean.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMean.scala
@@ -19,16 +19,15 @@
*
*/
-package astraea.spark.rasterframes.expressions.tilestats
+package org.locationtech.rasterframes.expressions.tilestats
-import astraea.spark.rasterframes.expressions.{NullToValue, UnaryRasterOp}
-import astraea.spark.rasterframes.functions.safeEval
-import astraea.spark.rasterframes.model.TileContext
+import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp}
import geotrellis.raster.{Tile, isData}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.types.{DataType, DoubleType}
import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes.model.TileContext
@ExpressionDescription(
usage = "_FUNC_(tile) - Computes the mean cell value of a tile.",
@@ -42,13 +41,13 @@ import org.apache.spark.sql.{Column, TypedColumn}
)
case class TileMean(child: Expression) extends UnaryRasterOp
with NullToValue with CodegenFallback {
- override def nodeName: String = "tile_mean"
+ override def nodeName: String = "rf_tile_mean"
override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = TileMean.op(tile)
override def dataType: DataType = DoubleType
override def na: Any = Double.NaN
}
object TileMean {
- import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc
+ import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc
def apply(tile: Column): TypedColumn[Any, Double] =
new Column(TileMean(tile.expr)).as[Double]
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMin.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMin.scala
similarity index 85%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMin.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMin.scala
index 4d2edc9b3..71fa0194a 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMin.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMin.scala
@@ -19,15 +19,15 @@
*
*/
-package astraea.spark.rasterframes.expressions.tilestats
+package org.locationtech.rasterframes.expressions.tilestats
-import astraea.spark.rasterframes.expressions.{NullToValue, UnaryRasterOp}
-import astraea.spark.rasterframes.model.TileContext
+import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp}
import geotrellis.raster.{Tile, isData}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.types.{DataType, DoubleType}
import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes.model.TileContext
@ExpressionDescription(
usage = "_FUNC_(tile) - Determines the minimum cell value.",
@@ -41,13 +41,13 @@ import org.apache.spark.sql.{Column, TypedColumn}
)
case class TileMin(child: Expression) extends UnaryRasterOp
with NullToValue with CodegenFallback {
- override def nodeName: String = "tile_min"
+ override def nodeName: String = "rf_tile_min"
override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = TileMin.op(tile)
override def dataType: DataType = DoubleType
override def na: Any = Double.MaxValue
}
object TileMin {
- import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc
+ import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc
def apply(tile: Column): TypedColumn[Any, Double] =
new Column(TileMin(tile.expr)).as[Double]
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileStats.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileStats.scala
similarity index 84%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileStats.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileStats.scala
index 015f048e8..fac6d330e 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileStats.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileStats.scala
@@ -19,17 +19,18 @@
*
*/
-package astraea.spark.rasterframes.expressions.tilestats
+package org.locationtech.rasterframes.expressions.tilestats
-import astraea.spark.rasterframes.expressions.UnaryRasterOp
-import astraea.spark.rasterframes.model.TileContext
-import astraea.spark.rasterframes.stats.CellStatistics
+import org.locationtech.rasterframes.expressions.UnaryRasterOp
+import org.locationtech.rasterframes.stats.CellStatistics
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.CatalystTypeConverters
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.types.DataType
import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes.expressions.UnaryRasterOp
+import org.locationtech.rasterframes.model.TileContext
@ExpressionDescription(
usage = "_FUNC_(tile) - Computes per-tile descriptive statistics.",
@@ -43,7 +44,7 @@ import org.apache.spark.sql.{Column, TypedColumn}
)
case class TileStats(child: Expression) extends UnaryRasterOp
with CodegenFallback {
- override def nodeName: String = "tile_stats"
+ override def nodeName: String = "rf_tile_stats"
override protected def eval(tile: Tile, ctx: Option[TileContext]): Any =
TileStats.converter(TileStats.op(tile).orNull)
override def dataType: DataType = CellStatistics.schema
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/DebugRender.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/DebugRender.scala
similarity index 84%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/DebugRender.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/DebugRender.scala
index c26cc6b51..babb9c7b7 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/DebugRender.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/DebugRender.scala
@@ -19,10 +19,10 @@
*
*/
-package astraea.spark.rasterframes.expressions.transformers
-import astraea.spark.rasterframes.expressions.UnaryRasterOp
-import astraea.spark.rasterframes.model.TileContext
-import astraea.spark.rasterframes.util.TileAsMatrix
+package org.locationtech.rasterframes.expressions.transformers
+
+import org.locationtech.rasterframes.expressions.UnaryRasterOp
+import org.locationtech.rasterframes.util.TileAsMatrix
import geotrellis.raster.Tile
import geotrellis.raster.render.ascii.AsciiArtEncoder
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
@@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescript
import org.apache.spark.sql.types.{DataType, StringType}
import org.apache.spark.sql.{Column, TypedColumn}
import org.apache.spark.unsafe.types.UTF8String
+import org.locationtech.rasterframes.model.TileContext
abstract class DebugRender(asciiArt: Boolean) extends UnaryRasterOp
with CodegenFallback with Serializable {
@@ -45,7 +46,7 @@ abstract class DebugRender(asciiArt: Boolean) extends UnaryRasterOp
}
object DebugRender {
- import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.stringEnc
+ import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.stringEnc
@ExpressionDescription(
usage = "_FUNC_(tile) - Coverts the contents of the given tile an ASCII art string rendering",
@@ -54,7 +55,7 @@ object DebugRender {
* tile - tile to render"""
)
case class RenderAscii(child: Expression) extends DebugRender(true) {
- override def nodeName: String = "render_ascii"
+ override def nodeName: String = "rf_render_ascii"
}
object RenderAscii {
def apply(tile: Column): TypedColumn[Any, String] =
@@ -68,7 +69,7 @@ object DebugRender {
* tile - tile to render"""
)
case class RenderMatrix(child: Expression) extends DebugRender(false) {
- override def nodeName: String = "render_matrix"
+ override def nodeName: String = "rf_render_matrix"
}
object RenderMatrix {
def apply(tile: Column): TypedColumn[Any, String] =
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/BoundsToGeometry.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/ExtentToGeometry.scala
similarity index 76%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/BoundsToGeometry.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/ExtentToGeometry.scala
index 9d6a8c652..9d2d12d2f 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/BoundsToGeometry.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/ExtentToGeometry.scala
@@ -19,12 +19,11 @@
*
*/
-package astraea.spark.rasterframes.expressions.transformers
+package org.locationtech.rasterframes.expressions.transformers
-import astraea.spark.rasterframes.encoders.CatalystSerializer
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.expressions.row
-import com.vividsolutions.jts.geom.{Envelope, Geometry}
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.expressions.row
+import org.locationtech.jts.geom.{Envelope, Geometry}
import geotrellis.vector.Extent
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
@@ -40,13 +39,13 @@ import org.locationtech.geomesa.spark.jts.encoders.SpatialEncoders
*
* @since 8/24/18
*/
-case class BoundsToGeometry(child: Expression) extends UnaryExpression with CodegenFallback {
- override def nodeName: String = "bounds_geometry"
+case class ExtentToGeometry(child: Expression) extends UnaryExpression with CodegenFallback {
+ override def nodeName: String = "st_geometry"
override def dataType: DataType = JTSTypes.GeometryTypeInstance
- private val envSchema = CatalystSerializer[Envelope].schema
- private val extSchema = CatalystSerializer[Extent].schema
+ private val envSchema = schemaOf[Envelope]
+ private val extSchema = schemaOf[Extent]
override def checkInputDataTypes(): TypeCheckResult = {
child.dataType match {
@@ -71,7 +70,7 @@ case class BoundsToGeometry(child: Expression) extends UnaryExpression with Code
}
}
-object BoundsToGeometry extends SpatialEncoders {
+object ExtentToGeometry extends SpatialEncoders {
def apply(bounds: Column): TypedColumn[Any, Geometry] =
- new Column(new BoundsToGeometry(bounds.expr)).as[Geometry]
+ new Column(new ExtentToGeometry(bounds.expr)).as[Geometry]
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/GeometryToBounds.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/GeometryToExtent.scala
similarity index 76%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/GeometryToBounds.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/GeometryToExtent.scala
index 4e08ad9ea..adb52468b 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/GeometryToBounds.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/GeometryToExtent.scala
@@ -19,10 +19,9 @@
*
*/
-package astraea.spark.rasterframes.expressions.transformers
+package org.locationtech.rasterframes.expressions.transformers
-import astraea.spark.rasterframes.encoders.CatalystSerializer
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
import geotrellis.vector.Extent
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
@@ -37,10 +36,10 @@ import org.apache.spark.sql.{Column, TypedColumn}
*
* @since 8/24/18
*/
-case class GeometryToBounds(child: Expression) extends UnaryExpression with CodegenFallback {
- override def nodeName: String = "geometry_bounds"
+case class GeometryToExtent(child: Expression) extends UnaryExpression with CodegenFallback {
+ override def nodeName: String = "st_extent"
- override def dataType: DataType = CatalystSerializer[Extent].schema
+ override def dataType: DataType = schemaOf[Extent]
override def checkInputDataTypes(): TypeCheckResult = {
child.dataType match {
@@ -54,13 +53,13 @@ case class GeometryToBounds(child: Expression) extends UnaryExpression with Code
override protected def nullSafeEval(input: Any): Any = {
val geom = JTSTypes.GeometryTypeInstance.deserialize(input)
val extent = Extent(geom.getEnvelopeInternal)
- CatalystSerializer[Extent].toInternalRow(extent)
+ extent.toInternalRow
}
}
-object GeometryToBounds {
- import astraea.spark.rasterframes.encoders.StandardEncoders._
+object GeometryToExtent {
+ import org.locationtech.rasterframes.encoders.StandardEncoders._
def apply(bounds: Column): TypedColumn[Any, Extent] =
- new Column(new GeometryToBounds(bounds.expr)).as[Extent]
+ new Column(new GeometryToExtent(bounds.expr)).as[Extent]
}
\ No newline at end of file
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/Mask.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/Mask.scala
similarity index 79%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/Mask.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/Mask.scala
index 03e81efc2..106a52a7b 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/Mask.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/Mask.scala
@@ -19,10 +19,11 @@
*
*/
-package astraea.spark.rasterframes.expressions.transformers
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.expressions.DynamicExtractors._
-import astraea.spark.rasterframes.expressions.row
+package org.locationtech.rasterframes.expressions.transformers
+
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.expressions.DynamicExtractors._
+import org.locationtech.rasterframes.expressions.row
import com.typesafe.scalalogging.LazyLogging
import geotrellis.raster
import geotrellis.raster.Tile
@@ -81,7 +82,7 @@ abstract class Mask(val left: Expression, val middle: Expression, val right: Exp
}
}
object Mask {
- import astraea.spark.rasterframes.encoders.StandardEncoders.singlebandTileEncoder
+ import org.locationtech.rasterframes.encoders.StandardEncoders.singlebandTileEncoder
@ExpressionDescription(
usage = "_FUNC_(target, mask) - Generate a tile with the values from the data tile, but where cells in the masking tile contain NODATA, replace the data value with NODATA.",
@@ -96,7 +97,7 @@ object Mask {
)
case class MaskByDefined(target: Expression, mask: Expression)
extends Mask(target, mask, Literal(0), false) {
- override def nodeName: String = "mask"
+ override def nodeName: String = "rf_mask"
}
object MaskByDefined {
def apply(targetTile: Column, maskTile: Column): TypedColumn[Any, Tile] =
@@ -116,7 +117,7 @@ object Mask {
)
case class InverseMaskByDefined(leftTile: Expression, rightTile: Expression)
extends Mask(leftTile, rightTile, Literal(0), true) {
- override def nodeName: String = "inverse_mask"
+ override def nodeName: String = "rf_inverse_mask"
}
object InverseMaskByDefined {
def apply(srcTile: Column, maskingTile: Column): TypedColumn[Any, Tile] =
@@ -136,10 +137,32 @@ object Mask {
)
case class MaskByValue(leftTile: Expression, rightTile: Expression, maskValue: Expression)
extends Mask(leftTile, rightTile, maskValue, false) {
- override def nodeName: String = "mask_by_value"
+ override def nodeName: String = "rf_mask_by_value"
}
object MaskByValue {
def apply(srcTile: Column, maskingTile: Column, maskValue: Column): TypedColumn[Any, Tile] =
new Column(MaskByValue(srcTile.expr, maskingTile.expr, maskValue.expr)).as[Tile]
}
+
+ @ExpressionDescription(
+ usage = "_FUNC_(target, mask, maskValue) - Generate a tile with the values from the data tile, but where cells in the masking tile DO NOT contain the masking value, replace the data value with NODATA.",
+ arguments = """
+ Arguments:
+ * target - tile to mask
+ * mask - masking definition
+ * maskValue - value in the `mask` for which to mark `target` as data cells
+ """,
+ examples = """
+ Examples:
+ > SELECT _FUNC_(target, mask, maskValue);
+ ..."""
+ )
+ case class InverseMaskByValue(leftTile: Expression, rightTile: Expression, maskValue: Expression)
+ extends Mask(leftTile, rightTile, maskValue, true) {
+ override def nodeName: String = "rf_inverse_mask_by_value"
+ }
+ object InverseMaskByValue {
+ def apply(srcTile: Column, maskingTile: Column, maskValue: Column): TypedColumn[Any, Tile] =
+ new Column(InverseMaskByValue(srcTile.expr, maskingTile.expr, maskValue.expr)).as[Tile]
+ }
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterRefToTile.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/RasterRefToTile.scala
similarity index 70%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterRefToTile.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/RasterRefToTile.scala
index c3aa3f337..f0c82c6de 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterRefToTile.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/RasterRefToTile.scala
@@ -19,19 +19,18 @@
*
*/
-package astraea.spark.rasterframes.expressions.transformers
+package org.locationtech.rasterframes.expressions.transformers
-import astraea.spark.rasterframes.encoders.CatalystSerializer
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.expressions.row
-import astraea.spark.rasterframes.ref.RasterRef
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.expressions.row
import com.typesafe.scalalogging.LazyLogging
-import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, UnaryExpression}
import org.apache.spark.sql.rf._
import org.apache.spark.sql.types.DataType
import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes.ref.RasterRef
+import org.locationtech.rasterframes.tiles.ProjectedRasterTile
/**
* Realizes a RasterRef into a Tile.
@@ -43,19 +42,18 @@ case class RasterRefToTile(child: Expression) extends UnaryExpression
override def nodeName: String = "raster_ref_to_tile"
- override def inputTypes = Seq(CatalystSerializer[RasterRef].schema)
+ override def inputTypes = Seq(schemaOf[RasterRef])
- override def dataType: DataType = new TileUDT
+ override def dataType: DataType = schemaOf[ProjectedRasterTile]
override protected def nullSafeEval(input: Any): Any = {
implicit val ser = TileUDT.tileSerializer
val ref = row(input).to[RasterRef]
- (ref.tile: Tile).toInternalRow
+ ref.tile.toInternalRow
}
}
object RasterRefToTile {
- import astraea.spark.rasterframes.encoders.StandardEncoders._
- def apply(rr: Column): TypedColumn[Any, Tile] =
- new Column(RasterRefToTile(rr.expr)).as[Tile]
+ def apply(rr: Column): TypedColumn[Any, ProjectedRasterTile] =
+ new Column(RasterRefToTile(rr.expr)).as[ProjectedRasterTile]
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/ReprojectGeometry.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/ReprojectGeometry.scala
similarity index 53%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/ReprojectGeometry.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/ReprojectGeometry.scala
index 7e78c5942..9c1ab2234 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/ReprojectGeometry.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/ReprojectGeometry.scala
@@ -19,37 +19,54 @@
*
*/
-package astraea.spark.rasterframes.expressions.transformers
+package org.locationtech.rasterframes.expressions.transformers
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.encoders.{CatalystSerializer, serialized_literal}
-import astraea.spark.rasterframes.jts.ReprojectionTransformer
-import com.vividsolutions.jts.geom.Geometry
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.encoders.serialized_literal
+import org.locationtech.jts.geom.Geometry
import geotrellis.proj4.CRS
import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
-import org.apache.spark.sql.jts.JTSTypes
+import org.apache.spark.sql.jts.{AbstractGeometryUDT, JTSTypes}
import org.apache.spark.sql.types.DataType
import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes.expressions.DynamicExtractors
+import org.locationtech.rasterframes.jts.ReprojectionTransformer
+import org.locationtech.rasterframes.model.LazyCRS
-/**
- *
- *
- * @since 11/29/18
- */
+@ExpressionDescription(
+  usage = "_FUNC_(geom, srcCRS, dstCRS) - Reprojects the given `geom` from `srcCRS` to `dstCRS`",
+ arguments = """
+ Arguments:
+ * geom - the geometry column to reproject
+ * srcCRS - the CRS of the `geom` column
+ * dstCRS - the CRS to project geometry into""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(geom, srcCRS, dstCRS);
+ ..."""
+)
case class ReprojectGeometry(geometry: Expression, srcCRS: Expression, dstCRS: Expression) extends Expression
- with CodegenFallback with ExpectsInputTypes {
+ with CodegenFallback {
- override def nodeName: String = "reproject_geometry"
+ override def nodeName: String = "st_reproject"
override def dataType: DataType = JTSTypes.GeometryTypeInstance
override def nullable: Boolean = geometry.nullable || srcCRS.nullable || dstCRS.nullable
override def children: Seq[Expression] = Seq(geometry, srcCRS, dstCRS)
- private def crsSerde = CatalystSerializer[CRS]
- override val inputTypes = Seq(
- dataType, crsSerde.schema, crsSerde.schema
- )
+
+ override def checkInputDataTypes(): TypeCheckResult = {
+ if (!geometry.dataType.isInstanceOf[AbstractGeometryUDT[_]])
+ TypeCheckFailure(s"Input type '${geometry.dataType}' does not conform to a geometry type.")
+ else if(!DynamicExtractors.crsExtractor.isDefinedAt(srcCRS.dataType))
+ TypeCheckFailure(s"Input type '${srcCRS.dataType}' cannot be interpreted as a CRS.")
+ else if(!DynamicExtractors.crsExtractor.isDefinedAt(dstCRS.dataType))
+ TypeCheckFailure(s"Input type '${dstCRS.dataType}' cannot be interpreted as a CRS.")
+ else TypeCheckResult.TypeCheckSuccess
+ }
/** Reprojects a geometry column from one CRS to another. */
val reproject: (Geometry, CRS, CRS) ⇒ Geometry =
@@ -59,10 +76,15 @@ case class ReprojectGeometry(geometry: Expression, srcCRS: Expression, dstCRS: E
}
override def eval(input: InternalRow): Any = {
- val geom = JTSTypes.GeometryTypeInstance.deserialize(geometry.eval(input))
- val src = srcCRS.eval(input).asInstanceOf[InternalRow].to[CRS]
- val dst = dstCRS.eval(input).asInstanceOf[InternalRow].to[CRS]
- JTSTypes.GeometryTypeInstance.serialize(reproject(geom, src, dst))
+ val src = DynamicExtractors.crsExtractor(srcCRS.dataType)(srcCRS.eval(input))
+ val dst = DynamicExtractors.crsExtractor(dstCRS.dataType)(dstCRS.eval(input))
+ (src, dst) match {
+ // Optimized pass-through case.
+ case (s: LazyCRS, r: LazyCRS) if s.encoded == r.encoded => geometry.eval(input)
+ case _ =>
+ val geom = JTSTypes.GeometryTypeInstance.deserialize(geometry.eval(input))
+ JTSTypes.GeometryTypeInstance.serialize(reproject(geom, src, dst))
+ }
}
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/SetCellType.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/SetCellType.scala
similarity index 86%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/SetCellType.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/SetCellType.scala
index 96fcd4288..6990e34df 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/SetCellType.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/SetCellType.scala
@@ -19,13 +19,12 @@
*
*/
-package astraea.spark.rasterframes.expressions.transformers
+package org.locationtech.rasterframes.expressions.transformers
-import astraea.spark.rasterframes.encoders.CatalystSerializer
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.encoders.StandardEncoders._
-import astraea.spark.rasterframes.expressions.DynamicExtractors.tileExtractor
-import astraea.spark.rasterframes.expressions.row
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.encoders.StandardEncoders._
+import org.locationtech.rasterframes.expressions.DynamicExtractors.tileExtractor
+import org.locationtech.rasterframes.expressions.row
import geotrellis.raster.{CellType, Tile}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
@@ -33,7 +32,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure,
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, Expression}
import org.apache.spark.sql.functions.lit
-import org.apache.spark.sql.rf._
+import org.apache.spark.sql.rf.{TileUDT, WithTypeConformity}
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Column, TypedColumn}
import org.apache.spark.unsafe.types.UTF8String
@@ -50,7 +49,7 @@ case class SetCellType(tile: Expression, cellType: Expression)
override def nodeName: String = "set_cell_type"
override def dataType: DataType = left.dataType
- private val ctSchema = CatalystSerializer[CellType].schema
+ private val ctSchema = schemaOf[CellType]
override def checkInputDataTypes(): TypeCheckResult = {
if (!tileExtractor.isDefinedAt(left.dataType))
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/TileToArrayDouble.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayDouble.scala
similarity index 83%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/TileToArrayDouble.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayDouble.scala
index 02a4bc4e8..5d7786f1c 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/TileToArrayDouble.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayDouble.scala
@@ -19,15 +19,16 @@
*
*/
-package astraea.spark.rasterframes.expressions.transformers
-import astraea.spark.rasterframes.expressions.UnaryRasterOp
-import astraea.spark.rasterframes.model.TileContext
+package org.locationtech.rasterframes.expressions.transformers
+
+import org.locationtech.rasterframes.expressions.UnaryRasterOp
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.util.ArrayData
import org.apache.spark.sql.types.{DataType, DataTypes, DoubleType}
import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes.model.TileContext
@ExpressionDescription(
usage = "_FUNC_(tile) - Coverts the contents of the given tile to an array of double floating-point values",
@@ -36,14 +37,14 @@ import org.apache.spark.sql.{Column, TypedColumn}
* tile - tile to convert"""
)
case class TileToArrayDouble(child: Expression) extends UnaryRasterOp with CodegenFallback {
- override def nodeName: String = "tile_to_array_double"
+ override def nodeName: String = "rf_tile_to_array_double"
override def dataType: DataType = DataTypes.createArrayType(DoubleType, false)
override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = {
ArrayData.toArrayData(tile.toArrayDouble())
}
}
object TileToArrayDouble {
- import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.arrayEnc
+ import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.arrayEnc
def apply(tile: Column): TypedColumn[Any, Array[Double]] =
new Column(TileToArrayDouble(tile.expr)).as[Array[Double]]
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/TileToArrayInt.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayInt.scala
similarity index 81%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/TileToArrayInt.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayInt.scala
index 31ad81516..c299d57c7 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/TileToArrayInt.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayInt.scala
@@ -19,16 +19,17 @@
*
*/
-package astraea.spark.rasterframes.expressions.transformers
+package org.locationtech.rasterframes.expressions.transformers
-import astraea.spark.rasterframes.expressions.UnaryRasterOp
-import astraea.spark.rasterframes.model.TileContext
+import org.locationtech.rasterframes.expressions.UnaryRasterOp
import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
import org.apache.spark.sql.catalyst.util.ArrayData
import org.apache.spark.sql.types.{DataType, DataTypes, IntegerType}
import org.apache.spark.sql.{Column, TypedColumn}
+import org.locationtech.rasterframes.expressions.UnaryRasterOp
+import org.locationtech.rasterframes.model.TileContext
@ExpressionDescription(
usage = "_FUNC_(tile) - Coverts the contents of the given tile to an array of integer values",
@@ -37,14 +38,14 @@ import org.apache.spark.sql.{Column, TypedColumn}
* tile - tile to convert"""
)
case class TileToArrayInt(child: Expression) extends UnaryRasterOp with CodegenFallback {
- override def nodeName: String = "tile_to_array_int"
+ override def nodeName: String = "rf_tile_to_array_int"
override def dataType: DataType = DataTypes.createArrayType(IntegerType, false)
override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = {
ArrayData.toArrayData(tile.toArray())
}
}
object TileToArrayInt {
- import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.arrayEnc
+ import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.arrayEnc
def apply(tile: Column): TypedColumn[Any, Array[Int]] =
new Column(TileToArrayInt(tile.expr)).as[Array[Int]]
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/URIToRasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/URIToRasterSource.scala
similarity index 61%
rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/URIToRasterSource.scala
rename to core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/URIToRasterSource.scala
index 0821e43db..903e62dde 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/URIToRasterSource.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/URIToRasterSource.scala
@@ -19,20 +19,18 @@
*
*/
-package astraea.spark.rasterframes.expressions.transformers
+package org.locationtech.rasterframes.expressions.transformers
import java.net.URI
-import astraea.spark.rasterframes.ref.RasterSource.ReadCallback
-import astraea.spark.rasterframes.ref.{RasterRef, RasterSource}
+import org.locationtech.rasterframes.RasterSourceType
import com.typesafe.scalalogging.LazyLogging
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, UnaryExpression}
-import org.apache.spark.sql.rf._
import org.apache.spark.sql.types.{DataType, StringType}
import org.apache.spark.sql.{Column, TypedColumn}
import org.apache.spark.unsafe.types.UTF8String
-
+import org.locationtech.rasterframes.ref.RasterSource
/**
* Catalyst generator to convert a geotiff download URL into a series of rows
@@ -40,28 +38,24 @@ import org.apache.spark.unsafe.types.UTF8String
*
* @since 5/4/18
*/
-case class URIToRasterSource(override val child: Expression, accumulator: Option[ReadCallback])
+case class URIToRasterSource(override val child: Expression)
extends UnaryExpression with ExpectsInputTypes with CodegenFallback with LazyLogging {
- override def nodeName: String = "uri_to_raster_source"
+ override def nodeName: String = "rf_uri_to_raster_source"
- override def dataType: DataType = new RasterSourceUDT
+ override def dataType: DataType = RasterSourceType
override def inputTypes = Seq(StringType)
override protected def nullSafeEval(input: Any): Any = {
val uriString = input.asInstanceOf[UTF8String].toString
val uri = URI.create(uriString)
- val ref = RasterSource(uri, accumulator)
- RasterSourceUDT.serialize(ref)
+ val ref = RasterSource(uri)
+ RasterSourceType.serialize(ref)
}
}
object URIToRasterSource {
- def apply(rasterURI: Column): TypedColumn[Any, RasterRef] =
- new Column(new URIToRasterSource(rasterURI.expr, None)).as[RasterRef]
- def apply(rasterURI: Column, accumulator: ReadCallback): TypedColumn[Any, RasterRef] =
- new Column(new URIToRasterSource(rasterURI.expr, Option(accumulator))).as[RasterRef]
- def apply(rasterURI: Column, accumulator: Option[ReadCallback]): TypedColumn[Any, RasterRef] =
- new Column(new URIToRasterSource(rasterURI.expr, accumulator)).as[RasterRef]
+ def apply(rasterURI: Column): TypedColumn[Any, RasterSource] =
+ new Column(new URIToRasterSource(rasterURI.expr)).as[RasterSource]
}
diff --git a/core/src/main/scala/org/locationtech/rasterframes/extensions/ContextRDDMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/ContextRDDMethods.scala
new file mode 100644
index 000000000..7bf3230b3
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/ContextRDDMethods.scala
@@ -0,0 +1,73 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2017 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.extensions
+
+import org.locationtech.rasterframes.PairRDDConverter._
+import org.locationtech.rasterframes.StandardColumns._
+import Implicits._
+import org.locationtech.rasterframes.util._
+import org.locationtech.rasterframes.RasterFrameLayer
+import geotrellis.raster.CellGrid
+import geotrellis.spark._
+import geotrellis.spark.io._
+import geotrellis.util.MethodExtensions
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.SparkSession
+import org.locationtech.rasterframes.PairRDDConverter
+
+/**
+ * Extension method on `ContextRDD`-shaped RDDs with appropriate context bounds to create a RasterFrameLayer.
+ * @since 7/18/17
+ */
+abstract class SpatialContextRDDMethods[T <: CellGrid](implicit spark: SparkSession)
+ extends MethodExtensions[RDD[(SpatialKey, T)] with Metadata[TileLayerMetadata[SpatialKey]]] {
+ import PairRDDConverter._
+
+ def toLayer(implicit converter: PairRDDConverter[SpatialKey, T]): RasterFrameLayer = toLayer(TILE_COLUMN.columnName)
+
+ def toLayer(tileColumnName: String)(implicit converter: PairRDDConverter[SpatialKey, T]): RasterFrameLayer = {
+ val df = self.toDataFrame.setSpatialColumnRole(SPATIAL_KEY_COLUMN, self.metadata)
+ val defName = TILE_COLUMN.columnName
+ df.mapWhen(_ ⇒ tileColumnName != defName, _.withColumnRenamed(defName, tileColumnName))
+ .certify
+ }
+}
+
+/**
+ * Extension method on `ContextRDD`-shaped `Tile` RDDs keyed with [[SpaceTimeKey]], with appropriate context bounds to create a RasterFrameLayer.
+ * @since 9/11/17
+ */
+abstract class SpatioTemporalContextRDDMethods[T <: CellGrid](
+ implicit spark: SparkSession)
+ extends MethodExtensions[RDD[(SpaceTimeKey, T)] with Metadata[TileLayerMetadata[SpaceTimeKey]]] {
+
+ def toLayer(implicit converter: PairRDDConverter[SpaceTimeKey, T]): RasterFrameLayer = toLayer(TILE_COLUMN.columnName)
+
+ def toLayer(tileColumnName: String)(implicit converter: PairRDDConverter[SpaceTimeKey, T]): RasterFrameLayer = {
+ val df = self.toDataFrame
+ .setSpatialColumnRole(SPATIAL_KEY_COLUMN, self.metadata)
+ .setTemporalColumnRole(TEMPORAL_KEY_COLUMN)
+ val defName = TILE_COLUMN.columnName
+ df.mapWhen(_ ⇒ tileColumnName != defName, _.withColumnRenamed(defName, tileColumnName))
+ .certify
+ }
+}
diff --git a/core/src/main/scala/org/locationtech/rasterframes/extensions/DataFrameMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/DataFrameMethods.scala
new file mode 100644
index 000000000..1e94ff3ca
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/DataFrameMethods.scala
@@ -0,0 +1,305 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2017 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.extensions
+
+import geotrellis.proj4.CRS
+import geotrellis.spark.io._
+import geotrellis.spark.{SpaceTimeKey, SpatialComponent, SpatialKey, TemporalKey, TileLayerMetadata}
+import geotrellis.util.MethodExtensions
+import geotrellis.vector.Extent
+import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.types.{MetadataBuilder, StructField}
+import org.apache.spark.sql.{Column, DataFrame, TypedColumn}
+import org.locationtech.rasterframes.StandardColumns._
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.encoders.StandardEncoders._
+import org.locationtech.rasterframes.expressions.DynamicExtractors
+import org.locationtech.rasterframes.tiles.ProjectedRasterTile
+import org.locationtech.rasterframes.util._
+import org.locationtech.rasterframes.{MetadataKeys, RasterFrameLayer}
+import spray.json.JsonFormat
+
+import scala.util.Try
+
+/**
+ * Extension methods over [[DataFrame]].
+ *
+ * @since 7/18/17
+ */
+trait DataFrameMethods[DF <: DataFrame] extends MethodExtensions[DF] with MetadataKeys {
+ import Implicits.{WithDataFrameMethods, WithMetadataBuilderMethods, WithMetadataMethods, WithRasterFrameLayerMethods}
+
+ private def selector(column: Column) = (attr: Attribute) ⇒
+ attr.name == column.columnName || attr.semanticEquals(column.expr)
+
+ /** Map over the Attribute representation of Columns, modifying the one matching `column` with `op`. */
+ private[rasterframes] def mapColumnAttribute(column: Column, op: Attribute ⇒ Attribute): DF = {
+ val analyzed = self.queryExecution.analyzed.output
+ val selects = selector(column)
+ val attrs = analyzed.map { attr ⇒
+ if(selects(attr)) op(attr) else attr
+ }
+ self.select(attrs.map(a ⇒ new Column(a)): _*).asInstanceOf[DF]
+ }
+
+ private[rasterframes] def addColumnMetadata(column: Column, op: MetadataBuilder ⇒ MetadataBuilder): DF = {
+ mapColumnAttribute(column, attr ⇒ {
+ val md = new MetadataBuilder().withMetadata(attr.metadata)
+ attr.withMetadata(op(md).build)
+ })
+ }
+
+ private[rasterframes] def fetchMetadataValue[D](column: Column, reader: (Attribute) ⇒ D): Option[D] = {
+ val analyzed = self.queryExecution.analyzed.output
+ analyzed.find(selector(column)).map(reader)
+ }
+
+ private[rasterframes]
+ def setSpatialColumnRole[K: SpatialComponent: JsonFormat](
+ column: Column, md: TileLayerMetadata[K]): DF =
+ addColumnMetadata(column,
+ _.attachContext(md.asColumnMetadata).tagSpatialKey
+ )
+
+ private[rasterframes]
+ def setTemporalColumnRole(column: Column): DF =
+ addColumnMetadata(column, _.tagTemporalKey)
+
+ /** Get the role tag the column plays in the RasterFrameLayer, if any. */
+ private[rasterframes]
+ def getColumnRole(column: Column): Option[String] =
+ fetchMetadataValue(column, _.metadata.getString(SPATIAL_ROLE_KEY))
+
+ /** Get the columns that are of type `Tile` */
+ def tileColumns: Seq[Column] =
+ self.schema.fields
+ .filter(f => DynamicExtractors.tileExtractor.isDefinedAt(f.dataType))
+ .map(f ⇒ self.col(f.name))
+
+ /** Get the columns that look like `ProjectedRasterTile`s. */
+ def projRasterColumns: Seq[Column] =
+ self.schema.fields
+ .filter(_.dataType.conformsTo[ProjectedRasterTile])
+ .map(f => self.col(f.name))
+
+ /** Get the columns that look like `Extent`s. */
+ def extentColumns: Seq[Column] =
+ self.schema.fields
+ .filter(_.dataType.conformsTo[Extent])
+ .map(f => self.col(f.name))
+
+ /** Get the columns that look like `CRS`s. */
+ def crsColumns: Seq[Column] =
+ self.schema.fields
+ .filter(_.dataType.conformsTo[CRS])
+ .map(f => self.col(f.name))
+
+ /** Get the columns that are not of type `Tile` */
+ def notTileColumns: Seq[Column] =
+ self.schema.fields
+ .filter(f => !DynamicExtractors.tileExtractor.isDefinedAt(f.dataType))
+ .map(f ⇒ self.col(f.name))
+
+ /** Get the spatial column. */
+ def spatialKeyColumn: Option[TypedColumn[Any, SpatialKey]] = {
+ val key = findSpatialKeyField
+ key
+ .map(_.name)
+ .map(self.col(_).as[SpatialKey])
+ }
+
+ /** Get the temporal column, if any. */
+ def temporalKeyColumn: Option[TypedColumn[Any, TemporalKey]] = {
+ val key = findTemporalKeyField
+ key.map(_.name).map(self.col(_).as[TemporalKey])
+ }
+
+ /** Find the field tagged with the requested `role` */
+ private[rasterframes] def findRoleField(role: String): Option[StructField] =
+ self.schema.fields.find(
+ f ⇒
+ f.metadata.contains(SPATIAL_ROLE_KEY) &&
+ f.metadata.getString(SPATIAL_ROLE_KEY) == role
+ )
+
+ /** The spatial key is the first one found with context metadata attached to it. */
+ private[rasterframes] def findSpatialKeyField: Option[StructField] =
+ findRoleField(SPATIAL_KEY_COLUMN.columnName)
+
+ /** The temporal key is the first one found with the temporal tag. */
+ private[rasterframes] def findTemporalKeyField: Option[StructField] =
+ findRoleField(TEMPORAL_KEY_COLUMN.columnName)
+
+ /** Renames all columns such that they start with the given prefix string.
+ * Useful for preparing dataframes for joins where duplicate names may arise.
+ */
+ def withPrefixedColumnNames(prefix: String): DF =
+ self.columns.foldLeft(self)((df, c) ⇒ df.withColumnRenamed(c, s"$prefix$c").asInstanceOf[DF])
+
+ /**
+ * Performs a left join on the dataframe `right` to this one, reprojecting and merging tiles as necessary.
+ * The operation is logically a "left outer" join, with the left side also determining the target CRS and extents.
+ * Right side may have multiple Tile columns. Assumes both dataframes use the column names `extent` and `crs` for
+ * the Extent and CRS details for each row. The join expression used is:
+ *
+ * {{{
+ * st_intersects(st_geometry(leftExtent), st_reproject(st_geometry(rightExtent), rightCRS, leftCRS))
+ * }}}
+ *
+ * @param right Right side of the join.
+ * @return joined dataframe
+ */
+ def rasterJoin(right: DataFrame): DataFrame = RasterJoin(self, right)
+
+ /**
+ * Performs a left join on the dataframe `right` to this one, reprojecting and merging tiles as necessary.
+ * The operation is logically a "left outer" join, with the left side also determining the target CRS and extents.
+ * Right side may have multiple Tile columns. This variant allows for the specific geospatial columns to be
+ * specified. The join expression used is:
+ * {{{
+ * st_intersects(st_geometry(leftExtent), st_reproject(st_geometry(rightExtent), rightCRS, leftCRS))
+ * }}}
+ *
+ * @param right right dataframe
+ * @param leftExtent this (left) dataframe's Extent column
+ * @param leftCRS this (left) dataframe's CRS column
+ * @param rightExtent right dataframe's Extent column
+ * @param rightCRS right dataframe's CRS column
+ * @return joined dataframe
+ */
+ def rasterJoin(right: DataFrame, leftExtent: Column, leftCRS: Column, rightExtent: Column, rightCRS: Column): DataFrame =
+ RasterJoin(self, right, leftExtent, leftCRS, rightExtent, rightCRS)
+
+ /**
+ * Performs a left join on the dataframe `right` to this one, reprojecting and merging tiles as necessary.
+ * The operation is logically a "left outer" join, with the left side also determining the target CRS and extents.
+ * Right side may have multiple Tile columns. This variant allows for the specific geospatial columns and join
+ * expression to be specified.
+ *
+ * @param right right dataframe
+ * @param leftExtent this (left) dataframe's Extent column
+ * @param joinExpr join expression
+ * @param leftCRS this (left) dataframe's CRS column
+ * @param rightExtent right dataframe's Extent column
+ * @param rightCRS right dataframe's CRS column
+ * @return joined dataframe
+ */
+ def rasterJoin(right: DataFrame, joinExpr: Column, leftExtent: Column, leftCRS: Column, rightExtent: Column, rightCRS: Column): DataFrame =
+ RasterJoin(self, right, joinExpr, leftExtent, leftCRS, rightExtent, rightCRS)
+
+
+ /** Layout contents of RasterFrame to a layer. Assumes CRS and extent columns exist. */
+ def toLayer(tlm: TileLayerMetadata[SpatialKey]): RasterFrameLayer = ReprojectToLayer(self, tlm)
+
+ /** Coerces this DataFrame to a RasterFrameLayer after ensuring it has:
+ *
+ *
+ * - a space or space-time key column
+ *
+ * - one or more tile columns
+ *
+ * - tile layout metadata
+ *
+ *
+ * If any of the above are violated, an [[IllegalArgumentException]] is thrown.
+ *
+ * @return validated RasterFrameLayer
+ * @throws IllegalArgumentException when constraints are not met.
+ */
+ @throws[IllegalArgumentException]
+ def asLayer: RasterFrameLayer = {
+ val potentialRF = certifyRasterframe(self)
+
+ require(
+ potentialRF.findSpatialKeyField.nonEmpty,
+ "A RasterFrameLayer requires a column identified as a spatial key"
+ )
+
+ require(potentialRF.tileColumns.nonEmpty, "A RasterFrameLayer requires at least one tile column")
+
+ require(
+ Try(potentialRF.tileLayerMetadata).isSuccess,
+ "A RasterFrameLayer requires embedded TileLayerMetadata"
+ )
+
+ potentialRF
+ }
+
+ /**
+ * Convert DataFrame already in a uniform gridding into a RasterFrameLayer
+ *
+ * @param spatialKey The column where the spatial key is stored
+ * @param tlm Metadata describing layout under which tiles were created. Note: no checking is
+ * performed to ensure metadata, key-space, and tiles are coherent.
+ * @throws IllegalArgumentException when constraints outlined in `asLayer` are not met.
+ * @return Encoded RasterFrameLayer
+ */
+ @throws[IllegalArgumentException]
+ private[rasterframes]
+ def asLayer(spatialKey: Column, tlm: TileLayerMetadata[SpatialKey]): RasterFrameLayer =
+ setSpatialColumnRole(spatialKey, tlm).asLayer
+
+ /**
+ * Convert DataFrame already in a uniform gridding into a RasterFrameLayer
+ *
+ * @param spatialKey The column where the spatial key is stored
+ * @param temporalKey The column tagged under the temporal role
+ * @param tlm Metadata describing layout under which tiles were created. Note: no checking is
+ * performed to ensure metadata, key-space, and tiles are coherent.
+ * @throws IllegalArgumentException when constraints outlined in `asLayer` are not met.
+ * @return Encoded RasterFrameLayer
+ */
+ @throws[IllegalArgumentException]
+ private[rasterframes]
+ def asLayer(spatialKey: Column, temporalKey: Column, tlm: TileLayerMetadata[SpaceTimeKey]): RasterFrameLayer =
+ setSpatialColumnRole(spatialKey, tlm)
+ .setTemporalColumnRole(temporalKey)
+ .asLayer
+
+ /**
+ * Converts [[DataFrame]] to a RasterFrameLayer if the following constraints are fulfilled:
+ *
+ *
+ * - a space or space-time key column
+ *
+ * - one or more tile columns
+ *
+ * - tile layout metadata
+ *
+ *
+ * @return Some[RasterFrameLayer] if constraints fulfilled, [[None]] otherwise.
+ */
+ def asLayerSafely: Option[RasterFrameLayer] = Try(asLayer).toOption
+
+ /**
+ * Tests for the following conditions on the [[DataFrame]]:
+ *
+ *
+ * - a space or space-time key column
+ *
+ * - one or more tile columns
+ *
+ * - tile layout metadata
+ *
+ *
+ * @return true if all constraints are fulfilled, false otherwise.
+ */
+ def isAlreadyLayer: Boolean = Try(asLayer).isSuccess
+
+ /** Internal method for slapping the RasterFrameLayer seal of approval on a DataFrame.
+ * Only call if you are sure it has a spatial key and tile columns and TileLayerMetadata. */
+ private[rasterframes] def certify = certifyRasterframe(self)
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/Implicits.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/Implicits.scala
similarity index 82%
rename from core/src/main/scala/astraea/spark/rasterframes/extensions/Implicits.scala
rename to core/src/main/scala/org/locationtech/rasterframes/extensions/Implicits.scala
index 8fdda51a0..563e03e87 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/extensions/Implicits.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/Implicits.scala
@@ -15,16 +15,18 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.extensions
+package org.locationtech.rasterframes.extensions
-import astraea.spark.rasterframes.RasterFrame
-import astraea.spark.rasterframes.util.{WithMergeMethods, WithPrototypeMethods}
+import org.locationtech.rasterframes.RasterFrameLayer
+import org.locationtech.rasterframes.util.{WithMergeMethods, WithPrototypeMethods}
import geotrellis.raster._
+import geotrellis.raster.io.geotiff.SinglebandGeoTiff
import geotrellis.spark.{Metadata, SpaceTimeKey, SpatialKey, TileLayerMetadata}
import geotrellis.util.MethodExtensions
-import org.apache.hadoop.conf.{Configuration => HadoopConfiguration}
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
@@ -50,9 +52,11 @@ trait Implicits {
implicit class WithProjectedRasterMethods[T <: CellGrid: WithMergeMethods: WithPrototypeMethods: TypeTag](
val self: ProjectedRaster[T]) extends ProjectedRasterMethods[T]
+ implicit class WithSinglebandGeoTiffMethods(val self: SinglebandGeoTiff) extends SinglebandGeoTiffMethods
+
implicit class WithDataFrameMethods[D <: DataFrame](val self: D) extends DataFrameMethods[D]
- implicit class WithRasterFrameMethods(val self: RasterFrame) extends RasterFrameMethods
+ implicit class WithRasterFrameLayerMethods(val self: RasterFrameLayer) extends RasterFrameLayerMethods
implicit class WithSpatialContextRDDMethods[T <: CellGrid](
val self: RDD[(SpatialKey, T)] with Metadata[TileLayerMetadata[SpatialKey]]
@@ -62,17 +66,17 @@ trait Implicits {
val self: RDD[(SpaceTimeKey, T)] with Metadata[TileLayerMetadata[SpaceTimeKey]]
)(implicit spark: SparkSession) extends SpatioTemporalContextRDDMethods[T]
- private[astraea]
+ private[rasterframes]
implicit class WithMetadataMethods[R: JsonFormat](val self: R)
extends MetadataMethods[R]
- private[astraea]
+ private[rasterframes]
implicit class WithMetadataAppendMethods(val self: SMetadata)
extends MethodExtensions[SMetadata] {
def append = new MetadataBuilder().withMetadata(self)
}
- private[astraea]
+ private[rasterframes]
implicit class WithMetadataBuilderMethods(val self: MetadataBuilder)
extends MetadataBuilderMethods
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/KryoMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/KryoMethods.scala
similarity index 93%
rename from core/src/main/scala/astraea/spark/rasterframes/extensions/KryoMethods.scala
rename to core/src/main/scala/org/locationtech/rasterframes/extensions/KryoMethods.scala
index 52ed69557..7b291d7d6 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/extensions/KryoMethods.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/KryoMethods.scala
@@ -19,12 +19,12 @@
*
*/
-package astraea.spark.rasterframes.extensions
-import astraea.spark.rasterframes.util.RFKryoRegistrator
+package org.locationtech.rasterframes.extensions
import geotrellis.util.MethodExtensions
import org.apache.spark.SparkConf
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.sql.SparkSession
+import org.locationtech.rasterframes.util.RFKryoRegistrator
object KryoMethods {
val kryoProperties = Map("spark.serializer" -> classOf[KryoSerializer].getName,
diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/MetadataBuilderMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/MetadataBuilderMethods.scala
similarity index 85%
rename from core/src/main/scala/astraea/spark/rasterframes/extensions/MetadataBuilderMethods.scala
rename to core/src/main/scala/org/locationtech/rasterframes/extensions/MetadataBuilderMethods.scala
index 491c30b4d..fc2401bb5 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/extensions/MetadataBuilderMethods.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/MetadataBuilderMethods.scala
@@ -15,14 +15,16 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.extensions
+package org.locationtech.rasterframes.extensions
-import astraea.spark.rasterframes.{MetadataKeys, StandardColumns}
import geotrellis.util.MethodExtensions
import org.apache.spark.sql.types.{Metadata, MetadataBuilder}
-import astraea.spark.rasterframes.util._
+import org.locationtech.rasterframes.util._
+import org.locationtech.rasterframes.{MetadataKeys, StandardColumns}
/**
* Convenience to deal with boilerplate associated with adding
@@ -30,7 +32,7 @@ import astraea.spark.rasterframes.util._
*
* @since 12/21/17
*/
-private[astraea]
+private[rasterframes]
abstract class MetadataBuilderMethods extends MethodExtensions[MetadataBuilder] with MetadataKeys with StandardColumns {
def attachContext(md: Metadata) = self.putMetadata(CONTEXT_METADATA_KEY, md)
def tagSpatialKey = self.putString(SPATIAL_ROLE_KEY, SPATIAL_KEY_COLUMN.columnName)
diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/MetadataMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/MetadataMethods.scala
similarity index 60%
rename from core/src/main/scala/astraea/spark/rasterframes/extensions/MetadataMethods.scala
rename to core/src/main/scala/org/locationtech/rasterframes/extensions/MetadataMethods.scala
index e1a886e60..5d96abdf4 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/extensions/MetadataMethods.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/MetadataMethods.scala
@@ -1,20 +1,26 @@
/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
* Copyright 2017 Astraea, Inc.
*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * [http://www.apache.org/licenses/LICENSE-2.0]
*
* Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.extensions
+package org.locationtech.rasterframes.extensions
+
import geotrellis.util.MethodExtensions
import spray.json.{JsObject, JsonFormat}
import org.apache.spark.sql.types.{Metadata ⇒ SQLMetadata}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/ProjectedRasterMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/ProjectedRasterMethods.scala
similarity index 66%
rename from core/src/main/scala/astraea/spark/rasterframes/extensions/ProjectedRasterMethods.scala
rename to core/src/main/scala/org/locationtech/rasterframes/extensions/ProjectedRasterMethods.scala
index 96709ef10..81f5054f9 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/extensions/ProjectedRasterMethods.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/ProjectedRasterMethods.scala
@@ -1,20 +1,41 @@
-package astraea.spark.rasterframes.extensions
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.extensions
import java.time.ZonedDateTime
-import astraea.spark.rasterframes.util._
-import astraea.spark.rasterframes.{PairRDDConverter, RasterFrame, StandardColumns}
import geotrellis.raster.{CellGrid, ProjectedRaster}
import geotrellis.spark._
import geotrellis.spark.tiling._
import geotrellis.util.MethodExtensions
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
+import org.locationtech.rasterframes.util._
+import org.locationtech.rasterframes.{PairRDDConverter, RasterFrameLayer, StandardColumns}
import scala.reflect.runtime.universe._
/**
- * Extension methods on [[ProjectedRaster]] for creating [[RasterFrame]]s.
+ * Extension methods on [[ProjectedRaster]] for creating [[RasterFrameLayer]]s.
*
* @since 8/10/17
*/
@@ -24,63 +45,64 @@ abstract class ProjectedRasterMethods[T <: CellGrid: WithMergeMethods: WithProto
type XTileLayerRDD[K] = RDD[(K, T)] with Metadata[TileLayerMetadata[K]]
/**
- * Convert the wrapped [[ProjectedRaster]] into a [[RasterFrame]] with a
+ * Convert the wrapped [[ProjectedRaster]] into a [[RasterFrameLayer]] with a
* single row.
*
- * @param spark [[SparkSession]] in which to create [[RasterFrame]]
+ * @param spark [[SparkSession]] in which to create [[RasterFrameLayer]]
*/
- def toRF(implicit spark: SparkSession, schema: PairRDDConverter[SpatialKey, T]): RasterFrame = toRF(TILE_COLUMN.columnName)
+ def toLayer(implicit spark: SparkSession, schema: PairRDDConverter[SpatialKey, T]): RasterFrameLayer =
+ toLayer(TILE_COLUMN.columnName)
/**
- * Convert the wrapped [[ProjectedRaster]] into a [[RasterFrame]] with a
+ * Convert the wrapped [[ProjectedRaster]] into a [[RasterFrameLayer]] with a
* single row.
*
- * @param spark [[SparkSession]] in which to create [[RasterFrame]]
+ * @param spark [[SparkSession]] in which to create [[RasterFrameLayer]]
*/
- def toRF(tileColName: String)
- (implicit spark: SparkSession, schema: PairRDDConverter[SpatialKey, T]): RasterFrame = {
+ def toLayer(tileColName: String)
+ (implicit spark: SparkSession, schema: PairRDDConverter[SpatialKey, T]): RasterFrameLayer = {
val (cols, rows) = self.raster.dimensions
- toRF(cols, rows, tileColName)
+ toLayer(cols, rows, tileColName)
}
/**
- * Convert the [[ProjectedRaster]] into a [[RasterFrame]] using the
+ * Convert the [[ProjectedRaster]] into a [[RasterFrameLayer]] using the
* given dimensions as the target per-row tile size.
*
* @param tileCols Max number of horizontal cells per tile
* @param tileRows Max number of vertical cells per tile
- * @param spark [[SparkSession]] in which to create [[RasterFrame]]
+ * @param spark [[SparkSession]] in which to create [[RasterFrameLayer]]
*/
- def toRF(tileCols: Int, tileRows: Int)
- (implicit spark: SparkSession, schema: PairRDDConverter[SpatialKey, T]): RasterFrame =
- toRF(tileCols, tileRows, TILE_COLUMN.columnName)
+ def toLayer(tileCols: Int, tileRows: Int)
+ (implicit spark: SparkSession, schema: PairRDDConverter[SpatialKey, T]): RasterFrameLayer =
+ toLayer(tileCols, tileRows, TILE_COLUMN.columnName)
/**
- * Convert the [[ProjectedRaster]] into a [[RasterFrame]] using the
+ * Convert the [[ProjectedRaster]] into a [[RasterFrameLayer]] using the
* given dimensions as the target per-row tile size.
*
* @param tileCols Max number of horizontal cells per tile
* @param tileRows Max number of vertical cells per tile
* @param tileColName Name to give the created tile column
- * @param spark [[SparkSession]] in which to create [[RasterFrame]]
+ * @param spark [[SparkSession]] in which to create [[RasterFrameLayer]]
*/
- def toRF(tileCols: Int, tileRows: Int, tileColName: String)
- (implicit spark: SparkSession, schema: PairRDDConverter[SpatialKey, T]): RasterFrame = {
- toTileLayerRDD(tileCols, tileRows).toRF(tileColName)
+ def toLayer(tileCols: Int, tileRows: Int, tileColName: String)
+ (implicit spark: SparkSession, schema: PairRDDConverter[SpatialKey, T]): RasterFrameLayer = {
+ toTileLayerRDD(tileCols, tileRows).toLayer(tileColName)
}
/**
- * Convert the [[ProjectedRaster]] into a [[RasterFrame]] using the
+ * Convert the [[ProjectedRaster]] into a [[RasterFrameLayer]] using the
* given dimensions as the target per-row tile size and singular timestamp as the temporal component.
*
* @param tileCols Max number of horizontal cells per tile
* @param tileRows Max number of vertical cells per tile.
* @param timestamp Temporal key value to assign to tiles.
- * @param spark [[SparkSession]] in which to create [[RasterFrame]]
+ * @param spark [[SparkSession]] in which to create [[RasterFrameLayer]]
*/
- def toRF(tileCols: Int, tileRows: Int, timestamp: ZonedDateTime)
- (implicit spark: SparkSession, schema: PairRDDConverter[SpaceTimeKey, T]): RasterFrame =
- toTileLayerRDD(tileCols, tileRows, timestamp).toRF
+ def toLayer(tileCols: Int, tileRows: Int, timestamp: ZonedDateTime)
+ (implicit spark: SparkSession, schema: PairRDDConverter[SpaceTimeKey, T]): RasterFrameLayer =
+ toTileLayerRDD(tileCols, tileRows, timestamp).toLayer
/**
* Convert the [[ProjectedRaster]] into a [[TileLayerRDD[SpatialKey]] using the
@@ -92,7 +114,7 @@ abstract class ProjectedRasterMethods[T <: CellGrid: WithMergeMethods: WithProto
*/
def toTileLayerRDD(tileCols: Int,
tileRows: Int)(implicit spark: SparkSession): XTileLayerRDD[SpatialKey] = {
- val layout = LayoutDefinition(self.rasterExtent, tileCols, tileRows)
+ val layout = LayoutDefinition(self.raster.rasterExtent, tileCols, tileRows)
val kb = KeyBounds(SpatialKey(0, 0), SpatialKey(layout.layoutCols - 1, layout.layoutRows - 1))
val tlm = TileLayerMetadata(self.tile.cellType, layout, self.extent, self.crs, kb)
@@ -115,7 +137,7 @@ abstract class ProjectedRasterMethods[T <: CellGrid: WithMergeMethods: WithProto
* @param spark [[SparkSession]] in which to create RDD
*/
def toTileLayerRDD(tileCols: Int, tileRows: Int, timestamp: ZonedDateTime)(implicit spark: SparkSession): XTileLayerRDD[SpaceTimeKey] = {
- val layout = LayoutDefinition(self.rasterExtent, tileCols, tileRows)
+ val layout = LayoutDefinition(self.raster.rasterExtent, tileCols, tileRows)
val kb = KeyBounds(SpaceTimeKey(0, 0, timestamp), SpaceTimeKey(layout.layoutCols - 1, layout.layoutRows - 1, timestamp))
val tlm = TileLayerMetadata(self.tile.cellType, layout, self.extent, self.crs, kb)
diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/RFSpatialColumnMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/RFSpatialColumnMethods.scala
similarity index 65%
rename from core/src/main/scala/astraea/spark/rasterframes/extensions/RFSpatialColumnMethods.scala
rename to core/src/main/scala/org/locationtech/rasterframes/extensions/RFSpatialColumnMethods.scala
index af744f5f4..4eade42ad 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/extensions/RFSpatialColumnMethods.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/RFSpatialColumnMethods.scala
@@ -15,56 +15,71 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.extensions
+package org.locationtech.rasterframes.extensions
-import astraea.spark.rasterframes.util._
-import astraea.spark.rasterframes.{RasterFrame, StandardColumns}
-import com.vividsolutions.jts.geom.{Point, Polygon}
+import org.locationtech.rasterframes.util._
+import org.locationtech.rasterframes.RasterFrameLayer
+import org.locationtech.jts.geom.Point
import geotrellis.proj4.LatLng
import geotrellis.spark.SpatialKey
import geotrellis.spark.tiling.MapKeyTransform
import geotrellis.util.MethodExtensions
+import geotrellis.vector.Extent
import org.apache.spark.sql.Row
-import org.apache.spark.sql.functions.{asc, udf ⇒ sparkUdf}
+import org.apache.spark.sql.functions.{asc, udf => sparkUdf}
import org.apache.spark.sql.types.{DoubleType, StructField, StructType}
import org.locationtech.geomesa.curve.Z2SFC
+import org.locationtech.rasterframes.StandardColumns
/**
- * RasterFrame extension methods associated with adding spatially descriptive columns.
+ * RasterFrameLayer extension methods associated with adding spatially descriptive columns.
*
* @since 12/15/17
*/
-trait RFSpatialColumnMethods extends MethodExtensions[RasterFrame] with StandardColumns {
- import Implicits.{WithDataFrameMethods, WithRasterFrameMethods}
+trait RFSpatialColumnMethods extends MethodExtensions[RasterFrameLayer] with StandardColumns {
+ import Implicits.{WithDataFrameMethods, WithRasterFrameLayerMethods}
import org.locationtech.geomesa.spark.jts._
/** Returns the key-space to map-space coordinate transform. */
def mapTransform: MapKeyTransform = self.tileLayerMetadata.merge.mapTransform
- private def keyCol2Bounds: Row ⇒ Polygon = {
+ private def keyCol2Extent: Row ⇒ Extent = {
val transform = self.sparkSession.sparkContext.broadcast(mapTransform)
- (r: Row) ⇒ transform.value.keyToExtent(SpatialKey(r.getInt(0), r.getInt(1))).jtsGeom
+ r ⇒ transform.value.keyToExtent(SpatialKey(r.getInt(0), r.getInt(1)))
}
private def keyCol2LatLng: Row ⇒ (Double, Double) = {
val transform = self.sparkSession.sparkContext.broadcast(mapTransform)
val crs = self.tileLayerMetadata.merge.crs
- (r: Row) ⇒ {
+ r ⇒ {
val center = transform.value.keyToExtent(SpatialKey(r.getInt(0), r.getInt(1))).center.reproject(crs, LatLng)
(center.x, center.y)
}
}
+ /**
+ * Append a column containing the extent of the row's spatial key.
+ * Coordinates are in native CRS.
+ * @param colName name of column to append. Defaults to "extent"
+ * @return updated RasterFrameLayer
+ */
+ def withExtent(colName: String = EXTENT_COLUMN.columnName): RasterFrameLayer = {
+ val key2Extent = sparkUdf(keyCol2Extent)
+ self.withColumn(colName, key2Extent(self.spatialKeyColumn)).certify
+ }
+
/**
* Append a column containing the bounds of the row's spatial key.
* Coordinates are in native CRS.
- * @param colName name of column to append. Defaults to "bounds"
- * @return updated RasterFrame
+ * @param colName name of column to append. Defaults to "geometry"
+ * @return updated RasterFrameLayer
*/
- def withBounds(colName: String = BOUNDS_COLUMN.columnName): RasterFrame = {
- val key2Bounds = sparkUdf(keyCol2Bounds)
+ def withGeometry(colName: String = GEOMETRY_COLUMN.columnName): RasterFrameLayer = {
+ val key2Bounds = sparkUdf(keyCol2Extent andThen (_.jtsGeom))
self.withColumn(colName, key2Bounds(self.spatialKeyColumn)).certify
}
@@ -72,10 +87,10 @@ trait RFSpatialColumnMethods extends MethodExtensions[RasterFrame] with Standard
* Append a column containing the center of the row's spatial key.
* Coordinate is in native CRS.
* @param colName name of column to append. Defaults to "center"
- * @return updated RasterFrame
+ * @return updated RasterFrameLayer
*/
- def withCenter(colName: String = CENTER_COLUMN.columnName): RasterFrame = {
- val key2Center = sparkUdf(keyCol2Bounds andThen (_.getCentroid))
+ def withCenter(colName: String = CENTER_COLUMN.columnName): RasterFrameLayer = {
+ val key2Center = sparkUdf(keyCol2Extent andThen (_.center.jtsGeom))
self.withColumn(colName, key2Center(self.spatialKeyColumn).as[Point]).certify
}
@@ -83,9 +98,9 @@ trait RFSpatialColumnMethods extends MethodExtensions[RasterFrame] with Standard
* Append a column containing the center of the row's spatial key.
* Coordinate is in (longitude, latitude) (EPSG:4326).
* @param colName name of column to append. Defaults to "center"
- * @return updated RasterFrame
+ * @return updated RasterFrameLayer
*/
- def withCenterLatLng(colName: String = "center"): RasterFrame = {
+ def withCenterLatLng(colName: String = "center"): RasterFrameLayer = {
val key2Center = sparkUdf(keyCol2LatLng)
self.withColumn(colName, key2Center(self.spatialKeyColumn).cast(RFSpatialColumnMethods.LngLatStructType)).certify
}
@@ -94,9 +109,9 @@ trait RFSpatialColumnMethods extends MethodExtensions[RasterFrame] with Standard
* Appends a spatial index column
* @param colName name of new column to create. Defaults to `index`
* @param applyOrdering if true, adds `.orderBy(asc(colName))` to result. Defaults to `true`
- * @return RasterFrame with index column.
+ * @return RasterFrameLayer with index column.
*/
- def withSpatialIndex(colName: String = SPATIAL_INDEX_COLUMN.columnName, applyOrdering: Boolean = true): RasterFrame = {
+ def withSpatialIndex(colName: String = SPATIAL_INDEX_COLUMN.columnName, applyOrdering: Boolean = true): RasterFrameLayer = {
val zindex = sparkUdf(keyCol2LatLng andThen (p ⇒ Z2SFC.index(p._1, p._2).z))
self.withColumn(colName, zindex(self.spatialKeyColumn)) match {
case rf if applyOrdering ⇒ rf.orderBy(asc(colName)).certify
diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/RasterFrameMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/RasterFrameLayerMethods.scala
similarity index 79%
rename from core/src/main/scala/astraea/spark/rasterframes/extensions/RasterFrameMethods.scala
rename to core/src/main/scala/org/locationtech/rasterframes/extensions/RasterFrameLayerMethods.scala
index e83e55fd3..28f2839ed 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/extensions/RasterFrameMethods.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/RasterFrameLayerMethods.scala
@@ -1,54 +1,64 @@
/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
* Copyright 2017 Astraea, Inc.
*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * [http://www.apache.org/licenses/LICENSE-2.0]
*
* Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.extensions
+package org.locationtech.rasterframes.extensions
import java.time.ZonedDateTime
-import astraea.spark.rasterframes.util._
-import astraea.spark.rasterframes.{MetadataKeys, RasterFrame}
+import org.locationtech.rasterframes.util._
+import org.locationtech.rasterframes.RasterFrameLayer
import geotrellis.proj4.CRS
import geotrellis.raster.resample.{NearestNeighbor, ResampleMethod}
import geotrellis.raster.{MultibandTile, ProjectedRaster, Tile, TileLayout}
import geotrellis.spark._
import geotrellis.spark.io._
import geotrellis.spark.tiling.{LayoutDefinition, Tiler}
-import geotrellis.util.{LazyLogging, MethodExtensions}
+import geotrellis.util.MethodExtensions
import geotrellis.vector.ProjectedExtent
import org.apache.spark.annotation.Experimental
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.{Metadata, TimestampType}
import spray.json._
-import astraea.spark.rasterframes.encoders.StandardEncoders._
-import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders._
+import org.locationtech.rasterframes.encoders.StandardEncoders._
+import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders._
+import com.typesafe.scalalogging.LazyLogging
+import org.locationtech.rasterframes.MetadataKeys
+import org.locationtech.rasterframes.tiles.ShowableTile
+
import scala.reflect.runtime.universe._
/**
- * Extension methods on [[RasterFrame]] type.
- * @since 7/18/17
+ * Extension methods on [[RasterFrameLayer]] type.
+ *
+ * @since 7/18/17
*/
-trait RasterFrameMethods extends MethodExtensions[RasterFrame]
+trait RasterFrameLayerMethods extends MethodExtensions[RasterFrameLayer]
with RFSpatialColumnMethods with MetadataKeys with LazyLogging {
- import Implicits.{WithDataFrameMethods, WithRasterFrameMethods}
+ import Implicits.{WithDataFrameMethods, WithRasterFrameLayerMethods}
/**
- * A convenience over `DataFrame.withColumnRenamed` whereby the `RasterFrame` type is maintained.
+ * A convenience over `DataFrame.withColumnRenamed` whereby the `RasterFrameLayer` type is maintained.
*/
- def withRFColumnRenamed(existingName: String, newName: String): RasterFrame =
+ def withRFColumnRenamed(existingName: String, newName: String): RasterFrameLayer =
(self: DataFrame).withColumnRenamed(existingName, newName).certify
/** Get the spatial column. */
@@ -66,7 +76,7 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame]
def tileLayerMetadata: Either[TileLayerMetadata[SpatialKey], TileLayerMetadata[SpaceTimeKey]] = {
val spatialMD = self.findSpatialKeyField
.map(_.metadata)
- .getOrElse(throw new IllegalArgumentException(s"RasterFrame operation requsted on non-RasterFrame: $self"))
+ .getOrElse(throw new IllegalArgumentException(s"RasterFrameLayer operation requsted on non-RasterFrameLayer: $self"))
if (self.findTemporalKeyField.nonEmpty)
Right(extract[TileLayerMetadata[SpaceTimeKey]](CONTEXT_METADATA_KEY)(spatialMD))
@@ -74,12 +84,12 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame]
Left(extract[TileLayerMetadata[SpatialKey]](CONTEXT_METADATA_KEY)(spatialMD))
}
- /** Get the CRS covering the RasterFrame. */
+ /** Get the CRS covering the RasterFrameLayer. */
def crs: CRS = tileLayerMetadata.fold(_.crs, _.crs)
- /** Add a temporal key to the RasterFrame, assigning the same temporal key to all rows. */
- def addTemporalComponent(value: TemporalKey): RasterFrame = {
- require(self.temporalKeyColumn.isEmpty, "RasterFrame already has a temporal component")
+ /** Add a temporal key to the RasterFrameLayer, assigning the same temporal key to all rows. */
+ def addTemporalComponent(value: TemporalKey): RasterFrameLayer = {
+ require(self.temporalKeyColumn.isEmpty, "RasterFrameLayer already has a temporal component")
val tlm = tileLayerMetadata.left.get
val newBounds: Bounds[SpaceTimeKey] =
tlm.bounds.flatMap[SpaceTimeKey] {
@@ -101,14 +111,14 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame]
}
/** Create a temporal key from the given time and assign it as thea temporal key for all rows. */
- def addTemporalComponent(value: ZonedDateTime): RasterFrame = addTemporalComponent(TemporalKey(value))
+ def addTemporalComponent(value: ZonedDateTime): RasterFrameLayer = addTemporalComponent(TemporalKey(value))
/**
* Append a column containing the temporal key rendered as a TimeStamp.
* @param colName name of column to add
- * @return updated RasterFrame
+ * @return updated RasterFrameLayer
*/
- def withTimestamp(colName: String = TIMESTAMP_COLUMN.columnName): RasterFrame = {
+ def withTimestamp(colName: String = TIMESTAMP_COLUMN.columnName): RasterFrameLayer = {
self.withColumn(colName, (TEMPORAL_KEY_COLUMN.getField("instant").as[Long] / 1000).cast(TimestampType))
.certify
}
@@ -124,7 +134,7 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame]
* @param joinType One of: `inner`, `outer`, `left_outer`, `right_outer`, `leftsemi`.
*/
@Experimental
- def spatialJoin(right: RasterFrame, joinType: String = "inner"): RasterFrame = {
+ def spatialJoin(right: RasterFrameLayer, joinType: String = "inner"): RasterFrameLayer = {
val left = self
val leftMetadata = left.tileLayerMetadata.merge
@@ -137,7 +147,7 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame]
)
}
- def updateNames(rf: RasterFrame,
+ def updateNames(rf: RasterFrameLayer,
prefix: String,
sk: TypedColumn[Any, SpatialKey],
tk: Option[TypedColumn[Any, TemporalKey]]) = {
@@ -179,7 +189,7 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame]
/**
* Performs a full RDD scans of the key column for the data extent, and updates the [[TileLayerMetadata]] data extent to match.
*/
- def clipLayerExtent: RasterFrame = {
+ def clipLayerExtent: RasterFrameLayer = {
val metadata = tileLayerMetadata
val extent = metadata.merge.extent
val layout = metadata.merge.layout
@@ -213,17 +223,36 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame]
}
/**
- * Convert a single tile column from RasterFrame to a GeoTrellis [[TileLayerRDD]]
+ * Convert a single tile column from RasterFrameLayer to a GeoTrellis [[TileLayerRDD]]
* @param tileCol column with tiles to be the
*/
def toTileLayerRDD(tileCol: Column): Either[TileLayerRDD[SpatialKey], TileLayerRDD[SpaceTimeKey]] =
tileLayerMetadata.fold(
- tlm ⇒ Left(ContextRDD(self.select(self.spatialKeyColumn, tileCol.as[Tile]).rdd, tlm)),
+ tlm ⇒ {
+ val rdd = self.select(self.spatialKeyColumn, tileCol.as[Tile])
+ .rdd
+ .map {
+ // Wrapped tiles can break GeoTrellis Avro code.
+ case (sk, wrapped: ShowableTile) => (sk, wrapped.delegate)
+ case o => o
+ }
+
+ Left(ContextRDD(rdd, tlm))
+ },
tlm ⇒ {
val rdd = self
.select(self.spatialKeyColumn, self.temporalKeyColumn.get, tileCol.as[Tile])
.rdd
- .map { case (sk, tk, v) ⇒ (SpaceTimeKey(sk, tk), v) }
+ .map {
+ case (sk, tk, v) ⇒
+ val tile = v match {
+ // Wrapped tiles can break GeoTrellis Avro code.
+ case wrapped: ShowableTile => wrapped.delegate
+ case o => o
+ }
+
+ (SpaceTimeKey(sk, tk), tile)
+ }
Right(ContextRDD(rdd, tlm))
}
)
@@ -259,7 +288,7 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame]
private[rasterframes] def extract[M: JsonFormat](metadataKey: String)(md: Metadata) =
md.getMetadata(metadataKey).json.parseJson.convertTo[M]
- /** Convert the tiles in the RasterFrame into a single raster. For RasterFrames keyed with temporal keys, they
+ /** Convert the tiles in the RasterFrameLayer into a single raster. For RasterFrames keyed with temporal keys, they
* will be merge undeterministically. */
def toRaster(tileCol: Column,
rasterCols: Int,
@@ -294,7 +323,7 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame]
ProjectedRaster(croppedTile.tile, md.extent, md.crs)
}
- /** Convert the Red, Green & Blue assigned tiles in the RasterFrame into a single color composite raster.
+ /** Convert the Red, Green & Blue assigned tiles in the RasterFrameLayer into a single color composite raster.
* For RasterFrames keyed with temporal keys, they will be merged underterministically. */
def toMultibandRaster(
tileCols: Seq[Column],
diff --git a/core/src/main/scala/org/locationtech/rasterframes/extensions/RasterJoin.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/RasterJoin.scala
new file mode 100644
index 000000000..e0cec7a8c
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/RasterJoin.scala
@@ -0,0 +1,99 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.extensions
+import org.apache.spark.sql._
+import org.apache.spark.sql.functions._
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.functions.reproject_and_merge
+import org.locationtech.rasterframes.util._
+
+import scala.util.Random
+
+object RasterJoin {
+
+ def apply(left: DataFrame, right: DataFrame): DataFrame = {
+ val df = apply(left, right, left("extent"), left("crs"), right("extent"), right("crs"))
+ df.drop(right("extent")).drop(right("crs"))
+ }
+
+ def apply(left: DataFrame, right: DataFrame, leftExtent: Column, leftCRS: Column, rightExtent: Column, rightCRS: Column): DataFrame = {
+ val leftGeom = st_geometry(leftExtent)
+ val rightGeomReproj = st_reproject(st_geometry(rightExtent), rightCRS, leftCRS)
+ val joinExpr = st_intersects(leftGeom, rightGeomReproj)
+ apply(left, right, joinExpr, leftExtent, leftCRS, rightExtent, rightCRS)
+ }
+
+ def apply(left: DataFrame, right: DataFrame, joinExprs: Column, leftExtent: Column, leftCRS: Column, rightExtent: Column, rightCRS: Column): DataFrame = {
+ // Convert resolved column into a symbolic one.
+ def unresolved(c: Column): Column = col(c.columnName)
+
+ // Unique id for temporary columns
+ val id = Random.alphanumeric.take(5).mkString("_", "", "_")
+
+ // Post aggregation left extent. We preserve the original name.
+ val leftExtent2 = leftExtent.columnName
+ // Post aggregation left crs. We preserve the original name.
+ val leftCRS2 = leftCRS.columnName
+ // Post aggregation right extent. We create a new name.
+ val rightExtent2 = id + "extent"
+ // Post aggregation right crs. We create a new name.
+ val rightCRS2 = id + "crs"
+
+
+ // Gathering up various expressions we'll use to construct the result.
+ // After joining We will be doing a groupBy the LHS. We have to define the aggregations to perform after the groupBy.
+ // On the LHS we just want the first thing (subsequent ones should be identical.
+ val leftAggCols = left.columns.map(s => first(left(s), true) as s)
+ // On the RHS we collect result as a list.
+ val rightAggCtx = Seq(collect_list(rightExtent) as rightExtent2, collect_list(rightCRS) as rightCRS2)
+ val rightAggTiles = right.tileColumns.map(c => collect_list(c) as c.columnName)
+ val rightAggOther = right.notTileColumns
+ .filter(n => n.columnName != rightExtent.columnName && n.columnName != rightCRS.columnName)
+ .map(c => collect_list(c) as (c.columnName + "_agg"))
+ val aggCols = leftAggCols ++ rightAggTiles ++ rightAggCtx ++ rightAggOther
+
+ // After the aggregation we take all the tiles we've collected and resample + merge
+ // into LHS extent/CRS.
+ // Use a representative tile from the left for the tile dimensions
+ val leftTile = left.tileColumns.headOption.getOrElse(throw new IllegalArgumentException("Need at least one target tile on LHS"))
+ val reprojCols = rightAggTiles.map(t => reproject_and_merge(
+ col(leftExtent2), col(leftCRS2), col(t.columnName), col(rightExtent2), col(rightCRS2), rf_dimensions(unresolved(leftTile))
+ ) as t.columnName)
+
+ val finalCols = leftAggCols.map(unresolved) ++ reprojCols ++ rightAggOther.map(unresolved)
+
+ // Here's the meat:
+ left
+ // 1. Add a unique ID to each LHS row for subequent grouping.
+ .withColumn(id, monotonically_increasing_id())
+ // 2. Perform the left-outer join
+ .join(right, joinExprs, joinType = "left")
+ // 3. Group by the unique ID, reestablishing the LHS count
+ .groupBy(col(id))
+ // 4. Apply aggregation to left and right columns:
+ // a. LHS just take the first entity
+ // b. RHS collect all results in a list
+ .agg(aggCols.head, aggCols.tail: _*)
+ // 5. Perform merge on RHC tile column collections, pass everything else through.
+ .select(finalCols: _*)
+ }
+}
diff --git a/core/src/main/scala/org/locationtech/rasterframes/extensions/ReprojectToLayer.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/ReprojectToLayer.scala
new file mode 100644
index 000000000..c396deaee
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/ReprojectToLayer.scala
@@ -0,0 +1,49 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.extensions
+
+import geotrellis.spark.{SpatialKey, TileLayerMetadata}
+import org.apache.spark.sql._
+import org.apache.spark.sql.functions.broadcast
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.util._
+object ReprojectToLayer {
+
+ def apply(df: DataFrame, tlm: TileLayerMetadata[SpatialKey]): RasterFrameLayer = {
+ // create a destination dataframe with crs and extend columns
+ // use RasterJoin to do the rest.
+ val gb = tlm.gridBounds
+ val crs = tlm.crs
+
+ val gridItems = for {
+ (col, row) <- gb.coordsIter
+ sk = SpatialKey(col, row)
+ e = tlm.mapTransform(sk)
+ } yield (sk, e, crs)
+
+ val dest = df.sparkSession.createDataFrame(gridItems.toSeq)
+ .toDF(SPATIAL_KEY_COLUMN.columnName, EXTENT_COLUMN.columnName, CRS_COLUMN.columnName)
+ val joined = RasterJoin(broadcast(dest), df)
+
+ joined.asLayer(SPATIAL_KEY_COLUMN, tlm)
+ }
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/SQLContextMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/SQLContextMethods.scala
similarity index 84%
rename from core/src/main/scala/astraea/spark/rasterframes/extensions/SQLContextMethods.scala
rename to core/src/main/scala/org/locationtech/rasterframes/extensions/SQLContextMethods.scala
index bcd2b31c4..4a6df34cc 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/extensions/SQLContextMethods.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/SQLContextMethods.scala
@@ -15,12 +15,14 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.extensions
+package org.locationtech.rasterframes.extensions
import geotrellis.util.MethodExtensions
-import org.apache.spark.sql.{SQLContext, rf}
+import org.apache.spark.sql.SQLContext
/**
@@ -30,7 +32,7 @@ import org.apache.spark.sql.{SQLContext, rf}
*/
trait SQLContextMethods extends MethodExtensions[SQLContext] {
def withRasterFrames: SQLContext = {
- astraea.spark.rasterframes.initRF(self)
+ org.locationtech.rasterframes.initRF(self)
self
}
}
diff --git a/core/src/main/scala/org/locationtech/rasterframes/extensions/SinglebandGeoTiffMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/SinglebandGeoTiffMethods.scala
new file mode 100644
index 000000000..833ba80e3
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/SinglebandGeoTiffMethods.scala
@@ -0,0 +1,59 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.extensions
+
+import geotrellis.proj4.CRS
+import geotrellis.raster.io.geotiff.SinglebandGeoTiff
+import geotrellis.util.MethodExtensions
+import geotrellis.vector.Extent
+import org.apache.spark.sql.types.{StructField, StructType}
+import org.apache.spark.sql.{DataFrame, Row, SparkSession}
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.model.TileDimensions
+
+trait SinglebandGeoTiffMethods extends MethodExtensions[SinglebandGeoTiff] {
+ def toDF(dims: TileDimensions = NOMINAL_TILE_DIMS)(implicit spark: SparkSession): DataFrame = {
+
+ val segmentLayout = self.imageData.segmentLayout
+ val re = self.rasterExtent
+ val crs = self.crs
+
+ val windows = segmentLayout.listWindows(dims.cols, dims.rows)
+ val subtiles = self.crop(windows)
+
+ val rows = for {
+ (gridbounds, tile) ← subtiles.toSeq
+ } yield {
+ val extent = re.extentFor(gridbounds, false)
+ Row(extent.toRow, crs.toRow, tile)
+ }
+
+ val schema = StructType(Seq(
+ StructField("extent", schemaOf[Extent], false),
+ StructField("crs", schemaOf[CRS], false),
+ StructField("tile", TileType, false)
+ ))
+
+ spark.createDataFrame(spark.sparkContext.makeRDD(rows, 1), schema)
+ }
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/SparkSessionMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/SparkSessionMethods.scala
similarity index 86%
rename from core/src/main/scala/astraea/spark/rasterframes/extensions/SparkSessionMethods.scala
rename to core/src/main/scala/org/locationtech/rasterframes/extensions/SparkSessionMethods.scala
index 9447e812e..a726b4052 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/extensions/SparkSessionMethods.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/SparkSessionMethods.scala
@@ -15,9 +15,11 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.extensions
+package org.locationtech.rasterframes.extensions
import geotrellis.util.MethodExtensions
import org.apache.spark.sql.SparkSession
@@ -29,7 +31,7 @@ import org.apache.spark.sql.SparkSession
*/
trait SparkSessionMethods extends MethodExtensions[SparkSession] {
def withRasterFrames: SparkSession = {
- astraea.spark.rasterframes.initRF(self.sqlContext)
+ org.locationtech.rasterframes.initRF(self.sqlContext)
self
}
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/functions/package.scala b/core/src/main/scala/org/locationtech/rasterframes/functions/package.scala
similarity index 65%
rename from core/src/main/scala/astraea/spark/rasterframes/functions/package.scala
rename to core/src/main/scala/org/locationtech/rasterframes/functions/package.scala
index 060b08fa3..87894188a 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/functions/package.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/functions/package.scala
@@ -1,28 +1,33 @@
/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
* Copyright 2017 Astraea, Inc.
*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * [http://www.apache.org/licenses/LICENSE-2.0]
*
* Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes
-
-import astraea.spark.rasterframes.expressions.aggstats._
-import astraea.spark.rasterframes.jts.ReprojectionTransformer
-import astraea.spark.rasterframes.util.CRSParser
-import com.vividsolutions.jts.geom.Geometry
-import geotrellis.raster.mapalgebra.local._
+package org.locationtech.rasterframes
+import geotrellis.proj4.CRS
+import geotrellis.raster.reproject.Reproject
import geotrellis.raster.{Tile, _}
import geotrellis.vector.Extent
-import org.apache.spark.sql.SQLContext
+import org.apache.spark.sql.functions.udf
+import org.apache.spark.sql.{Row, SQLContext}
+import org.locationtech.jts.geom.Geometry
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.model.TileDimensions
/**
* Module utils.
@@ -66,6 +71,10 @@ package object functions {
}
}
+ private[rasterframes] val arrayToTile: (Array[_], Int, Int) ⇒ Tile = (a, cols, rows) ⇒ {
+ arrayToTile(cols, rows).apply(a)
+ }
+
/** Set the tile's no-data value. */
private[rasterframes] def withNoData(nodata: Double) = safeEval[Tile, Tile](_.withNoData(Some(nodata)))
@@ -84,6 +93,8 @@ package object functions {
}
}
+
+
/** Alias for constant tiles of zero */
private[rasterframes] val tileZeros: (Int, Int, String) ⇒ Tile = (cols, rows, cellTypeName) ⇒
makeConstantTile(0, cols, rows, cellTypeName)
@@ -92,6 +103,36 @@ package object functions {
private[rasterframes] val tileOnes: (Int, Int, String) ⇒ Tile = (cols, rows, cellTypeName) ⇒
makeConstantTile(1, cols, rows, cellTypeName)
+ val reproject_and_merge_f: (Row, Row, Seq[Tile], Seq[Row], Seq[Row], Row) => Tile = (leftExtentEnc: Row, leftCRSEnc: Row, tiles: Seq[Tile], rightExtentEnc: Seq[Row], rightCRSEnc: Seq[Row], leftDimsEnc: Row) => {
+ if (tiles.isEmpty) null
+ else {
+ require(tiles.length == rightExtentEnc.length && tiles.length == rightCRSEnc.length, "size mismatch")
+
+ val leftExtent = leftExtentEnc.to[Extent]
+ val leftDims = leftDimsEnc.to[TileDimensions]
+ val leftCRS = leftCRSEnc.to[CRS]
+ val rightExtents = rightExtentEnc.map(_.to[Extent])
+ val rightCRSs = rightCRSEnc.map(_.to[CRS])
+
+ val cellType = tiles.map(_.cellType).reduceOption(_ union _).getOrElse(tiles.head.cellType)
+
+ // TODO: how to allow control over... expression?
+ val projOpts = Reproject.Options.DEFAULT
+ val dest: Tile = ArrayTile.empty(cellType, leftDims.cols, leftDims.rows)
+ //is there a GT function to do all this?
+ tiles.zip(rightExtents).zip(rightCRSs).map {
+ case ((tile, extent), crs) =>
+ tile.reproject(extent, crs, leftCRS, projOpts)
+ }.foldLeft(dest)((d, t) =>
+ d.merge(leftExtent, t.extent, t.tile, projOpts.method)
+ )
+ }
+ }
+
+ // NB: Don't be tempted to make this a `val`. Spark will barf if `withRasterFrames` hasn't been called first.
+ def reproject_and_merge = udf(reproject_and_merge_f)
+ .withName("reproject_and_merge")
+
private[rasterframes] val cellTypes: () ⇒ Seq[String] = () ⇒
Seq(
@@ -125,24 +166,12 @@ package object functions {
}
}
- /** Reporjects a geometry column from one CRS to another, where CRS are defined in Proj4 format. */
- private[rasterframes] val reprojectGeometryCRSName: (Geometry, String, String) ⇒ Geometry =
- (sourceGeom, srcName, dstName) ⇒ {
- val src = CRSParser(srcName)
- val dst = CRSParser(dstName)
- val trans = new ReprojectionTransformer(src, dst)
- trans.transform(sourceGeom)
- }
-
def register(sqlContext: SQLContext): Unit = {
-
sqlContext.udf.register("rf_make_constant_tile", makeConstantTile)
- sqlContext.udf.register("rf_tile_zeros", tileZeros)
- sqlContext.udf.register("rf_tile_ones", tileOnes)
-
+ sqlContext.udf.register("rf_make_zeros_tile", tileZeros)
+ sqlContext.udf.register("rf_make_ones_tile", tileOnes)
sqlContext.udf.register("rf_cell_types", cellTypes)
sqlContext.udf.register("rf_rasterize", rasterize)
-
- sqlContext.udf.register("rf_reproject_geometry", reprojectGeometryCRSName)
+ sqlContext.udf.register("rf_array_to_tile", arrayToTile)
}
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/jts/Implicits.scala b/core/src/main/scala/org/locationtech/rasterframes/jts/Implicits.scala
similarity index 92%
rename from core/src/main/scala/astraea/spark/rasterframes/jts/Implicits.scala
rename to core/src/main/scala/org/locationtech/rasterframes/jts/Implicits.scala
index e257ebfa5..358fdc258 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/jts/Implicits.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/jts/Implicits.scala
@@ -15,21 +15,23 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.jts
+package org.locationtech.rasterframes.jts
import java.sql.{Date, Timestamp}
import java.time.{LocalDate, ZonedDateTime}
-import astraea.spark.rasterframes.expressions.SpatialRelation.{Contains, Intersects}
-import com.vividsolutions.jts.geom._
+import org.locationtech.rasterframes.expressions.SpatialRelation.{Contains, Intersects}
+import org.locationtech.jts.geom._
import geotrellis.util.MethodExtensions
import geotrellis.vector.{Point ⇒ gtPoint}
import org.apache.spark.sql.{Column, TypedColumn}
import org.apache.spark.sql.functions._
import org.locationtech.geomesa.spark.jts.DataFrameFunctions.SpatialConstructors
-import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders._
+import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders._
/**
* Extension methods on typed columns allowing for DSL-like queries over JTS types.
diff --git a/core/src/main/scala/astraea/spark/rasterframes/jts/ReprojectionTransformer.scala b/core/src/main/scala/org/locationtech/rasterframes/jts/ReprojectionTransformer.scala
similarity index 85%
rename from core/src/main/scala/astraea/spark/rasterframes/jts/ReprojectionTransformer.scala
rename to core/src/main/scala/org/locationtech/rasterframes/jts/ReprojectionTransformer.scala
index 1d583c739..c4751cb3c 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/jts/ReprojectionTransformer.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/jts/ReprojectionTransformer.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea. Inc.
+ * Copyright 2018 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,13 +15,14 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes.jts
+package org.locationtech.rasterframes.jts
-import com.vividsolutions.jts.geom.{CoordinateSequence, Geometry}
-import com.vividsolutions.jts.geom.util.GeometryTransformer
+import org.locationtech.jts.geom.{CoordinateSequence, Geometry}
+import org.locationtech.jts.geom.util.GeometryTransformer
import geotrellis.proj4.CRS
/**
diff --git a/core/src/main/scala/astraea/spark/rasterframes/ml/NoDataFilter.scala b/core/src/main/scala/org/locationtech/rasterframes/ml/NoDataFilter.scala
similarity index 89%
rename from core/src/main/scala/astraea/spark/rasterframes/ml/NoDataFilter.scala
rename to core/src/main/scala/org/locationtech/rasterframes/ml/NoDataFilter.scala
index dfe9499a3..5cd9e780e 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/ml/NoDataFilter.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/ml/NoDataFilter.scala
@@ -15,17 +15,22 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.ml
+package org.locationtech.rasterframes.ml
-import astraea.spark.rasterframes.ml.Parameters.HasInputCols
+import org.locationtech.rasterframes.ml.Parameters.HasInputCols
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable}
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.types.StructType
import java.util.ArrayList
+
+import org.locationtech.rasterframes.ml.Parameters.HasInputCols
+
import scala.collection.JavaConversions._
/**
diff --git a/core/src/main/scala/astraea/spark/rasterframes/ml/Parameters.scala b/core/src/main/scala/org/locationtech/rasterframes/ml/Parameters.scala
similarity index 92%
rename from core/src/main/scala/astraea/spark/rasterframes/ml/Parameters.scala
rename to core/src/main/scala/org/locationtech/rasterframes/ml/Parameters.scala
index 4bc2fd476..4d273a7f9 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/ml/Parameters.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/ml/Parameters.scala
@@ -15,9 +15,11 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.ml
+package org.locationtech.rasterframes.ml
import org.apache.spark.ml.param.{Params, StringArrayParam}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/ml/TileColumnSupport.scala b/core/src/main/scala/org/locationtech/rasterframes/ml/TileColumnSupport.scala
similarity index 91%
rename from core/src/main/scala/astraea/spark/rasterframes/ml/TileColumnSupport.scala
rename to core/src/main/scala/org/locationtech/rasterframes/ml/TileColumnSupport.scala
index eccc8f00e..d261f7e91 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/ml/TileColumnSupport.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/ml/TileColumnSupport.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea. Inc.
+ * Copyright 2018 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,10 +15,11 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes.ml
+package org.locationtech.rasterframes.ml
import org.apache.spark.sql.rf.TileUDT
import org.apache.spark.sql.types.{StructField, StructType}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/ml/TileExploder.scala b/core/src/main/scala/org/locationtech/rasterframes/ml/TileExploder.scala
similarity index 91%
rename from core/src/main/scala/astraea/spark/rasterframes/ml/TileExploder.scala
rename to core/src/main/scala/org/locationtech/rasterframes/ml/TileExploder.scala
index d52b82d35..38f978231 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/ml/TileExploder.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/ml/TileExploder.scala
@@ -15,18 +15,20 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.ml
+package org.locationtech.rasterframes.ml
-import astraea.spark.rasterframes._
+import org.locationtech.rasterframes._
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable}
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.types._
-import astraea.spark.rasterframes.util._
+import org.locationtech.rasterframes.util._
/**
* SparkML Transformer for expanding tiles into single cell rows with
@@ -56,7 +58,7 @@ class TileExploder(override val uid: String) extends Transformer
val (tiles, nonTiles) = selectTileAndNonTileFields(dataset.schema)
val tileCols = tiles.map(f ⇒ col(f.name))
val nonTileCols = nonTiles.map(f ⇒ col(f.name))
- val exploder = explode_tiles(tileCols: _*)
+ val exploder = rf_explode_tiles(tileCols: _*)
dataset.select(nonTileCols :+ exploder: _*)
}
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/model/CellContext.scala b/core/src/main/scala/org/locationtech/rasterframes/model/CellContext.scala
similarity index 70%
rename from core/src/main/scala/astraea/spark/rasterframes/model/CellContext.scala
rename to core/src/main/scala/org/locationtech/rasterframes/model/CellContext.scala
index cac2903dd..95a2e1bf0 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/model/CellContext.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/model/CellContext.scala
@@ -19,25 +19,27 @@
*
*/
-package astraea.spark.rasterframes.model
-import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
+package org.locationtech.rasterframes.model
+
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.types.{ShortType, StructField, StructType}
+import org.locationtech.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
+import CatalystSerializer._
-case class CellContext(tile_context: TileContext, tile_data_context: TileDataContext, col_index: Short, row_index: Short)
+case class CellContext(tileContext: TileContext, tileDataContext: TileDataContext, colIndex: Short, rowIndex: Short)
object CellContext {
implicit val serializer: CatalystSerializer[CellContext] = new CatalystSerializer[CellContext] {
override def schema: StructType = StructType(Seq(
- StructField("tile_context", CatalystSerializer[TileContext].schema, false),
- StructField("tile_data_context", CatalystSerializer[TileDataContext].schema, false),
- StructField("col_index", ShortType, false),
- StructField("row_index", ShortType, false)
+ StructField("tileContext", schemaOf[TileContext], false),
+ StructField("tileDataContext", schemaOf[TileDataContext], false),
+ StructField("colIndex", ShortType, false),
+ StructField("rowIndex", ShortType, false)
))
override protected def to[R](t: CellContext, io: CatalystSerializer.CatalystIO[R]): R = io.create(
- io.to(t.tile_context),
- io.to(t.tile_data_context),
- t.col_index,
- t.row_index
+ io.to(t.tileContext),
+ io.to(t.tileDataContext),
+ t.colIndex,
+ t.rowIndex
)
override protected def from[R](t: R, io: CatalystSerializer.CatalystIO[R]): CellContext = CellContext(
io.get[TileContext](t, 0),
diff --git a/core/src/main/scala/astraea/spark/rasterframes/model/Cells.scala b/core/src/main/scala/org/locationtech/rasterframes/model/Cells.scala
similarity index 61%
rename from core/src/main/scala/astraea/spark/rasterframes/model/Cells.scala
rename to core/src/main/scala/org/locationtech/rasterframes/model/Cells.scala
index acf847e45..1f7ae4d75 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/model/Cells.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/model/Cells.scala
@@ -19,42 +19,61 @@
*
*/
-package astraea.spark.rasterframes.model
-import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
-import astraea.spark.rasterframes.ref.RasterRef
-import astraea.spark.rasterframes.ref.RasterRef.RasterRefTile
-import geotrellis.raster.{ArrayTile, Tile}
+package org.locationtech.rasterframes.model
+
+import geotrellis.raster.{ArrayTile, ConstantTile, Tile}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.types.{BinaryType, StructField, StructType}
+import org.locationtech.rasterframes
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
+import org.locationtech.rasterframes.ref.RasterRef
+import org.locationtech.rasterframes.ref.RasterRef.RasterRefTile
+import org.locationtech.rasterframes.tiles.ShowableTile
+import org.locationtech.rasterframes.tiles.ProjectedRasterTile.ConcreteProjectedRasterTile
/** Represents the union of binary cell datas or a reference to the data.*/
case class Cells(data: Either[Array[Byte], RasterRef]) {
def isRef: Boolean = data.isRight
+
/** Convert cells into either a RasterRefTile or an ArrayTile. */
def toTile(ctx: TileDataContext): Tile = {
data.fold(
- bytes => ArrayTile.fromBytes(bytes, ctx.cell_type, ctx.dimensions.cols, ctx.dimensions.rows),
+ bytes => {
+ val t = ArrayTile.fromBytes(bytes, ctx.cellType, ctx.dimensions.cols, ctx.dimensions.rows)
+ if (Cells.showableTiles) new ShowableTile(t)
+ else t
+ },
ref => RasterRefTile(ref)
)
}
}
object Cells {
+ private val showableTiles = rasterframes.rfConfig.getBoolean("showable-tiles")
/** Extracts the Cells from a Tile. */
def apply(t: Tile): Cells = {
t match {
+ case prt: ConcreteProjectedRasterTile =>
+ apply(prt.t)
case ref: RasterRefTile =>
Cells(Right(ref.rr))
- case o =>
+ case const: ConstantTile =>
+ // Need to expand constant tiles so they can be interpreted properly in catalyst and Python.
+ // If we don't, the serialization breaks.
+ Cells(Left(const.toArrayTile().toBytes))
+ case o =>
Cells(Left(o.toBytes))
}
}
implicit def cellsSerializer: CatalystSerializer[Cells] = new CatalystSerializer[Cells] {
- override def schema: StructType = StructType(Seq(
- StructField("cells", BinaryType, true),
- StructField("ref", CatalystSerializer[RasterRef].schema, true)
- ))
+ override def schema: StructType =
+ StructType(
+ Seq(
+ StructField("cells", BinaryType, true),
+ StructField("ref", schemaOf[RasterRef], true)
+ ))
override protected def to[R](t: Cells, io: CatalystSerializer.CatalystIO[R]): R = io.create(
t.data.left.getOrElse(null),
t.data.right.map(rr => io.to(rr)).right.getOrElse(null)
diff --git a/core/src/main/scala/org/locationtech/rasterframes/model/FixedRasterExtent.scala b/core/src/main/scala/org/locationtech/rasterframes/model/FixedRasterExtent.scala
new file mode 100644
index 000000000..cdce274bb
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/model/FixedRasterExtent.scala
@@ -0,0 +1,278 @@
+/*
+ * Copyright 2016 Azavea
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.locationtech.rasterframes.model
+
+
+import geotrellis.raster._
+import geotrellis.vector._
+
+import scala.math.ceil
+
+/**
+ * This class is a copy of the GeoTrellis 2.x `RasterExtent`,
+ * with [GT 3.0 fixes](https://github.com/locationtech/geotrellis/pull/2953/files) incorporated into the
+ * new `GridExtent[T]` class. This class should be removed after RasterFrames is upgraded to GT 3.x.
+ */
+case class FixedRasterExtent(
+ override val extent: Extent,
+ override val cellwidth: Double,
+ override val cellheight: Double,
+ cols: Int,
+ rows: Int
+) extends GridExtent(extent, cellwidth, cellheight) with Grid {
+ import FixedRasterExtent._
+
+ if (cols <= 0) throw GeoAttrsError(s"invalid cols: $cols")
+ if (rows <= 0) throw GeoAttrsError(s"invalid rows: $rows")
+
+ /**
+ * Convert map coordinates (x, y) to grid coordinates (col, row).
+ */
+ final def mapToGrid(x: Double, y: Double): (Int, Int) = {
+ val col = floorWithTolerance((x - extent.xmin) / cellwidth).toInt
+ val row = floorWithTolerance((extent.ymax - y) / cellheight).toInt
+ (col, row)
+ }
+
+ /**
+ * Convert map coordinate x to grid coordinate column.
+ */
+ final def mapXToGrid(x: Double): Int = floorWithTolerance(mapXToGridDouble(x)).toInt
+
+ /**
+ * Convert map coordinate x to grid coordinate column.
+ */
+ final def mapXToGridDouble(x: Double): Double = (x - extent.xmin) / cellwidth
+
+ /**
+ * Convert map coordinate y to grid coordinate row.
+ */
+ final def mapYToGrid(y: Double): Int = floorWithTolerance(mapYToGridDouble(y)).toInt
+
+ /**
+ * Convert map coordinate y to grid coordinate row.
+ */
+ final def mapYToGridDouble(y: Double): Double = (extent.ymax - y ) / cellheight
+
+ /**
+ * Convert map coordinate tuple (x, y) to grid coordinates (col, row).
+ */
+ final def mapToGrid(mapCoord: (Double, Double)): (Int, Int) = {
+ val (x, y) = mapCoord
+ mapToGrid(x, y)
+ }
+
+ /**
+ * Convert a point to grid coordinates (col, row).
+ */
+ final def mapToGrid(p: Point): (Int, Int) =
+ mapToGrid(p.x, p.y)
+
+ /**
+ * The map coordinate of a grid cell is the center point.
+ */
+ final def gridToMap(col: Int, row: Int): (Double, Double) = {
+ val x = col * cellwidth + extent.xmin + (cellwidth / 2)
+ val y = extent.ymax - (row * cellheight) - (cellheight / 2)
+
+ (x, y)
+ }
+
+ /**
+ * For a give column, find the corresponding x-coordinate in the
+ * grid of the present [[FixedRasterExtent]].
+ */
+ final def gridColToMap(col: Int): Double = {
+ col * cellwidth + extent.xmin + (cellwidth / 2)
+ }
+
+ /**
+ * For a give row, find the corresponding y-coordinate in the grid
+ * of the present [[FixedRasterExtent]].
+ */
+ final def gridRowToMap(row: Int): Double = {
+ extent.ymax - (row * cellheight) - (cellheight / 2)
+ }
+
+ /**
+ * Gets the GridBounds aligned with this FixedRasterExtent that is the
+ * smallest subgrid of containing all points within the extent. The
+ * extent is considered inclusive on it's north and west borders,
+ * exclusive on it's east and south borders. See [[FixedRasterExtent]]
+ * for a discussion of grid and extent boundary concepts.
+ *
+ * The 'clamp' flag determines whether or not to clamp the
+ * GridBounds to the FixedRasterExtent; defaults to true. If false,
+ * GridBounds can contain negative values, or values outside of
+ * this FixedRasterExtent's boundaries.
+ *
+ * @param subExtent The extent to get the grid bounds for
+ * @param clamp A boolean
+ */
+ def gridBoundsFor(subExtent: Extent, clamp: Boolean = true): GridBounds = {
+ // West and North boundaries are a simple mapToGrid call.
+ val (colMin, rowMin) = mapToGrid(subExtent.xmin, subExtent.ymax)
+
+ // If South East corner is on grid border lines, we want to still only include
+ // what is to the West and\or North of the point. However if the border point
+ // is not directly on a grid division, include the whole row and/or column that
+ // contains the point.
+ val colMax = {
+ val colMaxDouble = mapXToGridDouble(subExtent.xmax)
+ if(math.abs(colMaxDouble - floorWithTolerance(colMaxDouble)) < FixedRasterExtent.epsilon) colMaxDouble.toInt - 1
+ else colMaxDouble.toInt
+ }
+
+ val rowMax = {
+ val rowMaxDouble = mapYToGridDouble(subExtent.ymin)
+ if(math.abs(rowMaxDouble - floorWithTolerance(rowMaxDouble)) < FixedRasterExtent.epsilon) rowMaxDouble.toInt - 1
+ else rowMaxDouble.toInt
+ }
+
+ if(clamp) {
+ GridBounds(math.min(math.max(colMin, 0), cols - 1),
+ math.min(math.max(rowMin, 0), rows - 1),
+ math.min(math.max(colMax, 0), cols - 1),
+ math.min(math.max(rowMax, 0), rows - 1))
+ } else {
+ GridBounds(colMin, rowMin, colMax, rowMax)
+ }
+ }
+
+ /**
+ * Combine two different [[FixedRasterExtent]]s (which must have the
+ * same cellsizes). The result is a new extent at the same
+ * resolution.
+ */
+ def combine (that: FixedRasterExtent): FixedRasterExtent = {
+ if (cellwidth != that.cellwidth)
+ throw GeoAttrsError(s"illegal cellwidths: $cellwidth and ${that.cellwidth}")
+ if (cellheight != that.cellheight)
+ throw GeoAttrsError(s"illegal cellheights: $cellheight and ${that.cellheight}")
+
+ val newExtent = extent.combine(that.extent)
+ val newRows = ceil(newExtent.height / cellheight).toInt
+ val newCols = ceil(newExtent.width / cellwidth).toInt
+
+ FixedRasterExtent(newExtent, cellwidth, cellheight, newCols, newRows)
+ }
+
+ /**
+ * Returns a [[RasterExtent]] with the same extent, but a modified
+ * number of columns and rows based on the given cell height and
+ * width.
+ */
+ def withResolution(targetCellWidth: Double, targetCellHeight: Double): FixedRasterExtent = {
+ val newCols = math.ceil((extent.xmax - extent.xmin) / targetCellWidth).toInt
+ val newRows = math.ceil((extent.ymax - extent.ymin) / targetCellHeight).toInt
+ FixedRasterExtent(extent, targetCellWidth, targetCellHeight, newCols, newRows)
+ }
+
+ /**
+ * Returns a [[FixedRasterExtent]] with the same extent, but a modified
+ * number of columns and rows based on the given cell height and
+ * width.
+ */
+ def withResolution(cellSize: CellSize): FixedRasterExtent =
+ withResolution(cellSize.width, cellSize.height)
+
+ /**
+ * Returns a [[FixedRasterExtent]] with the same extent and the given
+ * number of columns and rows.
+ */
+ def withDimensions(targetCols: Int, targetRows: Int): FixedRasterExtent =
+ FixedRasterExtent(extent, targetCols, targetRows)
+
+ /**
+ * Adjusts a raster extent so that it can encompass the tile
+ * layout. Will resample the extent, but keep the resolution, and
+ * preserve north and west borders
+ */
+ def adjustTo(tileLayout: TileLayout): FixedRasterExtent = {
+ val totalCols = tileLayout.tileCols * tileLayout.layoutCols
+ val totalRows = tileLayout.tileRows * tileLayout.layoutRows
+
+ val resampledExtent = Extent(extent.xmin, extent.ymax - (cellheight*totalRows),
+ extent.xmin + (cellwidth*totalCols), extent.ymax)
+
+ FixedRasterExtent(resampledExtent, cellwidth, cellheight, totalCols, totalRows)
+ }
+
+ /**
+ * Returns a new [[FixedRasterExtent]] which represents the GridBounds
+ * in relation to this FixedRasterExtent.
+ */
+ def rasterExtentFor(gridBounds: GridBounds): FixedRasterExtent = {
+ val (xminCenter, ymaxCenter) = gridToMap(gridBounds.colMin, gridBounds.rowMin)
+ val (xmaxCenter, yminCenter) = gridToMap(gridBounds.colMax, gridBounds.rowMax)
+ val (hcw, hch) = (cellwidth / 2, cellheight / 2)
+ val e = Extent(xminCenter - hcw, yminCenter - hch, xmaxCenter + hcw, ymaxCenter + hch)
+ FixedRasterExtent(e, cellwidth, cellheight, gridBounds.width, gridBounds.height)
+ }
+}
+
+/**
+ * The companion object for the [[FixedRasterExtent]] type.
+ */
+object FixedRasterExtent {
+ final val epsilon = 0.0000001
+
+ /**
+ * Create a new [[FixedRasterExtent]] from an Extent, a column, and a
+ * row.
+ */
+ def apply(extent: Extent, cols: Int, rows: Int): FixedRasterExtent = {
+ val cw = extent.width / cols
+ val ch = extent.height / rows
+ FixedRasterExtent(extent, cw, ch, cols, rows)
+ }
+
+ /**
+ * Create a new [[FixedRasterExtent]] from an Extent and a [[CellSize]].
+ */
+ def apply(extent: Extent, cellSize: CellSize): FixedRasterExtent = {
+ val cols = (extent.width / cellSize.width).toInt
+ val rows = (extent.height / cellSize.height).toInt
+ FixedRasterExtent(extent, cellSize.width, cellSize.height, cols, rows)
+ }
+
+ /**
+ * Create a new [[FixedRasterExtent]] from a [[CellGrid]] and an Extent.
+ */
+ def apply(tile: CellGrid, extent: Extent): FixedRasterExtent =
+ apply(extent, tile.cols, tile.rows)
+
+ /**
+ * Create a new [[FixedRasterExtent]] from an Extent and a [[CellGrid]].
+ */
+ def apply(extent: Extent, tile: CellGrid): FixedRasterExtent =
+ apply(extent, tile.cols, tile.rows)
+
+
+ /**
+ * The same logic is used in QGIS: https://github.com/qgis/QGIS/blob/607664c5a6b47c559ed39892e736322b64b3faa4/src/analysis/raster/qgsalignraster.cpp#L38
+ * The search query: https://github.com/qgis/QGIS/search?p=2&q=floor&type=&utf8=%E2%9C%93
+ *
+ * GDAL uses smth like that, however it was a bit hard to track it down:
+ * https://github.com/OSGeo/gdal/blob/7601a637dfd204948d00f4691c08f02eb7584de5/gdal/frmts/vrt/vrtsources.cpp#L215
+ * */
+ def floorWithTolerance(value: Double): Double = {
+ val roundedValue = math.round(value)
+ if (math.abs(value - roundedValue) < epsilon) roundedValue
+ else math.floor(value)
+ }
+}
+
diff --git a/core/src/main/scala/org/locationtech/rasterframes/model/LazyCRS.scala b/core/src/main/scala/org/locationtech/rasterframes/model/LazyCRS.scala
new file mode 100644
index 000000000..66352e258
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/model/LazyCRS.scala
@@ -0,0 +1,71 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.model
+
+import LazyCRS.EncodedCRS
+import com.github.blemale.scaffeine.Scaffeine
+import geotrellis.proj4.CRS
+import org.locationtech.proj4j.CoordinateReferenceSystem
+
+class LazyCRS(val encoded: EncodedCRS) extends CRS {
+ private lazy val delegate = LazyCRS.cache.get(encoded)
+ override def proj4jCrs: CoordinateReferenceSystem = delegate.proj4jCrs
+ override def toProj4String: String =
+ if (encoded.startsWith("+proj")) encoded
+ else delegate.toProj4String
+
+ override def equals(o: Any): Boolean = o match {
+ case l: LazyCRS =>
+ encoded == l.encoded ||
+ toProj4String == l.toProj4String ||
+ super.equals(o)
+ case c: CRS =>
+ toProj4String == c.toProj4String ||
+ delegate.equals(c)
+ case _ => false
+ }
+}
+
+object LazyCRS {
+ trait ValidatedCRS
+ type EncodedCRS = String with ValidatedCRS
+
+ @transient
+ private lazy val mapper: PartialFunction[String, CRS] = {
+ case e if e.toUpperCase().startsWith("EPSG") => CRS.fromName(e) //not case-sensitive
+ case p if p.startsWith("+proj") => CRS.fromString(p) // case sensitive
+ case w if w.toUpperCase().startsWith("GEOGCS") => CRS.fromWKT(w) //only case-sensitive inside double quotes
+ }
+
+ @transient
+ private lazy val cache = Scaffeine().build[String, CRS](mapper)
+
+ def apply(crs: CRS): LazyCRS = apply(crs.toProj4String)
+
+ def apply(value: String): LazyCRS = {
+ if (mapper.isDefinedAt(value)) {
+ new LazyCRS(value.asInstanceOf[EncodedCRS])
+ }
+ else throw new IllegalArgumentException(
+ "crs string must be either EPSG code, +proj string, or OGC WKT")
+ }
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/model/TileContext.scala b/core/src/main/scala/org/locationtech/rasterframes/model/TileContext.scala
similarity index 83%
rename from core/src/main/scala/astraea/spark/rasterframes/model/TileContext.scala
rename to core/src/main/scala/org/locationtech/rasterframes/model/TileContext.scala
index f5d49524c..912e1d81e 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/model/TileContext.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/model/TileContext.scala
@@ -19,14 +19,16 @@
*
*/
-package astraea.spark.rasterframes.model
-import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
-import astraea.spark.rasterframes.tiles.ProjectedRasterTile
+package org.locationtech.rasterframes.model
+
import geotrellis.proj4.CRS
import geotrellis.raster.Tile
import geotrellis.vector.Extent
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.types.{StructField, StructType}
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
+import org.locationtech.rasterframes.tiles.ProjectedRasterTile
case class TileContext(extent: Extent, crs: CRS) {
def toProjectRasterTile(t: Tile): ProjectedRasterTile = ProjectedRasterTile(t, extent, crs)
@@ -39,8 +41,8 @@ object TileContext {
}
implicit val serializer: CatalystSerializer[TileContext] = new CatalystSerializer[TileContext] {
override def schema: StructType = StructType(Seq(
- StructField("extent", CatalystSerializer[Extent].schema, false),
- StructField("crs", CatalystSerializer[CRS].schema, false)
+ StructField("extent", schemaOf[Extent], false),
+ StructField("crs", schemaOf[CRS], false)
))
override protected def to[R](t: TileContext, io: CatalystSerializer.CatalystIO[R]): R = io.create(
io.to(t.extent),
diff --git a/core/src/main/scala/astraea/spark/rasterframes/model/TileDataContext.scala b/core/src/main/scala/org/locationtech/rasterframes/model/TileDataContext.scala
similarity index 80%
rename from core/src/main/scala/astraea/spark/rasterframes/model/TileDataContext.scala
rename to core/src/main/scala/org/locationtech/rasterframes/model/TileDataContext.scala
index 121f8b845..9f6bd358f 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/model/TileDataContext.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/model/TileDataContext.scala
@@ -19,15 +19,16 @@
*
*/
-package astraea.spark.rasterframes.model
-import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
+package org.locationtech.rasterframes.model
+
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
import geotrellis.raster.{CellType, Tile}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.types.{StructField, StructType}
+import org.locationtech.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
/** Encapsulates all information about a tile aside from actual cell values. */
-case class TileDataContext(cell_type: CellType, dimensions: TileDimensions)
+case class TileDataContext(cellType: CellType, dimensions: TileDimensions)
object TileDataContext {
/** Extracts the TileDataContext from a Tile. */
@@ -41,12 +42,12 @@ object TileDataContext {
implicit val serializer: CatalystSerializer[TileDataContext] = new CatalystSerializer[TileDataContext] {
override def schema: StructType = StructType(Seq(
- StructField("cell_type", CatalystSerializer[CellType].schema, false),
- StructField("dimensions", CatalystSerializer[TileDimensions].schema, false)
+ StructField("cellType", schemaOf[CellType], false),
+ StructField("dimensions", schemaOf[TileDimensions], false)
))
override protected def to[R](t: TileDataContext, io: CatalystIO[R]): R = io.create(
- io.to(t.cell_type),
+ io.to(t.cellType),
io.to(t.dimensions)
)
override protected def from[R](t: R, io: CatalystIO[R]): TileDataContext = TileDataContext(
diff --git a/core/src/main/scala/astraea/spark/rasterframes/model/TileDimensions.scala b/core/src/main/scala/org/locationtech/rasterframes/model/TileDimensions.scala
similarity index 87%
rename from core/src/main/scala/astraea/spark/rasterframes/model/TileDimensions.scala
rename to core/src/main/scala/org/locationtech/rasterframes/model/TileDimensions.scala
index 2f7f579ba..e419ac668 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/model/TileDimensions.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/model/TileDimensions.scala
@@ -19,13 +19,13 @@
*
*/
-package astraea.spark.rasterframes.model
+package org.locationtech.rasterframes.model
-import astraea.spark.rasterframes.encoders.CatalystSerializer.CatalystIO
-import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
+import org.locationtech.rasterframes.encoders.CatalystSerializer.CatalystIO
import geotrellis.raster.Grid
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.types.{ShortType, StructField, StructType}
+import org.locationtech.rasterframes.encoders.CatalystSerializer
/**
* Typed wrapper for tile size information.
@@ -49,8 +49,8 @@ object TileDimensions {
)
override protected def from[R](t: R, io: CatalystIO[R]): TileDimensions = TileDimensions(
- io.getShort(t, 0),
- io.getShort(t, 1)
+ io.getShort(t, 0).toInt,
+ io.getShort(t, 1).toInt
)
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/package.scala b/core/src/main/scala/org/locationtech/rasterframes/rasterframes.scala
similarity index 65%
rename from core/src/main/scala/astraea/spark/rasterframes/package.scala
rename to core/src/main/scala/org/locationtech/rasterframes/rasterframes.scala
index 7b360ed25..1517e8f0e 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/package.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/rasterframes.scala
@@ -1,56 +1,62 @@
/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
* Copyright 2017 Astraea, Inc.
*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * [http://www.apache.org/licenses/LICENSE-2.0]
*
* Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark
-
-import astraea.spark.rasterframes.encoders.StandardEncoders
-import astraea.spark.rasterframes.util.ZeroSevenCompatibilityKit
+package org.locationtech
import com.typesafe.config.ConfigFactory
import com.typesafe.scalalogging.LazyLogging
+import geotrellis.raster.isData
import geotrellis.raster.{Tile, TileFeature}
import geotrellis.spark.{ContextRDD, Metadata, SpaceTimeKey, SpatialKey, TileLayerMetadata}
import org.apache.spark.rdd.RDD
-import org.apache.spark.sql._
+import org.apache.spark.sql.rf.{RasterSourceUDT, TileUDT}
+import org.apache.spark.sql.{DataFrame, SQLContext, rf}
import org.locationtech.geomesa.spark.jts.DataFrameFunctions
+import org.locationtech.rasterframes.encoders.StandardEncoders
+import org.locationtech.rasterframes.extensions.Implicits
+import org.locationtech.rasterframes.model.TileDimensions
+import org.locationtech.rasterframes.util.ZeroSevenCompatibilityKit
import shapeless.tag.@@
-import scala.language.higherKinds
import scala.reflect.runtime.universe._
-/**
- * Module providing support for RasterFrames.
- * `import astraea.spark.rasterframes._`., and then call `rfInit(SQLContext)`.
- *
- * @since 7/18/17
- */
package object rasterframes extends StandardColumns
with RasterFunctions
with ZeroSevenCompatibilityKit.RasterFunctions
- with rasterframes.extensions.Implicits
+ with Implicits
with rasterframes.jts.Implicits
with StandardEncoders
with DataFrameFunctions.Library
with LazyLogging {
+ @transient
+ private[rasterframes]
+ val rfConfig = ConfigFactory.load().getConfig("rasterframes")
+
/** The generally expected tile size, as defined by configuration property `rasterframes.nominal-tile-size`.*/
@transient
- final val NOMINAL_TILE_SIZE: Int = ConfigFactory.load().getInt("rasterframes.nominal-tile-size")
+ final val NOMINAL_TILE_SIZE: Int = rfConfig.getInt("nominal-tile-size")
+ final val NOMINAL_TILE_DIMS: TileDimensions = TileDimensions(NOMINAL_TILE_SIZE, NOMINAL_TILE_SIZE)
/**
- * Initialization injection point. Must be called before any RasterFrame
+ * Initialization injection point. Must be called before any RasterFrameLayer
* types are used.
*/
def initRF(sqlContext: SQLContext): Unit = {
@@ -79,15 +85,21 @@ package object rasterframes extends StandardColumns
rasterframes.rules.register(sqlContext)
}
+ /** TileUDT type reference. */
+ def TileType = new TileUDT()
+
+ /** RasterSourceUDT type reference. */
+ def RasterSourceType = new RasterSourceUDT()
+
/**
- * A RasterFrame is just a DataFrame with certain invariants, enforced via the methods that create and transform them:
+ * A RasterFrameLayer is just a DataFrame with certain invariants, enforced via the methods that create and transform them:
* 1. One column is a [[geotrellis.spark.SpatialKey]] or [[geotrellis.spark.SpaceTimeKey]]
* 2. One or more columns is a [[Tile]] UDT.
* 3. The `TileLayerMetadata` is encoded and attached to the key column.
*/
- type RasterFrame = DataFrame @@ RasterFrameTag
+ type RasterFrameLayer = DataFrame @@ RasterFrameTag
- /** Tagged type for allowing compiler to help keep track of what has RasterFrame assurances applied to it. */
+ /** Tagged type for allowing compiler to help keep track of what has RasterFrameLayer assurances applied to it. */
trait RasterFrameTag
type TileFeatureLayerRDD[K, D] =
@@ -113,6 +125,8 @@ package object rasterframes extends StandardColumns
trait StandardLayerKey[T] extends Serializable {
val selfType: TypeTag[T]
def isType[R: TypeTag]: Boolean = typeOf[R] =:= selfType.tpe
+ def coerce[K >: T](tlm: TileLayerMetadata[_]): TileLayerMetadata[K] =
+ tlm.asInstanceOf[TileLayerMetadata[K]]
}
object StandardLayerKey {
def apply[T: StandardLayerKey]: StandardLayerKey[T] = implicitly
@@ -124,4 +138,8 @@ package object rasterframes extends StandardColumns
}
}
+ /** Test if a cell value evaluates to true: it is not NoData and it is non-zero */
+ def isCellTrue(v: Double): Boolean = isData(v) & v != 0.0
+ /** Test if a cell value evaluates to true: it is not NoData and it is non-zero */
+ def isCellTrue(v: Int): Boolean = isData(v) & v != 0
}
diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/DelegatingRasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/DelegatingRasterSource.scala
new file mode 100644
index 000000000..c460911a0
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/ref/DelegatingRasterSource.scala
@@ -0,0 +1,89 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.ref
+
+import java.net.URI
+
+import geotrellis.contrib.vlm.{RasterSource => GTRasterSource}
+import geotrellis.proj4.CRS
+import geotrellis.raster.io.geotiff.Tags
+import geotrellis.raster.{CellType, GridBounds, MultibandTile, Raster}
+import geotrellis.vector.Extent
+import org.locationtech.rasterframes.ref.RasterSource.URIRasterSource
+
+/** A RasterFrames RasterSource which delegates most operations to a geotrellis-contrib RasterSource */
+abstract class DelegatingRasterSource(source: URI, delegateBuilder: () => GTRasterSource) extends RasterSource with URIRasterSource {
+ @transient
+ @volatile
+ private var _delRef: GTRasterSource = _
+
+ private def retryableRead[R >: Null](f: GTRasterSource => R): R = synchronized {
+ try {
+ if (_delRef == null)
+ _delRef = delegateBuilder()
+ f(_delRef)
+ }
+ catch {
+ // On this exception we attempt to recreate the delegate and read again.
+ case _: java.nio.BufferUnderflowException =>
+ _delRef = null
+ val newDel = delegateBuilder()
+ val result = f(newDel)
+ _delRef = newDel
+ result
+ }
+ }
+
+ // Bad?
+ override def equals(obj: Any): Boolean = obj match {
+ case drs: DelegatingRasterSource => drs.source == source
+ case _ => false
+ }
+
+ override def hashCode(): Int = source.hashCode()
+
+ // This helps reduce header reads between serializations
+ def info: SimpleRasterInfo = SimpleRasterInfo.cache.get(source.toASCIIString, _ =>
+ retryableRead(rs => SimpleRasterInfo(rs))
+ )
+
+ override def cols: Int = info.cols
+ override def rows: Int = info.rows
+ override def crs: CRS = info.crs
+ override def extent: Extent = info.extent
+ override def cellType: CellType = info.cellType
+ override def bandCount: Int = info.bandCount
+ override def tags: Tags = info.tags
+
+ override protected def readBounds(bounds: Traversable[GridBounds], bands: Seq[Int]): Iterator[Raster[MultibandTile]] =
+ retryableRead(_.readBounds(bounds, bands))
+
+ override def read(bounds: GridBounds, bands: Seq[Int]): Raster[MultibandTile] =
+ retryableRead(_.read(bounds, bands)
+ .getOrElse(throw new IllegalArgumentException(s"Bounds '$bounds' outside of source"))
+ )
+
+ override def read(extent: Extent, bands: Seq[Int]): Raster[MultibandTile] =
+ retryableRead(_.read(extent, bands)
+ .getOrElse(throw new IllegalArgumentException(s"Extent '$extent' outside of source"))
+ )
+}
diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/GDALRasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/GDALRasterSource.scala
new file mode 100644
index 000000000..481155f24
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/ref/GDALRasterSource.scala
@@ -0,0 +1,85 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.ref
+
+import java.net.URI
+
+import com.azavea.gdal.GDALWarp
+import com.typesafe.scalalogging.LazyLogging
+import geotrellis.contrib.vlm.gdal.{GDALRasterSource => VLMRasterSource}
+import geotrellis.proj4.CRS
+import geotrellis.raster.io.geotiff.Tags
+import geotrellis.raster.{CellType, GridBounds, MultibandTile, Raster}
+import geotrellis.vector.Extent
+import org.locationtech.rasterframes.ref.RasterSource.URIRasterSource
+
+case class GDALRasterSource(source: URI) extends RasterSource with URIRasterSource {
+
+ @transient
+ private lazy val gdal: VLMRasterSource = {
+ val cleaned = source.toASCIIString
+ .replace("gdal+", "")
+ .replace("gdal:/", "")
+ // VSIPath doesn't like single slash "file:/path..."
+ val tweaked =
+ if (cleaned.matches("^file:/[^/].*"))
+ cleaned.replace("file:", "")
+ else cleaned
+
+ VLMRasterSource(tweaked)
+ }
+
+ protected def tiffInfo = SimpleRasterInfo.cache.get(source.toASCIIString, _ => SimpleRasterInfo(gdal))
+
+ override def crs: CRS = tiffInfo.crs
+
+ override def extent: Extent = tiffInfo.extent
+
+ private def metadata = Map.empty[String, String]
+
+ override def cellType: CellType = tiffInfo.cellType
+
+ override def bandCount: Int = tiffInfo.bandCount
+
+ override def cols: Int = tiffInfo.cols
+
+ override def rows: Int = tiffInfo.rows
+
+ override def tags: Tags = Tags(metadata, List.empty)
+
+ override protected def readBounds(bounds: Traversable[GridBounds], bands: Seq[Int]): Iterator[Raster[MultibandTile]] =
+ gdal.readBounds(bounds, bands)
+}
+
+object GDALRasterSource extends LazyLogging {
+ def gdalVersion(): String = if (hasGDAL) GDALWarp.get_version_info("--version").trim else "not available"
+
+ @transient
+ lazy val hasGDAL: Boolean = try {
+ val _ = new GDALWarp()
+ true
+ } catch {
+ case _: UnsatisfiedLinkError =>
+ logger.warn("GDAL native bindings are not available. Falling back to JVM-based reader for GeoTIFF format.")
+ false
+ }
+}
diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/HadoopGeoTiffRasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/HadoopGeoTiffRasterSource.scala
new file mode 100644
index 000000000..3249f1bce
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/ref/HadoopGeoTiffRasterSource.scala
@@ -0,0 +1,35 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.ref
+
+import java.net.URI
+
+import geotrellis.spark.io.hadoop.HdfsRangeReader
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.Path
+import org.locationtech.rasterframes.ref.RasterSource.{URIRasterSource, URIRasterSourceDebugString}
+
+case class HadoopGeoTiffRasterSource(source: URI, config: () => Configuration)
+ extends RangeReaderRasterSource with URIRasterSource with URIRasterSourceDebugString { self =>
+ @transient
+ protected lazy val rangeReader = HdfsRangeReader(new Path(source.getPath), config())
+}
diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/InMemoryRasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/InMemoryRasterSource.scala
new file mode 100644
index 000000000..3a6a2f5e1
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/ref/InMemoryRasterSource.scala
@@ -0,0 +1,52 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.ref
+
+import geotrellis.proj4.CRS
+import geotrellis.raster.{CellType, GridBounds, MultibandTile, Raster, Tile}
+import geotrellis.raster.io.geotiff.Tags
+import geotrellis.vector.Extent
+import org.locationtech.rasterframes.ref.RasterSource.EMPTY_TAGS
+import org.locationtech.rasterframes.tiles.ProjectedRasterTile
+
+case class InMemoryRasterSource(tile: Tile, extent: Extent, crs: CRS) extends RasterSource {
+ def this(prt: ProjectedRasterTile) = this(prt, prt.extent, prt.crs)
+
+ override def rows: Int = tile.rows
+
+ override def cols: Int = tile.cols
+
+ override def cellType: CellType = tile.cellType
+
+ override def bandCount: Int = 1
+
+ override def tags: Tags = EMPTY_TAGS
+
+ override protected def readBounds(bounds: Traversable[GridBounds], bands: Seq[Int]): Iterator[Raster[MultibandTile]] = {
+ bounds
+ .map(b => {
+ val subext = rasterExtent.extentFor(b)
+ Raster(MultibandTile(tile.crop(b)), subext)
+ })
+ .toIterator
+ }
+}
diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/JVMGeoTiffRasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/JVMGeoTiffRasterSource.scala
new file mode 100644
index 000000000..cedb81c61
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/ref/JVMGeoTiffRasterSource.scala
@@ -0,0 +1,28 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.ref
+
+import java.net.URI
+
+import geotrellis.contrib.vlm.geotiff.GeoTiffRasterSource
+
+case class JVMGeoTiffRasterSource(source: URI) extends DelegatingRasterSource(source, () => GeoTiffRasterSource(source.toASCIIString))
diff --git a/core/src/main/scala/astraea/spark/rasterframes/ref/ProjectedRasterLike.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/ProjectedRasterLike.scala
similarity index 95%
rename from core/src/main/scala/astraea/spark/rasterframes/ref/ProjectedRasterLike.scala
rename to core/src/main/scala/org/locationtech/rasterframes/ref/ProjectedRasterLike.scala
index 7c5b2729d..515c47d12 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/ref/ProjectedRasterLike.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/ref/ProjectedRasterLike.scala
@@ -19,7 +19,7 @@
*
*/
-package astraea.spark.rasterframes.ref
+package org.locationtech.rasterframes.ref
import geotrellis.proj4.CRS
import geotrellis.raster.CellGrid
diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/RangeReaderRasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/RangeReaderRasterSource.scala
new file mode 100644
index 000000000..90df001bd
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/ref/RangeReaderRasterSource.scala
@@ -0,0 +1,64 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.ref
+
+import com.typesafe.scalalogging.LazyLogging
+import geotrellis.proj4.CRS
+import geotrellis.raster.{CellType, GridBounds, MultibandTile, Raster}
+import geotrellis.raster.io.geotiff.Tags
+import geotrellis.raster.io.geotiff.reader.GeoTiffReader
+import geotrellis.util.RangeReader
+import geotrellis.vector.Extent
+import org.locationtech.rasterframes.util.GeoTiffInfoSupport
+
+trait RangeReaderRasterSource extends RasterSource with GeoTiffInfoSupport with LazyLogging {
+ protected def rangeReader: RangeReader
+
+ private def realInfo =
+ GeoTiffReader.readGeoTiffInfo(rangeReader, streaming = true, withOverviews = false)
+
+ protected lazy val tiffInfo = SimpleRasterInfo(realInfo)
+
+ def crs: CRS = tiffInfo.crs
+
+ def extent: Extent = tiffInfo.extent
+
+ override def cols: Int = tiffInfo.rasterExtent.cols
+
+ override def rows: Int = tiffInfo.rasterExtent.rows
+
+ def cellType: CellType = tiffInfo.cellType
+
+ def bandCount: Int = tiffInfo.bandCount
+
+ override def tags: Tags = tiffInfo.tags
+
+ override protected def readBounds(bounds: Traversable[GridBounds], bands: Seq[Int]): Iterator[Raster[MultibandTile]] = {
+ val info = realInfo
+ val geoTiffTile = GeoTiffReader.geoTiffMultibandTile(info)
+ val intersectingBounds = bounds.flatMap(_.intersection(this)).toSeq
+ geoTiffTile.crop(intersectingBounds, bands.toArray).map {
+ case (gb, tile) =>
+ Raster(tile, rasterExtent.extentFor(gb, clamp = true))
+ }
+ }
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/ref/RasterRef.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/RasterRef.scala
similarity index 62%
rename from core/src/main/scala/astraea/spark/rasterframes/ref/RasterRef.scala
rename to core/src/main/scala/org/locationtech/rasterframes/ref/RasterRef.scala
index ff176765c..b0aabcc48 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/ref/RasterRef.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/ref/RasterRef.scala
@@ -19,26 +19,26 @@
*
*/
-package astraea.spark.rasterframes.ref
+package org.locationtech.rasterframes.ref
-import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
-import astraea.spark.rasterframes.encoders.CatalystSerializer.CatalystIO
-import astraea.spark.rasterframes.tiles.ProjectedRasterTile
import com.typesafe.scalalogging.LazyLogging
import geotrellis.proj4.CRS
-import geotrellis.raster.{CellType, GridBounds, Tile, TileLayout}
-import geotrellis.spark.tiling.LayoutDefinition
+import geotrellis.raster.{CellType, GridBounds, Tile}
import geotrellis.vector.{Extent, ProjectedExtent}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.rf.RasterSourceUDT
-import org.apache.spark.sql.types.{StructField, StructType}
+import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
+import org.locationtech.rasterframes.encoders.CatalystSerializer.{CatalystIO, _}
+import org.locationtech.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
+import org.locationtech.rasterframes.ref.RasterRef.RasterRefTile
+import org.locationtech.rasterframes.tiles.ProjectedRasterTile
/**
* A delayed-read projected raster implementation.
*
* @since 8/21/18
*/
-case class RasterRef(source: RasterSource, subextent: Option[Extent])
+case class RasterRef(source: RasterSource, bandIndex: Int, subextent: Option[Extent])
extends ProjectedRasterLike {
def crs: CRS = source.crs
def extent: Extent = subextent.getOrElse(source.extent)
@@ -46,44 +46,24 @@ case class RasterRef(source: RasterSource, subextent: Option[Extent])
def cols: Int = grid.width
def rows: Int = grid.height
def cellType: CellType = source.cellType
- def tile: ProjectedRasterTile = ProjectedRasterTile(realizedTile, extent, crs)
+ def tile: ProjectedRasterTile = ProjectedRasterTile(RasterRefTile(this), extent, crs)
- protected lazy val grid: GridBounds = source.rasterExtent.gridBoundsFor(extent)
+ protected lazy val grid: GridBounds = source.rasterExtent.gridBoundsFor(extent, true)
protected def srcExtent: Extent = extent
protected lazy val realizedTile: Tile = {
- require(source.bandCount == 1, "Expected singleband tile")
- RasterRef.log.trace(s"Fetching $srcExtent from $source")
- source.read(srcExtent).left.get.tile
- }
-
- /** Splits this tile into smaller tiles based on the reported
- * internal structure of the backing format. May return a single item.*/
- def tileToNative: Seq[RasterRef] = {
- val ex = this.extent
- this.source.nativeTiling
- .filter(_ intersects ex)
- .map(e ⇒ RasterRef(this.source, Option(e)))
+ RasterRef.log.trace(s"Fetching $srcExtent from band $bandIndex of $source")
+ source.read(srcExtent, Seq(bandIndex)).tile.band(0)
}
}
object RasterRef extends LazyLogging {
private val log = logger
-
- /** Constructor for when data extent cover whole raster. */
- def apply(source: RasterSource): RasterRef = RasterRef(source, None)
-
- private[rasterframes]
- def defaultLayout(rr: RasterRef): LayoutDefinition =
- LayoutDefinition(rr.extent, rr.source.nativeLayout
- .getOrElse(TileLayout(1, 1, rr.cols, rr.rows))
- )
-
case class RasterRefTile(rr: RasterRef) extends ProjectedRasterTile {
val extent: Extent = rr.extent
val crs: CRS = rr.crs
- override val cellType = rr.cellType
+ override def cellType = rr.cellType
override val cols: Int = rr.cols
override val rows: Int = rr.rows
@@ -98,18 +78,21 @@ object RasterRef extends LazyLogging {
val rsType = new RasterSourceUDT()
override def schema: StructType = StructType(Seq(
StructField("source", rsType, false),
- StructField("subextent", CatalystSerializer[Extent].schema, true)
+ StructField("bandIndex", IntegerType, false),
+ StructField("subextent", schemaOf[Extent], true)
))
override def to[R](t: RasterRef, io: CatalystIO[R]): R = io.create(
io.to(t.source)(RasterSourceUDT.rasterSourceSerializer),
+ t.bandIndex,
t.subextent.map(io.to[Extent]).orNull
)
override def from[R](row: R, io: CatalystIO[R]): RasterRef = RasterRef(
io.get[RasterSource](row, 0)(RasterSourceUDT.rasterSourceSerializer),
- if (io.isNullAt(row, 1)) None
- else Option(io.get[Extent](row, 1))
+ io.getInt(row, 1),
+ if (io.isNullAt(row, 2)) None
+ else Option(io.get[Extent](row, 2))
)
}
diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala
new file mode 100644
index 000000000..0f73f85cf
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala
@@ -0,0 +1,181 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2018 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.ref
+
+import java.net.URI
+
+import com.github.blemale.scaffeine.Scaffeine
+import com.typesafe.scalalogging.LazyLogging
+import geotrellis.proj4.CRS
+import geotrellis.raster._
+import geotrellis.raster.io.geotiff.Tags
+import geotrellis.vector.Extent
+import org.apache.hadoop.conf.Configuration
+import org.apache.spark.annotation.Experimental
+import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
+import org.apache.spark.sql.rf.RasterSourceUDT
+import org.locationtech.rasterframes.model.{FixedRasterExtent, TileContext, TileDimensions}
+import org.locationtech.rasterframes.{NOMINAL_TILE_DIMS, rfConfig}
+
+import scala.concurrent.duration.Duration
+
+/**
+ * Abstraction over fetching geospatial raster data.
+ *
+ * @since 8/21/18
+ */
+@Experimental
+trait RasterSource extends ProjectedRasterLike with Serializable {
+ import RasterSource._
+
+ def crs: CRS
+
+ def extent: Extent
+
+ def cellType: CellType
+
+ def bandCount: Int
+
+ def tags: Tags
+
+ def read(bounds: GridBounds, bands: Seq[Int]): Raster[MultibandTile] =
+ readBounds(Seq(bounds), bands).next()
+
+ def read(extent: Extent, bands: Seq[Int] = SINGLEBAND): Raster[MultibandTile] =
+ read(rasterExtent.gridBoundsFor(extent, clamp = true), bands)
+
+ def readAll(dims: TileDimensions = NOMINAL_TILE_DIMS, bands: Seq[Int] = SINGLEBAND): Seq[Raster[MultibandTile]] =
+ layoutBounds(dims).map(read(_, bands))
+
+ protected def readBounds(bounds: Traversable[GridBounds], bands: Seq[Int]): Iterator[Raster[MultibandTile]]
+
+ def rasterExtent = FixedRasterExtent(extent, cols, rows)
+
+ def cellSize = CellSize(extent, cols, rows)
+
+ def gridExtent = GridExtent(extent, cellSize)
+
+ def tileContext: TileContext = TileContext(extent, crs)
+
+ def layoutExtents(dims: TileDimensions): Seq[Extent] = {
+ val re = rasterExtent
+ layoutBounds(dims).map(re.rasterExtentFor).map(_.extent)
+ }
+
+ def layoutBounds(dims: TileDimensions): Seq[GridBounds] = {
+ gridBounds.split(dims.cols, dims.rows).toSeq
+ }
+}
+
+object RasterSource extends LazyLogging {
+ final val SINGLEBAND = Seq(0)
+ final val EMPTY_TAGS = Tags(Map.empty, List.empty)
+
+ val cacheTimeout: Duration = Duration.fromNanos(rfConfig.getDuration("raster-source-cache-timeout").toNanos)
+
+ private val rsCache = Scaffeine()
+ .expireAfterAccess(RasterSource.cacheTimeout)
+ .build[String, RasterSource]
+
+ implicit def rsEncoder: ExpressionEncoder[RasterSource] = {
+ RasterSourceUDT // Makes sure UDT is registered first
+ ExpressionEncoder()
+ }
+
+ def apply(source: URI): RasterSource =
+ rsCache.get(
+ source.toASCIIString, _ => source match {
+ case IsGDAL() => GDALRasterSource(source)
+ case IsHadoopGeoTiff() =>
+ // TODO: How can we get the active hadoop configuration
+ // TODO: without having to pass it through?
+ val config = () => new Configuration()
+ HadoopGeoTiffRasterSource(source, config)
+ case IsDefaultGeoTiff() => JVMGeoTiffRasterSource(source)
+ case s => throw new UnsupportedOperationException(s"Reading '$s' not supported")
+ }
+ )
+
+ object IsGDAL {
+
+ /** Determine if we should prefer GDAL for all types. */
+ private val preferGdal: Boolean = org.locationtech.rasterframes.rfConfig.getBoolean("prefer-gdal")
+
+ val gdalOnlyExtensions = Seq(".jp2", ".mrf", ".hdf", ".vrt")
+
+ def gdalOnly(source: URI): Boolean =
+ if (gdalOnlyExtensions.exists(source.getPath.toLowerCase.endsWith)) {
+ require(GDALRasterSource.hasGDAL, s"Can only read $source if GDAL is available")
+ true
+ } else false
+
+ /** Extractor for determining if a scheme indicates GDAL preference. */
+ def unapply(source: URI): Boolean = {
+ lazy val schemeIsGdal = Option(source.getScheme())
+ .exists(_.startsWith("gdal"))
+
+ gdalOnly(source) || ((preferGdal || schemeIsGdal) && GDALRasterSource.hasGDAL)
+ }
+ }
+
+ object IsDefaultGeoTiff {
+ def unapply(source: URI): Boolean = source.getScheme match {
+ case "file" | "http" | "https" | "s3" => true
+ case null | "" ⇒ true
+ case _ => false
+ }
+ }
+
+ object IsHadoopGeoTiff {
+ def unapply(source: URI): Boolean = source.getScheme match {
+ case "hdfs" | "s3n" | "s3a" | "wasb" | "wasbs" => true
+ case _ => false
+ }
+ }
+
+ trait URIRasterSource { _: RasterSource =>
+ def source: URI
+
+ abstract override def toString: String = {
+ s"${getClass.getSimpleName}(${source})"
+ }
+ }
+ trait URIRasterSourceDebugString { _: RasterSource with URIRasterSource with Product =>
+ def toDebugString: String = {
+ val buf = new StringBuilder()
+ buf.append(productPrefix)
+ buf.append("(")
+ buf.append("source=")
+ buf.append(source.toASCIIString)
+ buf.append(", size=")
+ buf.append(size)
+ buf.append(", dimensions=")
+ buf.append(dimensions)
+ buf.append(", crs=")
+ buf.append(crs)
+ buf.append(", extent=")
+ buf.append(extent)
+ buf.append(")")
+ buf.toString
+ }
+ }
+}
diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/SimpleRasterInfo.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/SimpleRasterInfo.scala
new file mode 100644
index 000000000..1a67822e5
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/ref/SimpleRasterInfo.scala
@@ -0,0 +1,82 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.ref
+
+import com.github.blemale.scaffeine.Scaffeine
+import geotrellis.contrib.vlm.geotiff.GeoTiffRasterSource
+import geotrellis.contrib.vlm.{RasterSource => GTRasterSource}
+import geotrellis.proj4.CRS
+import geotrellis.raster.io.geotiff.Tags
+import geotrellis.raster.io.geotiff.reader.GeoTiffReader
+import geotrellis.raster.{CellType, RasterExtent}
+import geotrellis.vector.Extent
+import org.locationtech.rasterframes.ref.RasterSource.EMPTY_TAGS
+
+case class SimpleRasterInfo(
+ cols: Int,
+ rows: Int,
+ cellType: CellType,
+ extent: Extent,
+ rasterExtent: RasterExtent,
+ crs: CRS,
+ tags: Tags,
+ bandCount: Int,
+ noDataValue: Option[Double]
+)
+
+object SimpleRasterInfo {
+ def apply(info: GeoTiffReader.GeoTiffInfo): SimpleRasterInfo =
+ SimpleRasterInfo(
+ info.segmentLayout.totalCols,
+ info.segmentLayout.totalRows,
+ info.cellType,
+ info.extent,
+ info.rasterExtent,
+ info.crs,
+ info.tags,
+ info.bandCount,
+ info.noDataValue
+ )
+
+ def apply(rs: GTRasterSource): SimpleRasterInfo = {
+ def fetchTags: Tags = rs match {
+ case gt: GeoTiffRasterSource => gt.tiff.tags
+ case _ => EMPTY_TAGS
+ }
+
+ SimpleRasterInfo(
+ rs.cols,
+ rs.rows,
+ rs.cellType,
+ rs.extent,
+ rs.rasterExtent,
+ rs.crs,
+ fetchTags,
+ rs.bandCount,
+ None
+ )
+ }
+
+ lazy val cache = Scaffeine()
+ //.recordStats()
+ .build[String, SimpleRasterInfo]
+}
\ No newline at end of file
diff --git a/core/src/main/scala/astraea/spark/rasterframes/rules/SpatialFilterPushdownRules.scala b/core/src/main/scala/org/locationtech/rasterframes/rules/SpatialFilterPushdownRules.scala
similarity index 93%
rename from core/src/main/scala/astraea/spark/rasterframes/rules/SpatialFilterPushdownRules.scala
rename to core/src/main/scala/org/locationtech/rasterframes/rules/SpatialFilterPushdownRules.scala
index d61640748..3b3e54d6f 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/rules/SpatialFilterPushdownRules.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/rules/SpatialFilterPushdownRules.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea. Inc.
+ * Copyright 2018 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,10 +15,11 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes.rules
+package org.locationtech.rasterframes.rules
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.Rule
diff --git a/core/src/main/scala/astraea/spark/rasterframes/rules/SpatialFilters.scala b/core/src/main/scala/org/locationtech/rasterframes/rules/SpatialFilters.scala
similarity index 87%
rename from core/src/main/scala/astraea/spark/rasterframes/rules/SpatialFilters.scala
rename to core/src/main/scala/org/locationtech/rasterframes/rules/SpatialFilters.scala
index 1a1128150..cf731b658 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/rules/SpatialFilters.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/rules/SpatialFilters.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea. Inc.
+ * Copyright 2018 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,12 +15,13 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes.rules
+package org.locationtech.rasterframes.rules
-import com.vividsolutions.jts.geom.Geometry
+import org.locationtech.jts.geom.Geometry
import org.apache.spark.sql.sources.Filter
/**
diff --git a/core/src/main/scala/astraea/spark/rasterframes/rules/SpatialRelationReceiver.scala b/core/src/main/scala/org/locationtech/rasterframes/rules/SpatialRelationReceiver.scala
similarity index 92%
rename from core/src/main/scala/astraea/spark/rasterframes/rules/SpatialRelationReceiver.scala
rename to core/src/main/scala/org/locationtech/rasterframes/rules/SpatialRelationReceiver.scala
index 36c681d14..403d122ea 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/rules/SpatialRelationReceiver.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/rules/SpatialRelationReceiver.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea. Inc.
+ * Copyright 2018 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,10 +15,11 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes.rules
+package org.locationtech.rasterframes.rules
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.sources.{BaseRelation, Filter}
diff --git a/core/src/main/scala/org/locationtech/rasterframes/rules/SpatialUDFSubstitutionRules.scala b/core/src/main/scala/org/locationtech/rasterframes/rules/SpatialUDFSubstitutionRules.scala
new file mode 100644
index 000000000..d6fea76b0
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/rules/SpatialUDFSubstitutionRules.scala
@@ -0,0 +1,42 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2018 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.rules
+
+import org.locationtech.rasterframes.expressions.SpatialRelation
+import org.apache.spark.sql.catalyst.expressions.ScalaUDF
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.rules.Rule
+
+/**
+ * Swaps out spatial relation UDFs for expression forms.
+ *
+ * @since 2/19/18
+ */
+object SpatialUDFSubstitutionRules extends Rule[LogicalPlan] {
+ def apply(plan: LogicalPlan): LogicalPlan = {
+ plan.transform {
+ case q: LogicalPlan => q.transformExpressions {
+ case s: ScalaUDF => SpatialRelation.fromUDF(s).getOrElse(s)
+ }
+ }
+ }
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/rules/TemporalFilters.scala b/core/src/main/scala/org/locationtech/rasterframes/rules/TemporalFilters.scala
similarity index 91%
rename from core/src/main/scala/astraea/spark/rasterframes/rules/TemporalFilters.scala
rename to core/src/main/scala/org/locationtech/rasterframes/rules/TemporalFilters.scala
index 51ea9ddd7..5315b63b7 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/rules/TemporalFilters.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/rules/TemporalFilters.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea. Inc.
+ * Copyright 2018 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,10 +15,11 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes.rules
+package org.locationtech.rasterframes.rules
import java.sql.{Date, Timestamp}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/rules/package.scala b/core/src/main/scala/org/locationtech/rasterframes/rules/package.scala
similarity index 59%
rename from core/src/main/scala/astraea/spark/rasterframes/rules/package.scala
rename to core/src/main/scala/org/locationtech/rasterframes/rules/package.scala
index ff4755a86..0f028e14e 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/rules/package.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/rules/package.scala
@@ -1,4 +1,25 @@
-package astraea.spark.rasterframes
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
diff --git a/core/src/main/scala/astraea/spark/rasterframes/stats/CellHistogram.scala b/core/src/main/scala/org/locationtech/rasterframes/stats/CellHistogram.scala
similarity index 94%
rename from core/src/main/scala/astraea/spark/rasterframes/stats/CellHistogram.scala
rename to core/src/main/scala/org/locationtech/rasterframes/stats/CellHistogram.scala
index efc4908db..be3d547a3 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/stats/CellHistogram.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/stats/CellHistogram.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea. Inc.
+ * Copyright 2018 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,14 +15,16 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes.stats
-import astraea.spark.rasterframes.encoders.StandardEncoders
+package org.locationtech.rasterframes.stats
+
import geotrellis.raster.Tile
import geotrellis.raster.histogram.{Histogram => GTHistogram}
import org.apache.spark.sql.types._
+import org.locationtech.rasterframes.encoders.StandardEncoders
import scala.collection.mutable.{ListBuffer => MutableListBuffer}
@@ -160,13 +162,13 @@ object CellHistogram {
}
else {
val h = tile.histogram
- h.binCounts().map(p ⇒ Bin(p._1, p._2))
+ h.binCounts().map(p ⇒ Bin(p._1.toDouble, p._2))
}
CellHistogram(bins)
}
def apply(hist: GTHistogram[Int]): CellHistogram = {
- CellHistogram(hist.binCounts().map(p ⇒ Bin(p._1, p._2)))
+ CellHistogram(hist.binCounts().map(p ⇒ Bin(p._1.toDouble, p._2)))
}
def apply(hist: GTHistogram[Double])(implicit ev: DummyImplicit): CellHistogram = {
CellHistogram(hist.binCounts().map(p ⇒ Bin(p._1, p._2)))
diff --git a/core/src/main/scala/astraea/spark/rasterframes/stats/CellStatistics.scala b/core/src/main/scala/org/locationtech/rasterframes/stats/CellStatistics.scala
similarity index 93%
rename from core/src/main/scala/astraea/spark/rasterframes/stats/CellStatistics.scala
rename to core/src/main/scala/org/locationtech/rasterframes/stats/CellStatistics.scala
index e1ba03b60..ea371666d 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/stats/CellStatistics.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/stats/CellStatistics.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea. Inc.
+ * Copyright 2018 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,13 +15,15 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes.stats
-import astraea.spark.rasterframes.encoders.StandardEncoders
+package org.locationtech.rasterframes.stats
+
import geotrellis.raster.Tile
import org.apache.spark.sql.types.StructType
+import org.locationtech.rasterframes.encoders.StandardEncoders
/**
* Container for computed statistics over cells.
diff --git a/core/src/main/scala/astraea/spark/rasterframes/stats/LocalCellStatistics.scala b/core/src/main/scala/org/locationtech/rasterframes/stats/LocalCellStatistics.scala
similarity index 94%
rename from core/src/main/scala/astraea/spark/rasterframes/stats/LocalCellStatistics.scala
rename to core/src/main/scala/org/locationtech/rasterframes/stats/LocalCellStatistics.scala
index 685722f62..39c0671f8 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/stats/LocalCellStatistics.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/stats/LocalCellStatistics.scala
@@ -19,7 +19,8 @@
*
*/
-package astraea.spark.rasterframes.stats
+package org.locationtech.rasterframes.stats
+
import geotrellis.raster.Tile
case class LocalCellStatistics(count: Tile, min: Tile, max: Tile, mean: Tile, variance: Tile)
diff --git a/core/src/main/scala/org/locationtech/rasterframes/tiles/FixedDelegatingTile.scala b/core/src/main/scala/org/locationtech/rasterframes/tiles/FixedDelegatingTile.scala
new file mode 100644
index 000000000..52bfa5c1d
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/tiles/FixedDelegatingTile.scala
@@ -0,0 +1,40 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.tiles
+import geotrellis.raster.{ArrayTile, DelegatingTile, Tile}
+
+/**
+ * Temporary workaround for https://github.com/locationtech/geotrellis/issues/2907
+ *
+ * @since 8/22/18
+ */
+trait FixedDelegatingTile extends DelegatingTile {
+ override def combine(r2: Tile)(f: (Int, Int) ⇒ Int): Tile = (delegate, r2) match {
+ case (del: ArrayTile, r2: DelegatingTile) ⇒ del.combine(r2.toArrayTile())(f)
+ case _ ⇒ delegate.combine(r2)(f)
+ }
+
+ override def combineDouble(r2: Tile)(f: (Double, Double) ⇒ Double): Tile = (delegate, r2) match {
+ case (del: ArrayTile, r2: DelegatingTile) ⇒ del.combineDouble(r2.toArrayTile())(f)
+ case _ ⇒ delegate.combineDouble(r2)(f)
+ }
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/tiles/InternalRowTile.scala b/core/src/main/scala/org/locationtech/rasterframes/tiles/InternalRowTile.scala
similarity index 86%
rename from core/src/main/scala/astraea/spark/rasterframes/tiles/InternalRowTile.scala
rename to core/src/main/scala/org/locationtech/rasterframes/tiles/InternalRowTile.scala
index 021f0946a..98be22446 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/tiles/InternalRowTile.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/tiles/InternalRowTile.scala
@@ -19,52 +19,38 @@
*
*/
-package astraea.spark.rasterframes.tiles
+package org.locationtech.rasterframes.tiles
import java.nio.ByteBuffer
-import astraea.spark.rasterframes.encoders.CatalystSerializer.CatalystIO
-import astraea.spark.rasterframes.model.{Cells, TileDataContext}
+import org.locationtech.rasterframes.encoders.CatalystSerializer.CatalystIO
import geotrellis.raster._
import org.apache.spark.sql.catalyst.InternalRow
+import org.locationtech.rasterframes.model.{Cells, TileDataContext}
/**
* Wrapper around a `Tile` encoded in a Catalyst `InternalRow`, for the purpose
* of providing compatible semantics over common operations.
*
- * @groupname COPIES Memory Copying
- * @groupdesc COPIES Requires creating an intermediate copy of
- * the complete `Tile` contents, and should be avoided.
- *
* @since 11/29/17
*/
-class InternalRowTile(val mem: InternalRow) extends DelegatingTile {
+class InternalRowTile(val mem: InternalRow) extends FixedDelegatingTile {
import InternalRowTile._
- /** @group COPIES */
- override def toArrayTile(): ArrayTile = realizedTile
+ override def toArrayTile(): ArrayTile = realizedTile.toArrayTile()
- // TODO: We want to reimpliement the delegated methods so that they read directly from tungsten storage
- protected lazy val realizedTile: ArrayTile = {
- val data = toBytes
- if(data.length < cols * rows && cellType.name != "bool") {
- val ctile = ConstantTile.fromBytes(data, cellType, cols, rows)
- val atile = ctile.toArrayTile()
- atile
- }
- else
- ArrayTile.fromBytes(data, cellType, cols, rows)
- }
+ // TODO: We want to reimplement relevant delegated methods so that they read directly from tungsten storage
+ lazy val realizedTile: Tile = cells.toTile(cellContext)
- /** @group COPIES */
protected override def delegate: Tile = realizedTile
- private lazy val cellContext: TileDataContext =
+ private def cellContext: TileDataContext =
CatalystIO[InternalRow].get[TileDataContext](mem, 0)
+ private def cells: Cells = CatalystIO[InternalRow].get[Cells](mem, 1)
/** Retrieve the cell type from the internal encoding. */
- override def cellType: CellType = cellContext.cell_type
+ override def cellType: CellType = cellContext.cellType
/** Retrieve the number of columns from the internal encoding. */
override def cols: Int = cellContext.dimensions.cols
@@ -74,13 +60,9 @@ class InternalRowTile(val mem: InternalRow) extends DelegatingTile {
/** Get the internally encoded tile data cells. */
override lazy val toBytes: Array[Byte] = {
- val cellData = CatalystIO[InternalRow]
- .get[Cells](mem, 1)
- .data
-
- cellData.left
+ cells.data.left
.getOrElse(throw new IllegalStateException(
- "Expected tile cell bytes, but received RasterRef instead: " + cellData.right.get)
+ "Expected tile cell bytes, but received RasterRef instead: " + cells.data.right.get)
)
}
@@ -98,12 +80,11 @@ class InternalRowTile(val mem: InternalRow) extends DelegatingTile {
}
/** Reads the cell value at the given index as an Int. */
- def apply(i: Int): Int = cellReader(i)
+ def apply(i: Int): Int = cellReader.apply(i)
/** Reads the cell value at the given index as a Double. */
def applyDouble(i: Int): Double = cellReader.applyDouble(i)
- /** @group COPIES */
def copy = new InternalRowTile(mem.copy)
private lazy val cellReader: CellReader = {
@@ -132,6 +113,8 @@ class InternalRowTile(val mem: InternalRow) extends DelegatingTile {
case _: DoubleCells ⇒ DoubleCellReader(this)
}
}
+
+ override def toString: String = ShowableTile.show(this)
}
object InternalRowTile {
diff --git a/core/src/main/scala/astraea/spark/rasterframes/tiles/ProjectedRasterTile.scala b/core/src/main/scala/org/locationtech/rasterframes/tiles/ProjectedRasterTile.scala
similarity index 77%
rename from core/src/main/scala/astraea/spark/rasterframes/tiles/ProjectedRasterTile.scala
rename to core/src/main/scala/org/locationtech/rasterframes/tiles/ProjectedRasterTile.scala
index a9551dd13..92e2d285d 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/tiles/ProjectedRasterTile.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/tiles/ProjectedRasterTile.scala
@@ -19,32 +19,33 @@
*
*/
-package astraea.spark.rasterframes.tiles
+package org.locationtech.rasterframes.tiles
-import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
-import astraea.spark.rasterframes.encoders.CatalystSerializer.CatalystIO
-import astraea.spark.rasterframes.model.TileContext
-import astraea.spark.rasterframes.ref.ProjectedRasterLike
-import astraea.spark.rasterframes.ref.RasterRef.RasterRefTile
import geotrellis.proj4.CRS
import geotrellis.raster.io.geotiff.SinglebandGeoTiff
import geotrellis.raster.{CellType, ProjectedRaster, Tile}
import geotrellis.vector.{Extent, ProjectedExtent}
-import org.apache.spark.sql.Encoder
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.rf.TileUDT
import org.apache.spark.sql.types.{StructField, StructType}
+import org.locationtech.rasterframes.TileType
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder}
+import org.locationtech.rasterframes.model.TileContext
+import org.locationtech.rasterframes.ref.ProjectedRasterLike
+import org.locationtech.rasterframes.ref.RasterRef.RasterRefTile
/**
* A Tile that's also like a ProjectedRaster, with delayed evaluation support.
*
* @since 9/5/18
*/
-trait ProjectedRasterTile extends DelegatingTile with ProjectedRasterLike {
+trait ProjectedRasterTile extends FixedDelegatingTile with ProjectedRasterLike {
def extent: Extent
def crs: CRS
def projectedExtent: ProjectedExtent = ProjectedExtent(extent, crs)
def projectedRaster: ProjectedRaster[Tile] = ProjectedRaster[Tile](this, extent, crs)
+ def mapTile(f: Tile => Tile): ProjectedRasterTile = ProjectedRasterTile(f(this), extent, crs)
}
object ProjectedRasterTile {
@@ -58,14 +59,19 @@ object ProjectedRasterTile {
case class ConcreteProjectedRasterTile(t: Tile, extent: Extent, crs: CRS)
extends ProjectedRasterTile {
def delegate: Tile = t
+
override def convert(cellType: CellType): Tile =
ConcreteProjectedRasterTile(t.convert(cellType), extent, crs)
- }
+ override def toString: String = {
+ val e = s"(${extent.xmin}, ${extent.ymin}, ${extent.xmax}, ${extent.ymax})"
+ val c = crs.toProj4String
+ s"[${ShowableTile.show(t)}, $e, $c]"
+ }
+ }
implicit val serializer: CatalystSerializer[ProjectedRasterTile] = new CatalystSerializer[ProjectedRasterTile] {
- val TileType = new TileUDT()
override def schema: StructType = StructType(Seq(
- StructField("tile_context", CatalystSerializer[TileContext].schema, false),
+ StructField("tile_context", schemaOf[TileContext], false),
StructField("tile", TileType, false))
)
diff --git a/core/src/main/scala/org/locationtech/rasterframes/tiles/ShowableTile.scala b/core/src/main/scala/org/locationtech/rasterframes/tiles/ShowableTile.scala
new file mode 100644
index 000000000..00872ff6c
--- /dev/null
+++ b/core/src/main/scala/org/locationtech/rasterframes/tiles/ShowableTile.scala
@@ -0,0 +1,55 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.tiles
+import org.locationtech.rasterframes._
+import geotrellis.raster.Tile
+
+class ShowableTile(val delegate: Tile) extends FixedDelegatingTile {
+ override def equals(obj: Any): Boolean = obj match {
+ case st: ShowableTile => delegate.equals(st.delegate)
+ case o => delegate.equals(o)
+ }
+ override def hashCode(): Int = delegate.hashCode()
+ override def toString: String = ShowableTile.show(delegate)
+}
+
+object ShowableTile {
+ private val maxCells = rfConfig.getInt("showable-max-cells")
+ def show(tile: Tile): String = {
+ val ct = tile.cellType
+ val dims = tile.dimensions
+
+ val data = if (tile.cellType.isFloatingPoint)
+ tile.toArrayDouble()
+ else tile.toArray()
+
+ val cells = if(tile.size <= maxCells) {
+ data.mkString("[", ",", "]")
+ }
+ else {
+ val front = data.take(maxCells/2).mkString("[", ",", "")
+ val back = data.takeRight(maxCells/2).mkString("", ",", "]")
+ front + ",...," + back
+ }
+ s"[${ct.name}, $dims, $cells]"
+ }
+}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/DataBiasedOp.scala b/core/src/main/scala/org/locationtech/rasterframes/util/DataBiasedOp.scala
similarity index 97%
rename from core/src/main/scala/astraea/spark/rasterframes/util/DataBiasedOp.scala
rename to core/src/main/scala/org/locationtech/rasterframes/util/DataBiasedOp.scala
index c2e2578a3..83e5fe76c 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/util/DataBiasedOp.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/util/DataBiasedOp.scala
@@ -19,7 +19,7 @@
*
*/
-package astraea.spark.rasterframes.util
+package org.locationtech.rasterframes.util
import geotrellis.raster
import geotrellis.raster.isNoData
diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/GeoTiffInfoSupport.scala b/core/src/main/scala/org/locationtech/rasterframes/util/GeoTiffInfoSupport.scala
similarity index 96%
rename from core/src/main/scala/astraea/spark/rasterframes/util/GeoTiffInfoSupport.scala
rename to core/src/main/scala/org/locationtech/rasterframes/util/GeoTiffInfoSupport.scala
index 724d7eaeb..e24bb8175 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/util/GeoTiffInfoSupport.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/util/GeoTiffInfoSupport.scala
@@ -19,7 +19,8 @@
*
*/
-package astraea.spark.rasterframes.util
+package org.locationtech.rasterframes.util
+
import geotrellis.raster.TileLayout
import geotrellis.raster.io.geotiff.reader.GeoTiffReader
import geotrellis.raster.io.geotiff.reader.GeoTiffReader.GeoTiffInfo
@@ -37,7 +38,7 @@ trait GeoTiffInfoSupport {
val MAX_SIZE = 256
private def defaultLayout(cols: Int, rows: Int): TileLayout = {
def divs(cells: Int) = {
- val layoutDivs = math.ceil(cells / MAX_SIZE.toFloat)
+ val layoutDivs = math.ceil(cells / MAX_SIZE.toDouble)
val tileDivs = math.ceil(cells / layoutDivs)
(layoutDivs.toInt, tileDivs.toInt)
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/KryoSupport.scala b/core/src/main/scala/org/locationtech/rasterframes/util/KryoSupport.scala
similarity index 97%
rename from core/src/main/scala/astraea/spark/rasterframes/util/KryoSupport.scala
rename to core/src/main/scala/org/locationtech/rasterframes/util/KryoSupport.scala
index b20aa7851..26754b91d 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/util/KryoSupport.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/util/KryoSupport.scala
@@ -19,7 +19,7 @@
*
*/
-package astraea.spark.rasterframes.util
+package org.locationtech.rasterframes.util
import java.nio.ByteBuffer
diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/MultibandRender.scala b/core/src/main/scala/org/locationtech/rasterframes/util/MultibandRender.scala
similarity index 97%
rename from core/src/main/scala/astraea/spark/rasterframes/util/MultibandRender.scala
rename to core/src/main/scala/org/locationtech/rasterframes/util/MultibandRender.scala
index 764d049ee..b576f1e67 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/util/MultibandRender.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/util/MultibandRender.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea. Inc.
+ * Copyright 2018 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,10 +15,11 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes.util
+package org.locationtech.rasterframes.util
import geotrellis.raster._
import geotrellis.raster.render.{ColorRamp, Png}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/RFKryoRegistrator.scala b/core/src/main/scala/org/locationtech/rasterframes/util/RFKryoRegistrator.scala
similarity index 72%
rename from core/src/main/scala/astraea/spark/rasterframes/util/RFKryoRegistrator.scala
rename to core/src/main/scala/org/locationtech/rasterframes/util/RFKryoRegistrator.scala
index 58fb62121..8275c6402 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/util/RFKryoRegistrator.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/util/RFKryoRegistrator.scala
@@ -19,11 +19,11 @@
*
*/
-package astraea.spark.rasterframes.util
+package org.locationtech.rasterframes.util
-import astraea.spark.rasterframes.ref.RasterRef.RasterRefTile
-import astraea.spark.rasterframes.ref.{RasterRef, RasterSource}
-import astraea.spark.rasterframes.ref.RasterSource._
+import org.locationtech.rasterframes.ref.RasterRef.RasterRefTile
+import org.locationtech.rasterframes.ref.{DelegatingRasterSource, RasterRef, RasterSource}
+import org.locationtech.rasterframes.ref._
import com.esotericsoftware.kryo.Kryo
@@ -36,14 +36,15 @@ import com.esotericsoftware.kryo.Kryo
class RFKryoRegistrator extends geotrellis.spark.io.kryo.KryoRegistrator {
override def registerClasses(kryo: Kryo): Unit = {
super.registerClasses(kryo)
- kryo.register(classOf[ReadCallback])
kryo.register(classOf[RasterSource])
kryo.register(classOf[RasterRef])
kryo.register(classOf[RasterRefTile])
- kryo.register(classOf[FileGeoTiffRasterSource])
+ kryo.register(classOf[DelegatingRasterSource])
+ kryo.register(classOf[JVMGeoTiffRasterSource])
+ kryo.register(classOf[InMemoryRasterSource])
kryo.register(classOf[HadoopGeoTiffRasterSource])
- kryo.register(classOf[S3GeoTiffRasterSource])
- kryo.register(classOf[HttpGeoTiffRasterSource])
+ kryo.register(classOf[GDALRasterSource])
+ kryo.register(classOf[SimpleRasterInfo])
kryo.register(classOf[geotrellis.raster.io.geotiff.reader.GeoTiffReader.GeoTiffInfo])
}
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/SubdivideSupport.scala b/core/src/main/scala/org/locationtech/rasterframes/util/SubdivideSupport.scala
similarity index 97%
rename from core/src/main/scala/astraea/spark/rasterframes/util/SubdivideSupport.scala
rename to core/src/main/scala/org/locationtech/rasterframes/util/SubdivideSupport.scala
index 162614651..24ee2ce2d 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/util/SubdivideSupport.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/util/SubdivideSupport.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea. Inc.
+ * Copyright 2018 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,10 +15,11 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes.util
+package org.locationtech.rasterframes.util
import geotrellis.raster.crop.Crop
import geotrellis.raster.{CellGrid, TileLayout}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/ZeroSevenCompatibilityKit.scala b/core/src/main/scala/org/locationtech/rasterframes/util/ZeroSevenCompatibilityKit.scala
similarity index 54%
rename from core/src/main/scala/astraea/spark/rasterframes/util/ZeroSevenCompatibilityKit.scala
rename to core/src/main/scala/org/locationtech/rasterframes/util/ZeroSevenCompatibilityKit.scala
index bbb23a282..5826ad09a 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/util/ZeroSevenCompatibilityKit.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/util/ZeroSevenCompatibilityKit.scala
@@ -19,17 +19,18 @@
*
*/
-package astraea.spark.rasterframes.util
-import astraea.spark.rasterframes.expressions.TileAssembler
-import astraea.spark.rasterframes.expressions.accessors._
-import astraea.spark.rasterframes.expressions.aggstats._
-import astraea.spark.rasterframes.expressions.generators._
-import astraea.spark.rasterframes.expressions.localops._
-import astraea.spark.rasterframes.expressions.tilestats._
-import astraea.spark.rasterframes.expressions.transformers._
-import astraea.spark.rasterframes.stats.{CellHistogram, CellStatistics}
-import astraea.spark.rasterframes.{functions => F}
-import com.vividsolutions.jts.geom.Geometry
+package org.locationtech.rasterframes.util
+
+import org.locationtech.rasterframes.expressions.TileAssembler
+import org.locationtech.rasterframes.expressions.accessors._
+import org.locationtech.rasterframes.expressions.aggregates._
+import org.locationtech.rasterframes.expressions.generators._
+import org.locationtech.rasterframes.expressions.localops._
+import org.locationtech.rasterframes.expressions.tilestats._
+import org.locationtech.rasterframes.expressions.transformers._
+import org.locationtech.rasterframes.stats._
+import org.locationtech.rasterframes.{functions => F}
+import org.locationtech.jts.geom.Geometry
import geotrellis.proj4.CRS
import geotrellis.raster.mapalgebra.local.LocalTileBinaryOp
import geotrellis.raster.{CellType, Tile}
@@ -45,181 +46,181 @@ import org.apache.spark.sql.{Column, SQLContext, TypedColumn, rf}
* @since 4/3/17
*/
object ZeroSevenCompatibilityKit {
- import astraea.spark.rasterframes.encoders.StandardEncoders._
+ import org.locationtech.rasterframes.encoders.StandardEncoders._
trait RasterFunctions {
- private val delegate = new astraea.spark.rasterframes.RasterFunctions {}
+ private val delegate = new org.locationtech.rasterframes.RasterFunctions {}
// format: off
/** Create a row for each cell in Tile. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def explodeTiles(cols: Column*): Column = delegate.explode_tiles(cols: _*)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def explodeTiles(cols: Column*): Column = delegate.rf_explode_tiles(cols: _*)
/** Create a row for each cell in Tile with random sampling and optional seed. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
def explodeTilesSample(sampleFraction: Double, seed: Option[Long], cols: Column*): Column =
ExplodeTiles(sampleFraction, seed, cols)
/** Create a row for each cell in Tile with random sampling (no seed). */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
def explodeTilesSample(sampleFraction: Double, cols: Column*): Column =
ExplodeTiles(sampleFraction, None, cols)
/** Query the number of (cols, rows) in a Tile. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
def tileDimensions(col: Column): Column = GetDimensions(col)
@Experimental
/** Convert array in `arrayCol` into a Tile of dimensions `cols` and `rows`*/
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def arrayToTile(arrayCol: Column, cols: Int, rows: Int) = withAlias("array_to_tile", arrayCol)(
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def arrayToTile(arrayCol: Column, cols: Int, rows: Int) = withAlias("rf_array_to_tile", arrayCol)(
udf[Tile, AnyRef](F.arrayToTile(cols, rows)).apply(arrayCol)
)
/** Create a Tile from a column of cell data with location indexes and preform cell conversion. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
def assembleTile(columnIndex: Column, rowIndex: Column, cellData: Column, tileCols: Int, tileRows: Int, ct: CellType): TypedColumn[Any, Tile] =
convertCellType(TileAssembler(columnIndex, rowIndex, cellData, lit(tileCols), lit(tileRows)), ct).as(cellData.columnName).as[Tile]
/** Create a Tile from a column of cell data with location indexes. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
def assembleTile(columnIndex: Column, rowIndex: Column, cellData: Column, tileCols: Column, tileRows: Column): TypedColumn[Any, Tile] =
TileAssembler(columnIndex, rowIndex, cellData, tileCols, tileRows)
/** Extract the Tile's cell type */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
def cellType(col: Column): TypedColumn[Any, CellType] = GetCellType(col)
/** Change the Tile's cell type */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
def convertCellType(col: Column, cellType: CellType): TypedColumn[Any, Tile] =
SetCellType(col, cellType)
/** Change the Tile's cell type */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
def convertCellType(col: Column, cellTypeName: String): TypedColumn[Any, Tile] =
SetCellType(col, cellTypeName)
/** Convert a bounding box structure to a Geometry type. Intented to support multiple schemas. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def boundsGeometry(bounds: Column): TypedColumn[Any, Geometry] = BoundsToGeometry(bounds)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def boundsGeometry(bounds: Column): TypedColumn[Any, Geometry] = ExtentToGeometry(bounds)
/** Assign a `NoData` value to the Tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
def withNoData(col: Column, nodata: Double) = withAlias("withNoData", col)(
udf[Tile, Tile](F.withNoData(nodata)).apply(col)
).as[Tile]
/** Compute the full column aggregate floating point histogram. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def aggHistogram(col: Column): TypedColumn[Any, CellHistogram] = delegate.agg_approx_histogram(col)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def aggHistogram(col: Column): TypedColumn[Any, CellHistogram] = delegate.rf_agg_approx_histogram(col)
/** Compute the full column aggregate floating point statistics. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def aggStats(col: Column): TypedColumn[Any, CellStatistics] = delegate.agg_stats(col)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def aggStats(col: Column): TypedColumn[Any, CellStatistics] = delegate.rf_agg_stats(col)
/** Computes the column aggregate mean. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
def aggMean(col: Column) = CellMeanAggregate(col)
/** Computes the number of non-NoData cells in a column. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def aggDataCells(col: Column): TypedColumn[Any, Long] = delegate.agg_data_cells(col)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def aggDataCells(col: Column): TypedColumn[Any, Long] = delegate.rf_agg_data_cells(col)
/** Computes the number of NoData cells in a column. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def aggNoDataCells(col: Column): TypedColumn[Any, Long] = delegate.agg_no_data_cells(col)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def aggNoDataCells(col: Column): TypedColumn[Any, Long] = delegate.rf_agg_no_data_cells(col)
/** Compute the Tile-wise mean */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def tileMean(col: Column): TypedColumn[Any, Double] = delegate.tile_mean(col)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def tileMean(col: Column): TypedColumn[Any, Double] = delegate.rf_tile_mean(col)
/** Compute the Tile-wise sum */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def tileSum(col: Column): TypedColumn[Any, Double] = delegate.tile_sum(col)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def tileSum(col: Column): TypedColumn[Any, Double] = delegate.rf_tile_sum(col)
/** Compute the minimum cell value in tile. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def tileMin(col: Column): TypedColumn[Any, Double] = delegate.tile_min(col)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def tileMin(col: Column): TypedColumn[Any, Double] = delegate.rf_tile_min(col)
/** Compute the maximum cell value in tile. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def tileMax(col: Column): TypedColumn[Any, Double] = delegate.tile_max(col)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def tileMax(col: Column): TypedColumn[Any, Double] = delegate.rf_tile_max(col)
/** Compute TileHistogram of Tile values. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def tileHistogram(col: Column): TypedColumn[Any, CellHistogram] = delegate.tile_histogram(col)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def tileHistogram(col: Column): TypedColumn[Any, CellHistogram] = delegate.rf_tile_histogram(col)
/** Compute statistics of Tile values. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def tileStats(col: Column): TypedColumn[Any, CellStatistics] = delegate.tile_stats(col)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def tileStats(col: Column): TypedColumn[Any, CellStatistics] = delegate.rf_tile_stats(col)
/** Counts the number of non-NoData cells per Tile. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def dataCells(tile: Column): TypedColumn[Any, Long] = delegate.data_cells(tile)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def dataCells(tile: Column): TypedColumn[Any, Long] = delegate.rf_data_cells(tile)
/** Counts the number of NoData cells per Tile. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def noDataCells(tile: Column): TypedColumn[Any, Long] = delegate.no_data_cells(tile)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def noDataCells(tile: Column): TypedColumn[Any, Long] = delegate.rf_no_data_cells(tile)
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def isNoDataTile(tile: Column): TypedColumn[Any, Boolean] = delegate.is_no_data_tile(tile)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def isNoDataTile(tile: Column): TypedColumn[Any, Boolean] = delegate.rf_is_no_data_tile(tile)
/** Compute cell-local aggregate descriptive statistics for a column of Tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localAggStats(col: Column): Column = delegate.agg_local_stats(col)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localAggStats(col: Column): Column = delegate.rf_agg_local_stats(col)
/** Compute the cell-wise/local max operation between Tiles in a column. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localAggMax(col: Column): TypedColumn[Any, Tile] = delegate.agg_local_max(col)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localAggMax(col: Column): TypedColumn[Any, Tile] = delegate.rf_agg_local_max(col)
/** Compute the cellwise/local min operation between Tiles in a column. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localAggMin(col: Column): TypedColumn[Any, Tile] = delegate.agg_local_min(col)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localAggMin(col: Column): TypedColumn[Any, Tile] = delegate.rf_agg_local_min(col)
/** Compute the cellwise/local mean operation between Tiles in a column. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localAggMean(col: Column): TypedColumn[Any, Tile] = delegate.agg_local_mean(col)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localAggMean(col: Column): TypedColumn[Any, Tile] = delegate.rf_agg_local_mean(col)
/** Compute the cellwise/local count of non-NoData cells for all Tiles in a column. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localAggDataCells(col: Column): TypedColumn[Any, Tile] = delegate.agg_local_data_cells(col)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localAggDataCells(col: Column): TypedColumn[Any, Tile] = delegate.rf_agg_local_data_cells(col)
/** Compute the cellwise/local count of NoData cells for all Tiles in a column. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localAggNoDataCells(col: Column): TypedColumn[Any, Tile] = delegate.agg_local_no_data_cells(col)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localAggNoDataCells(col: Column): TypedColumn[Any, Tile] = delegate.rf_agg_local_no_data_cells(col)
/** Cellwise addition between two Tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localAdd(left: Column, right: Column): Column = delegate.local_add(left, right)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localAdd(left: Column, right: Column): Column = delegate.rf_local_add(left, right)
/** Cellwise addition of a scalar to a tile. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localAddScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_add(tileCol, value)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localAddScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_add(tileCol, value)
/** Cellwise subtraction between two Tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localSubtract(left: Column, right: Column): Column = delegate.local_subtract(left, right)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localSubtract(left: Column, right: Column): Column = delegate.rf_local_subtract(left, right)
/** Cellwise subtraction of a scalar from a tile. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localSubtractScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_subtract(tileCol, value)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localSubtractScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_subtract(tileCol, value)
/** Cellwise multiplication between two Tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localMultiply(left: Column, right: Column): Column = delegate.local_multiply(left, right)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localMultiply(left: Column, right: Column): Column = delegate.rf_local_multiply(left, right)
/** Cellwise multiplication of a tile by a scalar. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localMultiplyScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_multiply(tileCol, value)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localMultiplyScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_multiply(tileCol, value)
/** Cellwise division between two Tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localDivide(left: Column, right: Column): Column = delegate.local_divide(left, right)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localDivide(left: Column, right: Column): Column = delegate.rf_local_divide(left, right)
/** Cellwise division of a tile by a scalar. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localDivideScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_divide(tileCol, value)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localDivideScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_divide(tileCol, value)
/** Perform an arbitrary GeoTrellis `LocalTileBinaryOp` between two Tile columns. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
def localAlgebra(op: LocalTileBinaryOp, left: Column, right: Column):
TypedColumn[Any, Tile] =
withAlias(opName(op), left, right)(
@@ -227,94 +228,94 @@ object ZeroSevenCompatibilityKit {
).as[Tile]
/** Compute the normalized difference of two tile columns */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def normalizedDifference(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.normalized_difference(left, right)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def normalizedDifference(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.rf_normalized_difference(left, right)
/** Constructor for constant tile column */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
def makeConstantTile(value: Number, cols: Int, rows: Int, cellType: String): TypedColumn[Any, Tile] =
udf(() => F.makeConstantTile(value, cols, rows, cellType)).apply().as(s"constant_$cellType").as[Tile]
/** Alias for column of constant tiles of zero */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
def tileZeros(cols: Int, rows: Int, cellType: String = "float64"): TypedColumn[Any, Tile] =
udf(() => F.tileZeros(cols, rows, cellType)).apply().as(s"zeros_$cellType").as[Tile]
/** Alias for column of constant tiles of one */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
def tileOnes(cols: Int, rows: Int, cellType: String = "float64"): TypedColumn[Any, Tile] =
udf(() => F.tileOnes(cols, rows, cellType)).apply().as(s"ones_$cellType").as[Tile]
/** Where the mask tile equals the mask value, replace values in the source tile with NODATA */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
def maskByValue(sourceTile: Column, maskTile: Column, maskValue: Column): TypedColumn[Any, Tile] =
- delegate.mask_by_value(sourceTile, maskTile, maskValue)
+ delegate.rf_mask_by_value(sourceTile, maskTile, maskValue)
/** Where the mask tile DOES NOT contain NODATA, replace values in the source tile with NODATA */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
def inverseMask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] =
- delegate.inverse_mask(sourceTile, maskTile)
+ delegate.rf_inverse_mask(sourceTile, maskTile)
/** Reproject a column of geometry from one CRS to another. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
def reprojectGeometry(sourceGeom: Column, srcCRS: CRS, dstCRS: CRS): TypedColumn[Any, Geometry] =
- delegate.reproject_geometry(sourceGeom, srcCRS, dstCRS)
+ delegate.st_reproject(sourceGeom, srcCRS, dstCRS)
/** Render Tile as ASCII string for debugging purposes. */
@Experimental
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def renderAscii(col: Column): TypedColumn[Any, String] = delegate.render_ascii(col)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def renderAscii(col: Column): TypedColumn[Any, String] = delegate.rf_render_ascii(col)
/** Cellwise less than value comparison between two tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
def localLess(left: Column, right: Column): TypedColumn[Any, Tile] =
- delegate.local_less(left, right)
+ delegate.rf_local_less(left, right)
/** Cellwise less than value comparison between a tile and a scalar. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localLessScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_less(tileCol, value)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localLessScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_less(tileCol, value)
/** Cellwise less than or equal to value comparison between a tile and a scalar. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localLessEqual(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.local_less_equal(left, right)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localLessEqual(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.rf_local_less_equal(left, right)
/** Cellwise less than or equal to value comparison between a tile and a scalar. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localLessEqualScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_less_equal(tileCol, value)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localLessEqualScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_less_equal(tileCol, value)
/** Cellwise greater than value comparison between two tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
def localGreater(left: Column, right: Column): TypedColumn[Any, Tile] =
- delegate.local_greater(left, right)
+ delegate.rf_local_greater(left, right)
/** Cellwise greater than value comparison between a tile and a scalar. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localGreaterScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_greater(tileCol, value)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localGreaterScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_greater(tileCol, value)
/** Cellwise greater than or equal to value comparison between two tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localGreaterEqual(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.local_greater_equal(left, right)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localGreaterEqual(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.rf_local_greater_equal(left, right)
/** Cellwise greater than or equal to value comparison between a tile and a scalar. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localGreaterEqualScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_greater_equal(tileCol, value)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localGreaterEqualScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_greater_equal(tileCol, value)
/** Cellwise equal to value comparison between two tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localEqual(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.local_equal(left, right)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localEqual(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.rf_local_equal(left, right)
/** Cellwise equal to value comparison between a tile and a scalar. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localEqualScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_equal(tileCol, value)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localEqualScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_equal(tileCol, value)
/** Cellwise inequality comparison between two tiles. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localUnequal(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.local_unequal(left, right)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localUnequal(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.rf_local_unequal(left, right)
/** Cellwise inequality comparison between a tile and a scalar. */
- @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
- def localUnequalScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_unequal(tileCol, value)
+ @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0")
+ def localUnequalScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_unequal(tileCol, value)
}
def register(sqlContext: SQLContext): Unit = {
@@ -323,6 +324,8 @@ object ZeroSevenCompatibilityKit {
def ub[A, B](f: A => B)(a: Seq[A]): B = f(a.head)
/** Binary expression builder builder. */
def bb[A, B](f: (A, A) => B)(a: Seq[A]): B = f(a.head, a.last)
+ /** Trinary expression builder builder. */
+ def tb[A, B](f: (A, A, A) => B)(a: Seq[A]): B = f(a.head, a.tail.head, a.last)
// Expression-oriented functions have a different registration scheme
// Currently have to register with the `builtin` registry due to Spark data hiding.
@@ -331,7 +334,7 @@ object ZeroSevenCompatibilityKit {
registry.registerFunc("rf_cellType", ub(GetCellType.apply))
registry.registerFunc("rf_convertCellType", bb(SetCellType.apply))
registry.registerFunc("rf_tileDimensions", ub(GetDimensions.apply))
- registry.registerFunc("rf_boundsGeometry", ub(BoundsToGeometry.apply))
+ registry.registerFunc("rf_boundsGeometry", ub(ExtentToGeometry.apply))
registry.registerFunc("rf_localAdd", bb(Add.apply))
registry.registerFunc("rf_localSubtract", bb(Subtract.apply))
registry.registerFunc("rf_localMultiply", bb(Multiply.apply))
@@ -360,11 +363,11 @@ object ZeroSevenCompatibilityKit {
registry.registerFunc("rf_localAggMin", ub(LocalTileOpAggregate.LocalMinUDAF.apply))
registry.registerFunc("rf_localAggCount", ub(LocalCountAggregate.LocalDataCellsUDAF.apply))
registry.registerFunc("rf_localAggMean", ub(LocalMeanAggregate.apply))
+ registry.registerFunc("rf_reprojectGeometry", tb(ReprojectGeometry.apply))
sqlContext.udf.register("rf_makeConstantTile", F.makeConstantTile)
sqlContext.udf.register("rf_tileZeros", F.tileZeros)
sqlContext.udf.register("rf_tileOnes", F.tileOnes)
sqlContext.udf.register("rf_cellTypes", F.cellTypes)
- sqlContext.udf.register("rf_reprojectGeometry", F.reprojectGeometryCRSName)
}
}
diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/debug/package.scala b/core/src/main/scala/org/locationtech/rasterframes/util/debug/package.scala
similarity index 80%
rename from core/src/main/scala/astraea/spark/rasterframes/util/debug/package.scala
rename to core/src/main/scala/org/locationtech/rasterframes/util/debug/package.scala
index 53b4b6aee..e33529b02 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/util/debug/package.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/util/debug/package.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea. Inc.
+ * Copyright 2018 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,12 +15,13 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes.util
+package org.locationtech.rasterframes.util
-import astraea.spark.rasterframes._
+import org.locationtech.rasterframes._
import geotrellis.proj4.LatLng
import geotrellis.vector.{Feature, Geometry}
import geotrellis.vector.io.json.JsonFeatureCollection
@@ -32,21 +33,21 @@ import spray.json.JsValue
* @since 4/6/18
*/
package object debug {
- implicit class RasterFrameWithDebug(val self: RasterFrame) {
+ implicit class RasterFrameWithDebug(val self: RasterFrameLayer) {
/** Renders the whole schema with metadata as a JSON string. */
def describeFullSchema: String = {
self.schema.prettyJson
}
- /** Renders all the extents in this RasterFrame as GeoJSON in EPSG:4326. This does a full
+ /** Renders all the extents in this RasterFrameLayer as GeoJSON in EPSG:4326. This does a full
* table scan and collects **all** the geometry into the driver, and then converts it into a
* Spray JSON data structure. Not performant, and for debugging only. */
def geoJsonExtents: JsValue = {
import spray.json.DefaultJsonProtocol._
val features = self
- .select(BOUNDS_COLUMN, SPATIAL_KEY_COLUMN)
+ .select(GEOMETRY_COLUMN, SPATIAL_KEY_COLUMN)
.collect()
.map{ case (p, s) ⇒ Feature(Geometry(p).reproject(self.crs, LatLng), Map("col" -> s.col, "row" -> s.row)) }
diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/package.scala b/core/src/main/scala/org/locationtech/rasterframes/util/package.scala
similarity index 79%
rename from core/src/main/scala/astraea/spark/rasterframes/util/package.scala
rename to core/src/main/scala/org/locationtech/rasterframes/util/package.scala
index 02a365cea..e94869986 100644
--- a/core/src/main/scala/astraea/spark/rasterframes/util/package.scala
+++ b/core/src/main/scala/org/locationtech/rasterframes/util/package.scala
@@ -15,28 +15,32 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes
+package org.locationtech.rasterframes
-import geotrellis.proj4.CRS
-import geotrellis.raster
-import geotrellis.raster.{CellGrid, Tile, isNoData}
+import com.typesafe.scalalogging.Logger
import geotrellis.raster.crop.TileCropMethods
import geotrellis.raster.io.geotiff.reader.GeoTiffReader
import geotrellis.raster.mapalgebra.local.LocalTileBinaryOp
import geotrellis.raster.mask.TileMaskMethods
import geotrellis.raster.merge.TileMergeMethods
import geotrellis.raster.prototype.TilePrototypeMethods
+import geotrellis.raster.{CellGrid, Tile, isNoData}
import geotrellis.spark.Bounds
import geotrellis.spark.tiling.TilerKeyMethods
-import geotrellis.util.{ByteReader, GetComponent, LazyLogging}
-import org.apache.spark.sql.catalyst.expressions.{Expression, NamedExpression}
+import geotrellis.util.{ByteReader, GetComponent}
+import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
+import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, NamedExpression}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.functions._
import org.apache.spark.sql.rf._
import org.apache.spark.sql.types.StringType
-import org.apache.spark.sql.{Column, DataFrame, SQLContext}
+import org.apache.spark.sql._
+import org.slf4j.LoggerFactory
import spire.syntax.cfor._
import scala.Boolean.box
@@ -46,7 +50,10 @@ import scala.Boolean.box
*
* @since 12/18/17
*/
-package object util extends LazyLogging {
+package object util {
+ @transient
+ protected lazy val logger: Logger =
+ Logger(LoggerFactory.getLogger("org.locationtech.rasterframes"))
import reflect.ClassTag
import reflect.runtime.universe._
@@ -71,34 +78,28 @@ package object util extends LazyLogging {
type KeyMethodsProvider[K1, K2] = K1 ⇒ TilerKeyMethods[K1, K2]
- /** Internal method for slapping the RasterFrame seal of approval on a DataFrame. */
- private[rasterframes] def certifyRasterframe(df: DataFrame): RasterFrame =
+ /** Internal method for slapping the RasterFrameLayer seal of approval on a DataFrame. */
+ private[rasterframes] def certifyRasterframe(df: DataFrame): RasterFrameLayer =
shapeless.tag[RasterFrameTag][DataFrame](df)
/** Tags output column with a nicer name. */
private[rasterframes]
- def withAlias(name: String, inputs: Column*)(output: Column) = {
+ def withAlias(name: String, inputs: Column*)(output: Column): Column = {
val paramNames = inputs.map(_.columnName).mkString(",")
output.as(s"$name($paramNames)")
}
+ /** Tags output column with a nicer name, yet strongly typed. */
+ private[rasterframes]
+ def withTypedAlias[T: Encoder](name: String, inputs: Column*)(output: Column): TypedColumn[Any, T] =
+ withAlias(name, inputs: _*)(output).as[T]
+
/** Derives and operator name from the implementing object name. */
private[rasterframes]
def opName(op: LocalTileBinaryOp) =
op.getClass.getSimpleName.replace("$", "").toLowerCase
- object CRSParser {
- def apply(value: String): CRS = {
- value match {
- case e if e.toUpperCase().startsWith("EPSG") => CRS.fromName(e) //not case-sensitive
- case p if p.startsWith("+proj") => CRS.fromString(p) // case sensitive
- case w if w.toUpperCase().startsWith("GEOGCS") => CRS.fromWKT(w) //only case-sensitive inside double quotes
- case _ ⇒ throw new IllegalArgumentException("crs string must be either EPSG code, +proj string, or OGC WKT")
- }
- }
- }
-
implicit class WithCombine[T](left: Option[T]) {
def combine[A, R >: A](a: A)(f: (T, A) ⇒ R): R = left.map(f(_, a)).getOrElse(a)
def tupleWith[R](right: Option[R]): Option[(T, R)] = left.flatMap(l ⇒ right.map((l, _)))
@@ -107,7 +108,9 @@ package object util extends LazyLogging {
implicit class ExpressionWithName(val expr: Expression) extends AnyVal {
import org.apache.spark.sql.catalyst.expressions.Literal
def name: String = expr match {
- case n: NamedExpression ⇒ n.name
+ case n: NamedExpression if n.resolved ⇒ n.name
+ case UnresolvedAttribute(parts) => parts.mkString("_")
+ case Alias(_, name) => name
case l: Literal if l.dataType == StringType ⇒ String.valueOf(l.value)
case o ⇒ o.toString
}
@@ -181,6 +184,27 @@ package object util extends LazyLogging {
}
}
+ implicit class DFWithPrettyPrint(val df: Dataset[_]) extends AnyVal {
+ def toMarkdown(numRows: Int = 5, truncate: Boolean = false): String = {
+ import df.sqlContext.implicits._
+ val cols = df.columns
+ val header = cols.mkString("| ", " | ", " |") + "\n" + ("|---" * cols.length) + "|\n"
+ val stringifiers = cols
+ .map(c => s"`$c`")
+ .map(c => df.col(c).cast(StringType))
+ .map(c => if (truncate) substring(c, 1, 40) else c)
+ val cat = concat_ws(" | ", stringifiers: _*)
+ val body = df
+ .select(cat).limit(numRows)
+ .as[String]
+ .collect()
+ .map(_.replaceAll("\\[", "\\\\["))
+ .map(_.replace('\n', '↩'))
+ .mkString("| ", " |\n| ", " |")
+ header + body
+ }
+ }
+
object Shims {
// GT 1.2.1 to 2.0.0
def toArrayTile[T <: CellGrid](tile: T): T =
diff --git a/core/src/test/resources/B01.jp2 b/core/src/test/resources/B01.jp2
new file mode 100644
index 000000000..18de22f54
Binary files /dev/null and b/core/src/test/resources/B01.jp2 differ
diff --git a/core/src/test/resources/L8-B4-Elkton-VA-4326.tiff b/core/src/test/resources/L8-B4-Elkton-VA-4326.tiff
new file mode 100644
index 000000000..2bc57e255
Binary files /dev/null and b/core/src/test/resources/L8-B4-Elkton-VA-4326.tiff differ
diff --git a/core/src/test/resources/L8-archive.zip b/core/src/test/resources/L8-archive.zip
new file mode 100644
index 000000000..93afb4db4
Binary files /dev/null and b/core/src/test/resources/L8-archive.zip differ
diff --git a/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.idx b/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.idx
new file mode 100644
index 000000000..f86df2587
Binary files /dev/null and b/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.idx differ
diff --git a/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.lrc b/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.lrc
new file mode 100644
index 000000000..75163d4a2
Binary files /dev/null and b/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.lrc differ
diff --git a/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.mrf b/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.mrf
new file mode 100644
index 000000000..8245c4a7e
--- /dev/null
+++ b/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.mrf
@@ -0,0 +1,12 @@
+
+
+
+
+ LERC
+
+
+
+
+ PROJCS["unnamed",GEOGCS["Unknown datum based upon the custom spheroid",DATUM["Not specified (based on custom spheroid)",SPHEROID["Custom spheroid",6371007.181,0]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]],PROJECTION["Sinusoidal"],PARAMETER["longitude_of_center",0],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["Meter",1]]
+
+
diff --git a/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.mrf.aux.xml b/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.mrf.aux.xml
new file mode 100644
index 000000000..5a18f6944
--- /dev/null
+++ b/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.mrf.aux.xml
@@ -0,0 +1,92 @@
+
+
+ LERC
+ PIXEL
+
+
+ 06121997
+ MODIS
+ MODIS
+ Terra
+ Aqua
+ MODIS
+ MODIS
+ Passed
+ Passed was set as a default value. More algorithm will be developed
+ 0
+ AMBRALS_V4.0R1
+ v1.0500m
+ 15.0
+ 463.312716527778
+ volume
+ 2400
+ 2400
+ Day
+ Mandatory QA:
+ 0 = processed, good quality (full BRDF inversions)
+ 1 = processed, see other QA (magnitude BRDF inversions)
+
+ 6.1
+ 150.120692476232
+ N
+ False
+ 75.0
+ 86400
+ 43200
+ 19.9448109058663, 30.0666177912155, 29.9990071837477, 19.8789125843729
+ 127.31379517564, 138.161359988435, 150.130532080915, 138.321766284772
+ 1, 2, 3, 4
+ HDFEOS_V2.19
+ 30
+ 10.5067/MODIS/MCD43A4.006
+ 10.5067/MODIS/MCD43A4.006
+ http://dx.doi.org
+ http://dx.doi.org
+ MYD09GA.A2019113.h30v06.006.2019115025936.hdf, MYD09GA.A2019114.h30v06.006.2019117021858.hdf, MYD09GA.A2019115.h30v06.006.2019117044251.hdf, MYD09GA.A2019116.h30v06.006.2019118031111.hdf, MYD09GA.A2019117.h30v06.006.2019119025916.hdf, MYD09GA.A2019118.h30v06.006.2019120030848.hdf, MOD09GA.A2019113.h30v06.006.2019115032521.hdf, MOD09GA.A2019114.h30v06.006.2019116030646.hdf, MOD09GA.A2019115.h30v06.006.2019117050730.hdf, MOD09GA.A2019116.h30v06.006.2019118032616.hdf, MOD09GA.A2019117.h30v06.006.2019119032020.hdf, MOD09GA.A2019118.h30v06.006.2019120032257.hdf, MCD43DB.A2019110.6.h30v06.hdf
+ MCD43A4.A2019111.h30v06.006.2019120033434.hdf
+ 6.1.34
+ MODIS/Terra+Aqua BRDF/Albedo Nadir BRDF-Adjusted Ref Daily L3 Global - 500m
+ BRDF_Albedo_Band_Mandatory_Quality_Band1
+ 0
+ 500m
+ 29.9999999973059
+ 1
+ NOT SET
+ 0
+ 0
+ 0
+ 100
+ 0
+ 6.0.42
+ MODAPS
+ Linux minion7043 3.10.0-957.5.1.el7.x86_64 #1 SMP Fri Feb 1 14:54:57 UTC 2019 x86_64 x86_64 x86_64 GNU/Linux
+ 2019-04-30T03:34:48.000Z
+ 0
+ 0
+ 99
+ 0
+ 2019-04-13
+ 00:00:00.000000
+ 2019-04-28
+ 23:59:59.999999
+ processed once
+ further update is anticipated
+ Not Investigated
+ See http://landweb.nascom/nasa.gov/cgi-bin/QA_WWW/qaFlagPage.cgi?sat=aqua the product Science Quality status.
+ 06121997
+ MCD43A4
+ 19.9999999982039
+ 2015
+ 51030006
+ concatenated flags
+ 0, 254
+ 6
+ 6
+ 127.701332684185
+ 255
+
+
+ BRDF_Albedo_Band_Mandatory_Quality_Band1
+ concatenated flags
+
+
diff --git a/core/src/test/resources/README.md b/core/src/test/resources/README.md
new file mode 100644
index 000000000..70aa76cd6
--- /dev/null
+++ b/core/src/test/resources/README.md
@@ -0,0 +1,8 @@
+# Test resources
+
+## NAIP Virginia UTM overlaps
+
+ 1. `m_3607717_sw_18_1_20160620_subset.tif` the southwest corner of NAIP m_3607717_sw_18_1. It is in its native CRS EPSG:26918
+ 2. `m_3607824_se_17_1_20160620_subset.tif` the southeast corner of NAIP m_3607824_se_17_1. Overlaps number 1; It is to the east. It is in its native CRS EPSG:26917.
+ 3. `m_3607_box.tif` - an aribtrary burned in polygon in EPSG:4326 partially overlapping both of the above NAIP subsets 1 and 2.
+
diff --git a/core/src/test/resources/application.conf b/core/src/test/resources/application.conf
new file mode 100644
index 000000000..b274441dd
--- /dev/null
+++ b/core/src/test/resources/application.conf
@@ -0,0 +1,12 @@
+gdal {
+ settings {
+ options {
+ // See https://trac.osgeo.org/gdal/wiki/ConfigOptions for options
+ CPL_DEBUG = "OFF"
+ // TIFF_USE_OVR = "NO"
+ // GDAL_TIFF_INTERNAL_MASK = "YES"
+ }
+ // set this to `false` if CPL_DEBUG is `ON`
+ useExceptions: true
+ }
+}
\ No newline at end of file
diff --git a/core/src/test/resources/log4j.properties b/core/src/test/resources/log4j.properties
index 378ae8e61..39e791fa3 100644
--- a/core/src/test/resources/log4j.properties
+++ b/core/src/test/resources/log4j.properties
@@ -30,15 +30,15 @@ log4j.logger.org.apache.spark.repl.Main=WARN
log4j.logger.org.apache=ERROR
log4j.logger.com.amazonaws=WARN
-log4j.logger.geotrellis=INFO
+log4j.logger.geotrellis=WARN
# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
-log4j.logger.astraea.spark.rasterframes=DEBUG
-log4j.logger.astraea.spark.rasterframes.ref=TRACE
+log4j.logger.org.locationtech.rasterframes=WARN
+log4j.logger.org.locationtech.rasterframes.ref=WARN
log4j.logger.org.apache.parquet.hadoop.ParquetRecordReader=OFF
# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
diff --git a/core/src/test/resources/m_3607717_sw_18_1_20160620_subset.tif b/core/src/test/resources/m_3607717_sw_18_1_20160620_subset.tif
new file mode 100644
index 000000000..862c23645
Binary files /dev/null and b/core/src/test/resources/m_3607717_sw_18_1_20160620_subset.tif differ
diff --git a/core/src/test/resources/m_3607824_se_17_1_20160620_subset.tif b/core/src/test/resources/m_3607824_se_17_1_20160620_subset.tif
new file mode 100644
index 000000000..2b2a7d497
Binary files /dev/null and b/core/src/test/resources/m_3607824_se_17_1_20160620_subset.tif differ
diff --git a/core/src/test/resources/m_3607_box.tif b/core/src/test/resources/m_3607_box.tif
new file mode 100644
index 000000000..50570db26
Binary files /dev/null and b/core/src/test/resources/m_3607_box.tif differ
diff --git a/core/src/test/scala/Scratch.sc b/core/src/test/scala/Scratch.sc
deleted file mode 100644
index e69de29bb..000000000
diff --git a/core/src/test/scala/astraea/spark/rasterframes/GeometryOperationsSpec.scala b/core/src/test/scala/astraea/spark/rasterframes/GeometryOperationsSpec.scala
deleted file mode 100644
index 28d0bcb94..000000000
--- a/core/src/test/scala/astraea/spark/rasterframes/GeometryOperationsSpec.scala
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2018 Astraea. Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- *
- */
-
-package astraea.spark.rasterframes
-
-import java.nio.file.{Files, Paths}
-
-import geotrellis.proj4.LatLng
-import geotrellis.vector.io._
-import geotrellis.vector.io.json.JsonFeatureCollection
-import spray.json.DefaultJsonProtocol._
-import spray.json._
-
-import scala.collection.JavaConversions._
-
-/**
- *
- *
- * @since 5/30/18
- */
-class GeometryOperationsSpec extends TestEnvironment with TestData {
- val geoJson = {
- val p = Paths.get(getClass.getResource("/L8-Labels-Elkton-VA.geojson").toURI)
- Files.readAllLines(p).mkString("\n")
- }
-
- describe("Geometery operations") {
- import spark.implicits._
- it("should rasterize geometry") {
- val rf = l8Sample(1).projectedRaster.toRF.withBounds()
-
- val features = geoJson.parseGeoJson[JsonFeatureCollection].getAllPolygonFeatures[JsObject]()
- val df = features.map(f ⇒ (
- f.geom.reproject(LatLng, rf.crs).jtsGeom,
- f.data.fields("id").asInstanceOf[JsNumber].value.intValue()
- )).toDF("geom", "id")
-
- val toRasterize = rf.crossJoin(df)
-
- val tlm = rf.tileLayerMetadata.merge
-
- val (cols, rows) = tlm.layout.tileLayout.tileDimensions
-
- val rasterized = toRasterize.withColumn("rasterized", rasterize($"geom", $"bounds", $"id", cols, rows))
-
- assert(rasterized.count() === df.count() * rf.count())
- assert(rasterized.select(tile_dimensions($"rasterized")).distinct().count() === 1)
- val pixelCount = rasterized.select(agg_data_cells($"rasterized")).first()
- assert(pixelCount < cols * rows)
-
-
- toRasterize.createOrReplaceTempView("stuff")
- val viaSQL = sql(s"select rf_rasterize(geom, bounds, id, $cols, $rows) as rasterized from stuff")
- assert(viaSQL.select(agg_data_cells($"rasterized")).first === pixelCount)
-
- //rasterized.select($"rasterized".as[Tile]).foreach(t ⇒ t.renderPng(ColorMaps.IGBP).write("target/" + t.hashCode() + ".png"))
- }
- }
-}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/ReprojectGeometrySpec.scala b/core/src/test/scala/astraea/spark/rasterframes/ReprojectGeometrySpec.scala
deleted file mode 100644
index 39ea3b1c1..000000000
--- a/core/src/test/scala/astraea/spark/rasterframes/ReprojectGeometrySpec.scala
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2019 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- */
-
-package astraea.spark.rasterframes
-
-import com.vividsolutions.jts.geom._
-import geotrellis.proj4.{CRS, LatLng, Sinusoidal, WebMercator}
-import org.apache.spark.sql.Encoders
-import org.scalatest.{FunSpec, Matchers}
-
-/**
- * Test for geometry reprojection.
- *
- * @since 11/29/18
- */
-class ReprojectGeometrySpec extends FunSpec
- with TestEnvironment with Matchers {
- import spark.implicits._
-
- describe("Geometry reprojection") {
- it("should allow reprojection geometry") {
- // Note: Test data copied from ReprojectSpec in GeoTrellis
- val fact = new GeometryFactory()
- val latLng: Geometry = fact.createLineString(Array(
- new Coordinate(-111.09374999999999, 34.784483415461345),
- new Coordinate(-111.09374999999999, 43.29919735147067),
- new Coordinate(-75.322265625, 43.29919735147067),
- new Coordinate(-75.322265625, 34.784483415461345),
- new Coordinate(-111.09374999999999, 34.784483415461345)
- ))
-
- val webMercator: Geometry = fact.createLineString(Array(
- new Coordinate(-12366899.680315234, 4134631.734001753),
- new Coordinate(-12366899.680315234, 5357624.186564572),
- new Coordinate(-8384836.254770693, 5357624.186564572),
- new Coordinate(-8384836.254770693, 4134631.734001753),
- new Coordinate(-12366899.680315234, 4134631.734001753)
- ))
-
- withClue("both literal crs") {
-
- val df = Seq((latLng, webMercator)).toDF("ll", "wm")
-
- val rp = df.select(
- reproject_geometry($"ll", LatLng, WebMercator) as "wm2",
- reproject_geometry($"wm", WebMercator, LatLng) as "ll2",
- reproject_geometry(reproject_geometry($"ll", LatLng, Sinusoidal), Sinusoidal, WebMercator) as "wm3"
- ).as[(Geometry, Geometry, Geometry)]
-
-
- val (wm2, ll2, wm3) = rp.first()
-
- wm2 should matchGeom(webMercator, 0.00001)
- ll2 should matchGeom(latLng, 0.00001)
- wm3 should matchGeom(webMercator, 0.00001)
- }
-
- withClue("one literal crs") {
- implicit val enc = Encoders.tuple(jtsGeometryEncoder, jtsGeometryEncoder, crsEncoder)
-
- val df = Seq((latLng, webMercator, LatLng: CRS)).toDF("ll", "wm", "llCRS")
-
- val rp = df.select(
- reproject_geometry($"ll", $"llCRS", WebMercator) as "wm2",
- reproject_geometry($"wm", WebMercator, $"llCRS") as "ll2",
- reproject_geometry(reproject_geometry($"ll", $"llCRS", Sinusoidal), Sinusoidal, WebMercator) as "wm3"
- ).as[(Geometry, Geometry, Geometry)]
-
-
- val (wm2, ll2, wm3) = rp.first()
-
- wm2 should matchGeom(webMercator, 0.00001)
- ll2 should matchGeom(latLng, 0.00001)
- wm3 should matchGeom(webMercator, 0.00001)
-
- }
- }
- }
-
-}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/ml/TileExploderSpec.scala b/core/src/test/scala/astraea/spark/rasterframes/ml/TileExploderSpec.scala
deleted file mode 100644
index 8883045e1..000000000
--- a/core/src/test/scala/astraea/spark/rasterframes/ml/TileExploderSpec.scala
+++ /dev/null
@@ -1,26 +0,0 @@
-package astraea.spark.rasterframes.ml
-
-import astraea.spark.rasterframes.{TestData, TestEnvironment}
-import geotrellis.raster.Tile
-import org.apache.spark.sql.functions.lit
-/**
- *
- * @since 2/16/18
- */
-class TileExploderSpec extends TestEnvironment with TestData {
- describe("Tile explode transformer") {
- it("should explode tiles") {
- import spark.implicits._
- val df = Seq[(Tile, Tile)]((byteArrayTile, byteArrayTile)).toDF("tile1", "tile2").withColumn("other", lit("stuff"))
-
- val exploder = new TileExploder()
- val newSchema = exploder.transformSchema(df.schema)
-
- val exploded = exploder.transform(df)
- assert(newSchema === exploded.schema)
- assert(exploded.columns.length === 5)
- assert(exploded.count() === 9)
- write(exploded)
- }
- }
-}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/ref/RasterSourceSpec.scala b/core/src/test/scala/astraea/spark/rasterframes/ref/RasterSourceSpec.scala
deleted file mode 100644
index 1c1fb182a..000000000
--- a/core/src/test/scala/astraea/spark/rasterframes/ref/RasterSourceSpec.scala
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2018 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- */
-
-package astraea.spark.rasterframes.ref
-
-import java.net.URI
-
-import astraea.spark.rasterframes.TestEnvironment.ReadMonitor
-import astraea.spark.rasterframes.ref.RasterSource.FileGeoTiffRasterSource
-import astraea.spark.rasterframes.{TestData, TestEnvironment}
-import geotrellis.raster.io.geotiff.GeoTiff
-import geotrellis.vector.Extent
-import org.apache.spark.sql.rf.RasterSourceUDT
-
-/**
- *
- *
- * @since 8/22/18
- */
-class RasterSourceSpec extends TestEnvironment with TestData {
- def sub(e: Extent) = {
- val c = e.center
- val w = e.width
- val h = e.height
- Extent(c.x, c.y, c.x + w * 0.1, c.y + h * 0.1)
- }
-
- describe("General RasterSource") {
- it("should identify as UDT") {
- assert(new RasterSourceUDT() === new RasterSourceUDT())
- }
- }
-
- describe("HTTP RasterSource") {
- it("should support metadata querying over HTTP") {
- withClue("remoteCOGSingleband") {
- val src = RasterSource(remoteCOGSingleband1)
- assert(!src.extent.isEmpty)
- }
- withClue("remoteCOGMultiband") {
- val src = RasterSource(remoteCOGMultiband)
- assert(!src.extent.isEmpty)
- }
- }
- it("should read sub-tile") {
- withClue("remoteCOGSingleband") {
- val src = RasterSource(remoteCOGSingleband1)
- val Left(raster) = src.read(sub(src.extent))
- assert(raster.size > 0 && raster.size < src.size)
- }
- withClue("remoteCOGMultiband") {
- val src = RasterSource(remoteCOGMultiband)
- //println("CoG size", src.size, src.dimensions)
- val Right(raster) = src.read(sub(src.extent))
- //println("Subtile size", raster.size, raster.dimensions)
- assert(raster.size > 0 && raster.size < src.size)
- }
- }
- it("should Java serialize") {
- import java.io._
- val src = RasterSource(remoteCOGSingleband1)
- val buf = new java.io.ByteArrayOutputStream()
- val out = new ObjectOutputStream(buf)
- out.writeObject(src)
- out.close()
-
- val data = buf.toByteArray
- val in = new ObjectInputStream(new ByteArrayInputStream(data))
- val recovered = in.readObject().asInstanceOf[RasterSource]
- assert(src.toString === recovered.toString)
- }
- }
- describe("File RasterSource") {
- it("should support metadata querying of file") {
- val localSrc = geotiffDir.resolve("LC08_B7_Memphis_COG.tiff").toUri
- val src = RasterSource(localSrc)
- assert(!src.extent.isEmpty)
- }
- }
-
- describe("Caching") {
- val localSrc = geotiffDir.resolve("LC08_B7_Memphis_COG.tiff").toUri
-
- trait Fixture {
- val counter = ReadMonitor(false)
- val src = RasterSource(localSrc, Some(counter))
- }
-
- it("should cache headers")(new Fixture {
- val e = src.extent
- assert(counter.reads === 1)
-
- val c = src.crs
- val e2 = src.extent
- val ct = src.cellType
- assert(counter.reads === 1)
- })
-
- it("should Spark serialize caching")(new Fixture {
-
- import spark.implicits._
-
- assert(src.isInstanceOf[FileGeoTiffRasterSource])
-
- val e = src.extent
- assert(counter.reads === 1)
-
- val df = Seq(src, src, src).toDS.repartition(3)
- val src2 = df.collect()(1)
-
- val e2 = src2.extent
- val ct = src2.cellType
-
- src2 match {
- case fs: FileGeoTiffRasterSource ⇒
- fs.callback match {
- case Some(cb: ReadMonitor) ⇒ assert(cb.reads === 1)
- case o ⇒ fail(s"Expected '$o' to be a ReadMonitor")
- }
- case o ⇒ fail(s"Expected '$o' to be FileGeoTiffRasterSource")
- }
- })
- }
-
- describe("RasterSourceToTiles Expression") {
- it("should read all tiles") {
- val src = RasterSource(remoteMODIS)
-
- val subrasters = src.readAll().left.get
-
- val collected = subrasters.map(_.extent).reduceLeft(_.combine(_))
-
- assert(src.extent.xmin === collected.xmin +- 0.01)
- assert(src.extent.ymin === collected.ymin +- 0.01)
- assert(src.extent.xmax === collected.xmax +- 0.01)
- assert(src.extent.ymax === collected.ymax +- 0.01)
-
- val totalCells = subrasters.map(_.size).sum
-
- assert(totalCells === src.size)
-
- subrasters.zipWithIndex.foreach{case (r, i) ⇒
- // TODO: how to test?
- GeoTiff(r, src.crs).write(s"target/$i.tiff")
- }
- }
- }
-}
diff --git a/core/src/test/scala/examples/Classification.scala b/core/src/test/scala/examples/Classification.scala
deleted file mode 100644
index 4aebcf742..000000000
--- a/core/src/test/scala/examples/Classification.scala
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2017 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- */
-
-package examples
-
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.ml.{NoDataFilter, TileExploder}
-import geotrellis.raster._
-import geotrellis.raster.io.geotiff.reader.GeoTiffReader
-import geotrellis.raster.render.{ColorRamps, IndexedColorMap}
-import org.apache.spark.ml.Pipeline
-import org.apache.spark.ml.classification.DecisionTreeClassifier
-import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
-import org.apache.spark.ml.feature.VectorAssembler
-import org.apache.spark.ml.tuning.{CrossValidator, ParamGridBuilder}
-import org.apache.spark.sql._
-
-object Classification extends App {
-
-// // Utility for reading imagery from our test data set
- def readTiff(name: String) = GeoTiffReader.readSingleband(getClass.getResource(s"/$name").getPath)
-
- implicit val spark = SparkSession.builder()
- .master("local[*]")
- .appName(getClass.getName)
- .getOrCreate()
- .withRasterFrames
-
- import spark.implicits._
-
- // The first step is to load multiple bands of imagery and construct
- // a single RasterFrame from them.
- val filenamePattern = "L8-%s-Elkton-VA.tiff"
- val bandNumbers = 2 to 7
- val bandColNames = bandNumbers.map(b ⇒ s"band_$b").toArray
- val tileSize = 10
-
- // For each identified band, load the associated image file
- val joinedRF = bandNumbers
- .map { b ⇒ (b, filenamePattern.format("B" + b)) }
- .map { case (b, f) ⇒ (b, readTiff(f)) }
- .map { case (b, t) ⇒ t.projectedRaster.toRF(tileSize, tileSize, s"band_$b") }
- .reduce(_ spatialJoin _)
-
- // We should see a single spatial_key column along with 4 columns of tiles.
- joinedRF.printSchema()
-
- // Similarly pull in the target label data.
- val targetCol = "target"
-
- // Load the target label raster. We have to convert the cell type to
- // Double to meet expectations of SparkML
- val target = readTiff(filenamePattern.format("Labels"))
- .mapTile(_.convert(DoubleConstantNoDataCellType))
- .projectedRaster
- .toRF(tileSize, tileSize, targetCol)
-
- // Take a peek at what kind of label data we have to work with.
- target.select(agg_stats(target(targetCol))).show
-
- val abt = joinedRF.spatialJoin(target)
-
- // SparkML requires that each observation be in its own row, and those
- // observations be packed into a single `Vector`. The first step is to
- // "explode" the tiles into a single row per cell/pixel
- val exploder = new TileExploder()
-
- val noDataFilter = new NoDataFilter()
- .setInputCols(bandColNames :+ targetCol)
-
- // To "vectorize" the the band columns we use the SparkML `VectorAssembler`
- val assembler = new VectorAssembler()
- .setInputCols(bandColNames)
- .setOutputCol("features")
-
- // Using a decision tree for classification
- val classifier = new DecisionTreeClassifier()
- .setLabelCol(targetCol)
- .setFeaturesCol(assembler.getOutputCol)
-
- // Assemble the model pipeline
- val pipeline = new Pipeline()
- .setStages(Array(exploder, noDataFilter, assembler, classifier))
-
- // Configure how we're going to evaluate our model's performance.
- val evaluator = new MulticlassClassificationEvaluator()
- .setLabelCol(targetCol)
- .setPredictionCol("prediction")
- .setMetricName("f1")
-
- // Use a parameter grid to determine what the optimal max tree depth is for this data
- val paramGrid = new ParamGridBuilder()
- //.addGrid(classifier.maxDepth, Array(1, 2, 3, 4))
- .build()
-
- // Configure the cross validator
- val trainer = new CrossValidator()
- .setEstimator(pipeline)
- .setEvaluator(evaluator)
- .setEstimatorParamMaps(paramGrid)
- .setNumFolds(4)
-
- // Push the "go" button
- val model = trainer.fit(abt)
-
- // Format the `paramGrid` settings resultant model
- val metrics = model.getEstimatorParamMaps
- .map(_.toSeq.map(p ⇒ s"${p.param.name} = ${p.value}"))
- .map(_.mkString(", "))
- .zip(model.avgMetrics)
-
- // Render the parameter/performance association
- metrics.toSeq.toDF("params", "metric").show(false)
-
- // Score the original data set, including cells
- // without target values.
- val scored = model.bestModel.transform(joinedRF)
-
- // Add up class membership results
- scored.groupBy($"prediction" as "class").count().show
-
- scored.show(10)
-
- val tlm = joinedRF.tileLayerMetadata.left.get
-
- val retiled = scored.groupBy($"spatial_key").agg(
- assemble_tile(
- $"column_index", $"row_index", $"prediction",
- tlm.tileCols, tlm.tileRows, IntConstantNoDataCellType
- )
- )
-
- val rf = retiled.asRF($"spatial_key", tlm)
-
- val raster = rf.toRaster($"prediction", 186, 169)
-
- val clusterColors = IndexedColorMap.fromColorMap(
- ColorRamps.Viridis.toColorMap((0 until 3).toArray)
- )
-
- raster.tile.renderPng(clusterColors).write("target/scala-2.11/tut/ml/classified.png")
-
- spark.stop()
-}
diff --git a/core/src/test/scala/examples/Clustering.scala b/core/src/test/scala/examples/Clustering.scala
deleted file mode 100644
index 2f8d4ce1f..000000000
--- a/core/src/test/scala/examples/Clustering.scala
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2017 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- */
-
-package examples
-
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.ml.TileExploder
-import geotrellis.raster.ByteConstantNoDataCellType
-import geotrellis.raster.io.geotiff.reader.GeoTiffReader
-import geotrellis.raster.render.{ColorRamps, IndexedColorMap}
-import org.apache.spark.ml.Pipeline
-import org.apache.spark.ml.clustering.{KMeans, KMeansModel}
-import org.apache.spark.ml.feature.VectorAssembler
-import org.apache.spark.sql._
-
-object Clustering extends App {
-
- // Utility for reading imagery from our test data set
- def readTiff(name: String) = GeoTiffReader.readSingleband(getClass.getResource(s"/$name").getPath)
-
- implicit val spark = SparkSession.builder().master("local[*]").appName(getClass.getName).getOrCreate().withRasterFrames
-
- import spark.implicits._
-
- // The first step is to load multiple bands of imagery and construct
- // a single RasterFrame from them.
- val filenamePattern = "L8-B%d-Elkton-VA.tiff"
- val bandNumbers = 1 to 7
- val bandColNames = bandNumbers.map(b ⇒ s"band_$b").toArray
-
- // For each identified band, load the associated image file
- val joinedRF = bandNumbers
- .map { b ⇒ (b, filenamePattern.format(b)) }
- .map { case (b,f) ⇒ (b, readTiff(f)) }
- .map { case (b, t) ⇒ t.projectedRaster.toRF(s"band_$b") }
- .reduce(_ spatialJoin _)
-
- // We should see a single spatial_key column along with 4 columns of tiles.
- joinedRF.printSchema()
-
- // SparkML requires that each observation be in its own row, and those
- // observations be packed into a single `Vector`. The first step is to
- // "explode" the tiles into a single row per cell/pixel
- val exploder = new TileExploder()
-
- // To "vectorize" the the band columns we use the SparkML `VectorAssembler`
- val assembler = new VectorAssembler()
- .setInputCols(bandColNames)
- .setOutputCol("features")
-
- // Configure our clustering algorithm
- val k = 5
- val kmeans = new KMeans().setK(k)
-
- // Combine the two stages
- val pipeline = new Pipeline().setStages(Array(exploder, assembler, kmeans))
-
- // Compute clusters
- val model = pipeline.fit(joinedRF)
-
- // Run the data through the model to assign cluster IDs to each
- val clustered = model.transform(joinedRF)
- clustered.show(8)
-
- // If we want to inspect the model statistics, the SparkML API requires us to go
- // through this unfortunate contortion:
- val clusterResults = model.stages.collect{ case km: KMeansModel ⇒ km}.head
-
- // Compute sum of squared distances of points to their nearest center
- val metric = clusterResults.computeCost(clustered)
- println("Within set sum of squared errors: " + metric)
-
- val tlm = joinedRF.tileLayerMetadata.left.get
-
- val retiled = clustered.groupBy($"spatial_key").agg(
- assemble_tile(
- $"column_index", $"row_index", $"prediction",
- tlm.tileCols, tlm.tileRows, ByteConstantNoDataCellType)
- )
-
- val rf = retiled.asRF($"spatial_key", tlm)
-
- val raster = rf.toRaster($"prediction", 186, 169)
-
- val clusterColors = IndexedColorMap.fromColorMap(
- ColorRamps.Viridis.toColorMap((0 until k).toArray)
- )
-
- raster.tile.renderPng(clusterColors).write("clustered.png")
-
- spark.stop()
-}
diff --git a/core/src/test/scala/examples/CreatingRasterFrames.scala b/core/src/test/scala/examples/CreatingRasterFrames.scala
index f7a69043a..8b5c00c72 100644
--- a/core/src/test/scala/examples/CreatingRasterFrames.scala
+++ b/core/src/test/scala/examples/CreatingRasterFrames.scala
@@ -27,19 +27,17 @@ package examples
object CreatingRasterFrames extends App {
// # Creating RasterFrames
//
-// There are a number of ways to create a `RasterFrame`, as enumerated in the sections below.
+// There are a number of ways to create a `RasterFrameLayer`, as enumerated in the sections below.
//
// ## Initialization
//
// First, some standard `import`s:
- import astraea.spark.rasterframes._
+ import org.locationtech.rasterframes._
import geotrellis.raster._
- import geotrellis.raster.render._
- import geotrellis.spark.io._
import geotrellis.raster.io.geotiff.SinglebandGeoTiff
+ import geotrellis.spark.io._
import org.apache.spark.sql._
- import org.apache.spark.sql.functions._
// Next, initialize the `SparkSession`, and call the `withRasterFrames` method on it:
@@ -47,27 +45,25 @@ object CreatingRasterFrames extends App {
master("local[*]").appName("RasterFrames").
getOrCreate().
withRasterFrames
-
- import spark.implicits._
spark.sparkContext.setLogLevel("ERROR")
// ## From `ProjectedExtent`
//
-// The simplest mechanism for getting a RasterFrame is to use the `toRF(tileCols, tileRows)` extension method on `ProjectedRaster`.
+// The simplest mechanism for getting a RasterFrameLayer is to use the `toLayer(tileCols, tileRows)` extension method on `ProjectedRaster`.
val scene = SinglebandGeoTiff("src/test/resources/L8-B8-Robinson-IL.tiff")
- val rf = scene.projectedRaster.toRF(128, 128)
+ val rf = scene.projectedRaster.toLayer(128, 128)
rf.show(5, false)
// ## From `TileLayerRDD`
//
-// Another option is to use a GeoTrellis [`LayerReader`](https://docs.geotrellis.io/en/latest/guide/tile-backends.html), to get a `TileLayerRDD` for which there's also a `toRF` extension method.
+// Another option is to use a GeoTrellis [`LayerReader`](https://docs.geotrellis.io/en/latest/guide/tile-backends.html), to get a `TileLayerRDD` for which there's also a `toLayer` extension method.
// ## Inspecting Structure
//
-// `RasterFrame` has a number of methods providing access to metadata about the contents of the RasterFrame.
+// `RasterFrameLayer` has a number of methods providing access to metadata about the contents of the RasterFrameLayer.
//
// ### Tile Column Names
diff --git a/core/src/test/scala/examples/Exporting.scala b/core/src/test/scala/examples/Exporting.scala
index 247e93944..25fa321c1 100644
--- a/core/src/test/scala/examples/Exporting.scala
+++ b/core/src/test/scala/examples/Exporting.scala
@@ -20,13 +20,11 @@
package examples
import java.nio.file.Files
-import astraea.spark.rasterframes._
+import org.locationtech.rasterframes._
import geotrellis.raster._
+import geotrellis.raster.io.geotiff.SinglebandGeoTiff
import geotrellis.raster.render._
-import geotrellis.raster.io.geotiff.{GeoTiff, SinglebandGeoTiff}
import geotrellis.spark.{LayerId, SpatialKey}
-import geotrellis.spark.io.LayerWriter
-import geotrellis.spark.io.file.{FileAttributeStore, FileLayerWriter}
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import spray.json.JsValue
@@ -40,7 +38,7 @@ object Exporting extends App {
import spark.implicits._
val scene = SinglebandGeoTiff("src/test/resources/L8-B8-Robinson-IL.tiff")
- val rf = scene.projectedRaster.toRF(128, 128).cache()
+ val rf = scene.projectedRaster.toLayer(128, 128).cache()
// While the goal of RasterFrames is to make it as easy as possible to do your geospatial analysis with a single
// construct, it is helpful to be able to transform it into other representations for various use cases.
@@ -54,17 +52,17 @@ object Exporting extends App {
// The @scaladoc[`tile_to_array`][tile_to_array] column function requires a type parameter to indicate the array element
// type you would like used. The following types may be used: `Int`, `Double`, `Byte`, `Short`, `Float`
- val withArrays = rf.withColumn("tileData", tile_to_array_int($"tile")).drop("tile")
+ val withArrays = rf.withColumn("tileData", rf_tile_to_array_int($"tile")).drop("tile")
withArrays.show(5, 40)
// You can convert the data back to an array, but you have to specify the target tile dimensions.
- val tileBack = withArrays.withColumn("tileAgain", array_to_tile($"tileData", 128, 128))
+ val tileBack = withArrays.withColumn("tileAgain", rf_array_to_tile($"tileData", 128, 128))
tileBack.drop("tileData").show(5, 40)
// Note that the created tile will not have a `NoData` value associated with it. Here's how you can do that:
- val tileBackAgain = withArrays.withColumn("tileAgain", with_no_data(array_to_tile($"tileData", 128, 128), 3))
+ val tileBackAgain = withArrays.withColumn("tileAgain", rf_with_no_data(rf_array_to_tile($"tileData", 128, 128), 3))
tileBackAgain.drop("tileData").show(5, 50)
// ## Writing to Parquet
@@ -75,15 +73,13 @@ object Exporting extends App {
// the imagery types.
//
//
- // Let's assume we have a RasterFrame we've done some fancy processing on:
-
- import geotrellis.raster.equalization._
+ // Let's assume we have a RasterFrameLayer we've done some fancy processing on:
val equalizer = udf((t: Tile) => t.equalize())
- val equalized = rf.withColumn("equalized", equalizer($"tile")).asRF
+ val equalized = rf.withColumn("equalized", equalizer($"tile")).asLayer
equalized.printSchema
- equalized.select(agg_stats($"tile")).show(false)
- equalized.select(agg_stats($"equalized")).show(false)
+ equalized.select(rf_agg_stats($"tile")).show(false)
+ equalized.select(rf_agg_stats($"equalized")).show(false)
// We write it out just like any other DataFrame, including the ability to specify partitioning:
@@ -102,12 +98,12 @@ object Exporting extends App {
val rf2 = spark.read.parquet(filePath)
rf2.printSchema
- equalized.select(agg_stats($"tile")).show(false)
- equalized.select(agg_stats($"equalized")).show(false)
+ equalized.select(rf_agg_stats($"tile")).show(false)
+ equalized.select(rf_agg_stats($"equalized")).show(false)
// ## Converting to `RDD` and `TileLayerRDD`
//
- // Since a `RasterFrame` is just a `DataFrame` with extra metadata, the method
+ // Since a `RasterFrameLayer` is just a `DataFrame` with extra metadata, the method
// @scaladoc[`DataFrame.rdd`][rdd] is available for simple conversion back to `RDD` space. The type returned
// by `.rdd` is dependent upon how you select it.
@@ -122,14 +118,14 @@ object Exporting extends App {
showType(rf.select(rf.spatialKeyColumn, $"tile").as[(SpatialKey, Tile)].rdd)
// If your goal convert a single tile column with its spatial key back to a `TileLayerRDD[K]`, then there's an additional
- // extension method on `RasterFrame` called [`toTileLayerRDD`][toTileLayerRDD], which preserves the tile layer metadata,
+ // extension method on `RasterFrameLayer` called [`toTileLayerRDD`][toTileLayerRDD], which preserves the tile layer metadata,
// enhancing interoperation with GeoTrellis RDD extension methods.
showType(rf.toTileLayerRDD($"tile".as[Tile]))
// ## Exporting a Raster
//
- // For the purposes of debugging, the RasterFrame tiles can be reassembled back into a raster for viewing. However,
+ // For the purposes of debugging, the RasterFrameLayer tiles can be reassembled back into a raster for viewing. However,
// keep in mind that this will download all the data to the driver, and reassemble it in-memory. So it's not appropriate
// for very large coverages.
//
@@ -151,7 +147,7 @@ object Exporting extends App {
// [*Download GeoTIFF*](rf-raster.tiff)
// # Exporting to a GeoTrellis Layer
- // First, convert the RasterFrame into a TileLayerRDD. The return type is an Either;
+ // First, convert the RasterFrameLayer into a TileLayerRDD. The return type is an Either;
// the `left` side is for spatial-only keyed data
val tlRDD = equalized.toTileLayerRDD($"equalized").left.get
diff --git a/core/src/test/scala/examples/LocalArithmetic.scala b/core/src/test/scala/examples/LocalArithmetic.scala
index ddf666e96..428fcc64a 100644
--- a/core/src/test/scala/examples/LocalArithmetic.scala
+++ b/core/src/test/scala/examples/LocalArithmetic.scala
@@ -19,7 +19,7 @@
package examples
-import astraea.spark.rasterframes._
+import org.locationtech.rasterframes._
import geotrellis.raster.io.geotiff.SinglebandGeoTiff
import geotrellis.spark.io.kryo.KryoRegistrator
import org.apache.spark.serializer.KryoSerializer
@@ -49,15 +49,15 @@ object LocalArithmetic extends App {
val joinedRF = bandNumbers.
map { b ⇒ (b, filenamePattern.format(b)) }.
map { case (b, f) ⇒ (b, readTiff(f)) }.
- map { case (b, t) ⇒ t.projectedRaster.toRF(s"band_$b") }.
+ map { case (b, t) ⇒ t.projectedRaster.toLayer(s"band_$b") }.
reduce(_ spatialJoin _)
- val addRF = joinedRF.withColumn("1+2", local_add(joinedRF("band_1"), joinedRF("band_2"))).asRF
- val divideRF = joinedRF.withColumn("1/2", local_divide(joinedRF("band_1"), joinedRF("band_2"))).asRF
+ val addRF = joinedRF.withColumn("1+2", rf_local_add(joinedRF("band_1"), joinedRF("band_2"))).asLayer
+ val divideRF = joinedRF.withColumn("1/2", rf_local_divide(joinedRF("band_1"), joinedRF("band_2"))).asLayer
addRF.select("1+2").collect().apply(0) .getClass
- val raster = divideRF.select(tile_sum(divideRF("1/2")),
- tile_sum(joinedRF("band_1")), tile_sum(joinedRF("band_2")))
+ val raster = divideRF.select(rf_tile_sum(divideRF("1/2")),
+ rf_tile_sum(joinedRF("band_1")), rf_tile_sum(joinedRF("band_2")))
raster.show(1)
}
\ No newline at end of file
diff --git a/core/src/test/scala/examples/Masking.scala b/core/src/test/scala/examples/Masking.scala
index bc9c59213..6270bcef1 100644
--- a/core/src/test/scala/examples/Masking.scala
+++ b/core/src/test/scala/examples/Masking.scala
@@ -1,12 +1,11 @@
package examples
-import astraea.spark.rasterframes._
+import org.locationtech.rasterframes._
import geotrellis.raster.io.geotiff.SinglebandGeoTiff
-import org.apache.spark.sql._
-import geotrellis.raster.{mask => _, _}
import geotrellis.raster.render._
+import geotrellis.raster.{mask => _, _}
+import org.apache.spark.sql._
import org.apache.spark.sql.functions._
-import astraea.spark.rasterframes.stats.{CellHistogram=>CH}
object Masking extends App {
@@ -25,18 +24,18 @@ object Masking extends App {
val joinedRF = bandNumbers.
map { b ⇒ (b, filenamePattern.format(b)) }.
map { case (b, f) ⇒ (b, readTiff(f)) }.
- map { case (b, t) ⇒ t.projectedRaster.toRF(s"band_$b") }.
+ map { case (b, t) ⇒ t.projectedRaster.toLayer(s"band_$b") }.
reduce(_ spatialJoin _)
val threshold = udf((t: Tile) => {
t.convert(IntConstantNoDataCellType).map(x => if (x > 10500) x else NODATA)
} )
- val withMaskedTile = joinedRF.withColumn("maskTile", threshold(joinedRF("band_1"))).asRF
+ val withMaskedTile = joinedRF.withColumn("maskTile", threshold(joinedRF("band_1"))).asLayer
- withMaskedTile.select(no_data_cells(withMaskedTile("maskTile"))).show()
+ withMaskedTile.select(rf_no_data_cells(withMaskedTile("maskTile"))).show()
- val masked = withMaskedTile.withColumn("masked", mask(joinedRF("band_2"), joinedRF("maskTile"))).asRF
+ val masked = withMaskedTile.withColumn("masked", rf_mask(joinedRF("band_2"), joinedRF("maskTile"))).asLayer
val maskRF = masked.toRaster(masked("masked"), 466, 428)
val b2 = masked.toRaster(masked("band_2"), 466, 428)
diff --git a/core/src/test/scala/examples/MeanValue.scala b/core/src/test/scala/examples/MeanValue.scala
index d2190a241..2ee264469 100644
--- a/core/src/test/scala/examples/MeanValue.scala
+++ b/core/src/test/scala/examples/MeanValue.scala
@@ -19,10 +19,9 @@
package examples
-import astraea.spark.rasterframes._
+import org.locationtech.rasterframes._
import geotrellis.raster.io.geotiff.SinglebandGeoTiff
import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.functions._
/**
* Compute the cell mean value of an image.
@@ -40,12 +39,12 @@ object MeanValue extends App {
val scene = SinglebandGeoTiff("src/test/resources/L8-B8-Robinson-IL.tiff")
- val rf = scene.projectedRaster.toRF(128, 128) // <-- tile size
+ val rf = scene.projectedRaster.toLayer(128, 128) // <-- tile size
rf.printSchema
val tileCol = rf("tile")
- rf.agg(agg_no_data_cells(tileCol), agg_data_cells(tileCol), agg_mean(tileCol)).show(false)
+ rf.agg(rf_agg_no_data_cells(tileCol), rf_agg_data_cells(tileCol), rf_agg_mean(tileCol)).show(false)
spark.stop()
}
diff --git a/core/src/test/scala/examples/NDVI.scala b/core/src/test/scala/examples/NDVI.scala
index 971dfd8d4..48a6f6e51 100644
--- a/core/src/test/scala/examples/NDVI.scala
+++ b/core/src/test/scala/examples/NDVI.scala
@@ -20,7 +20,7 @@
package examples
import java.nio.file.{Files, Paths}
-import astraea.spark.rasterframes._
+import org.locationtech.rasterframes._
import geotrellis.raster._
import geotrellis.raster.render._
import geotrellis.raster.io.geotiff.{GeoTiff, SinglebandGeoTiff}
@@ -46,8 +46,8 @@ object NDVI extends App {
import spark.implicits._
- def redBand = readTiff("L8-B4-Elkton-VA.tiff").projectedRaster.toRF("red_band")
- def nirBand = readTiff("L8-B5-Elkton-VA.tiff").projectedRaster.toRF("nir_band")
+ def redBand = readTiff("L8-B4-Elkton-VA.tiff").projectedRaster.toLayer("red_band")
+ def nirBand = readTiff("L8-B5-Elkton-VA.tiff").projectedRaster.toLayer("nir_band")
val ndvi = udf((red: Tile, nir: Tile) => {
val redd = red.convert(DoubleConstantNoDataCellType)
@@ -55,7 +55,7 @@ object NDVI extends App {
(nird - redd) / (nird + redd)
})
- val rf = redBand.spatialJoin(nirBand).withColumn("ndvi", ndvi($"red_band", $"nir_band")).asRF
+ val rf = redBand.spatialJoin(nirBand).withColumn("ndvi", ndvi($"red_band", $"nir_band")).asLayer
rf.printSchema()
diff --git a/core/src/test/scala/examples/NaturalColorComposite.scala b/core/src/test/scala/examples/NaturalColorComposite.scala
index 3dee4092c..1a3e212ac 100644
--- a/core/src/test/scala/examples/NaturalColorComposite.scala
+++ b/core/src/test/scala/examples/NaturalColorComposite.scala
@@ -20,11 +20,8 @@
package examples
-import java.nio.file.{CopyOption, StandardCopyOption}
-
+import geotrellis.raster.io.geotiff.SinglebandGeoTiff
import geotrellis.raster.{MultibandTile, UByteConstantNoDataCellType}
-import geotrellis.raster.io.geotiff.{GeoTiff, SinglebandGeoTiff}
-import geotrellis.raster.render._
/**
*
diff --git a/core/src/test/scala/examples/Scratch.scala b/core/src/test/scala/examples/Scratch.scala
deleted file mode 100644
index fca8c4785..000000000
--- a/core/src/test/scala/examples/Scratch.scala
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2017 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- */
-
-package examples
-
-import astraea.spark.rasterframes._
-import geotrellis.spark.io.kryo.KryoRegistrator
-import org.apache.spark.serializer.KryoSerializer
-import org.apache.spark.sql._
-
-/**
- * Boilerplate test run file
- *
- * @since 10/8/17
- */
-object Scratch extends App {
- implicit val spark = SparkSession.builder()
- .master("local[*]")
- .appName(getClass.getName)
- .config("spark.serializer", classOf[KryoSerializer].getName)
- .config("spark.kryoserializer.buffer.max", "500m")
- .config("spark.kryo.registrationRequired", "false")
- .config("spark.kryo.registrator", classOf[KryoRegistrator].getName)
- .getOrCreate()
- .withRasterFrames
-
- import spark.implicits._
-
- // Your Spark code here.....
-
-}
diff --git a/core/src/test/scala/examples/Tour.scala b/core/src/test/scala/examples/Tour.scala
deleted file mode 100644
index d69cb5a1c..000000000
--- a/core/src/test/scala/examples/Tour.scala
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2017 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- */
-
-package examples
-
-import org.apache.spark.sql._
-import org.apache.spark.sql.functions._
-import geotrellis.raster.io.geotiff._
-import geotrellis.raster.{ByteConstantNoDataCellType, Tile}
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.ml.TileExploder
-import geotrellis.raster.render.{ColorRamps, IndexedColorMap}
-import org.apache.spark.ml.Pipeline
-import org.apache.spark.ml.clustering.KMeans
-import org.apache.spark.ml.feature.VectorAssembler
-
-/**
- * Example tour of some general features in RasterFrames
- *
- * @since 10/24/17
- */
-object Tour extends App {
- implicit val spark = SparkSession.builder()
- .master("local[*]")
- .appName(getClass.getName)
- .getOrCreate()
- .withRasterFrames
-
- import spark.implicits._
-
- // Read in a geo-referenced image
- val scene = SinglebandGeoTiff("src/test/resources/L8-B8-Robinson-IL.tiff")
-
- // Convert it to a raster frame, discretizing it into the given tile size.
- val rf = scene.projectedRaster.toRF(64, 64)
-
- // See how many tiles we have after discretization
- println("Tile count: " + rf.count())
-
- // Take a peek at what we're working with
- rf.show(8, false)
-
- // Confirm we have equally sized tiles
- rf.select(tile_dimensions($"tile")).distinct().show()
-
- // Count the number of no-data cells
- rf.select(agg_no_data_cells($"tile")).show(false)
-
- // Compute per-tile statistics
- rf.select(tile_stats($"tile")).show(8, false)
-
- // Compute some aggregate stats over all cells
- rf.select(agg_stats($"tile")).show(false)
-
- // Create a Spark UDT to perform contrast adjustment via GeoTrellis
- val contrast = udf((t: Tile) ⇒ t.sigmoidal(0.2, 10))
-
- // Let's contrast adjust the tile column
- val withAdjusted = rf.withColumn("adjusted", contrast($"tile")).asRF
-
- // Show the stats for the adjusted version
- withAdjusted.select(agg_stats($"adjusted")).show(false)
-
- // Reassemble into a raster and save to a file
- val raster = withAdjusted.toRaster($"adjusted", 774, 500)
- GeoTiff(raster).write("contrast-adjusted.tiff")
-
- // Perform some arbitrary local ops between columns and render
- val withOp = withAdjusted.withColumn("op", local_subtract($"tile", $"adjusted")).asRF
- val raster2 = withOp.toRaster($"op", 774, 500)
- GeoTiff(raster2).write("with-op.tiff")
-
-
- // Perform k-means clustering
- val k = 4
-
- // SparkML doesn't like NoData/NaN values, so we set the no-data value to something less offensive
- val forML = rf.select(rf.spatialKeyColumn, with_no_data($"tile", 99999) as "tile").asRF
-
- // First we instantiate the transformer that converts tile rows into cell rows.
- val exploder = new TileExploder()
-
- // This transformer wraps the pixel values in a vector.
- // Could use this with multiple bands
- val assembler = new VectorAssembler().
- setInputCols(Array("tile")).
- setOutputCol("features")
-
- // Or clustering algorithm
- val kmeans = new KMeans().setK(k)
-
- // Construct the ML pipeline
- val pipeline = new Pipeline().setStages(Array(exploder, assembler, kmeans))
-
- // Compute the model
- val model = pipeline.fit(forML)
-
- // Score the data
- val clusteredCells = model.transform(forML)
-
- clusteredCells.show()
-
- clusteredCells.groupBy("prediction").count().show
-
- // Reassembling the clustering results takes a number of steps.
- val tlm = rf.tileLayerMetadata.left.get
-
- // RasterFrames provides a special aggregation function for assembling tiles from cells with column/row indexes
- val retiled = clusteredCells.groupBy(forML.spatialKeyColumn).agg(
- assemble_tile($"column_index", $"row_index", $"prediction", tlm.tileCols, tlm.tileRows, ByteConstantNoDataCellType)
- )
-
- val clusteredRF = retiled.asRF($"spatial_key", tlm)
-
- val raster3 = clusteredRF.toRaster($"prediction", 774, 500)
-
- val clusterColors = IndexedColorMap.fromColorMap(
- ColorRamps.Viridis.toColorMap((0 until k).toArray)
- )
-
- GeoTiff(raster3).copy(options = GeoTiffOptions(clusterColors)).write("clustered.tiff")
-
- spark.stop()
-}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/ExplodeSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/ExplodeSpec.scala
similarity index 62%
rename from core/src/test/scala/astraea/spark/rasterframes/ExplodeSpec.scala
rename to core/src/test/scala/org/locationtech/rasterframes/ExplodeSpec.scala
index a06b6444b..da0af2397 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/ExplodeSpec.scala
+++ b/core/src/test/scala/org/locationtech/rasterframes/ExplodeSpec.scala
@@ -15,9 +15,11 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes
+package org.locationtech.rasterframes
import geotrellis.raster._
import geotrellis.raster.resample.NearestNeighbor
@@ -43,14 +45,14 @@ class ExplodeSpec extends TestEnvironment with TestData {
write(query)
assert(query.select("cell_0", "cell_1").as[(Double, Double)].collect().forall(_ == ((1.0, 2.0))))
val query2 = sql(
- """|select rf_tile_dimensions(tiles) as dims, rf_explode_tiles(tiles) from (
+ """|select rf_dimensions(tiles) as dims, rf_explode_tiles(tiles) from (
|select rf_make_constant_tile(1, 10, 10, 'int8raw') as tiles)
|""".stripMargin)
write(query2)
assert(query2.columns.length === 4)
val df = Seq[(Tile, Tile)]((byteArrayTile, byteArrayTile)).toDF("tile1", "tile2")
- val exploded = df.select(explode_tiles($"tile1", $"tile2"))
+ val exploded = df.select(rf_explode_tiles($"tile1", $"tile2"))
//exploded.printSchema()
assert(exploded.columns.length === 4)
assert(exploded.count() === 9)
@@ -59,17 +61,17 @@ class ExplodeSpec extends TestEnvironment with TestData {
it("should explode tiles with random sampling") {
val df = Seq[(Tile, Tile)]((byteArrayTile, byteArrayTile)).toDF("tile1", "tile2")
- val exploded = df.select(explode_tiles_sample(0.5, $"tile1", $"tile2"))
+ val exploded = df.select(rf_explode_tiles_sample(0.5, $"tile1", $"tile2"))
assert(exploded.columns.length === 4)
assert(exploded.count() < 9)
}
it("should handle null tiles") {
val df = Seq[Tile](null, byteArrayTile, null, byteArrayTile, null).toDF("tile1")
- val exploded = df.select(explode_tiles($"tile1"))
+ val exploded = df.select(rf_explode_tiles($"tile1"))
assert(exploded.count === byteArrayTile.size * 2)
val df2 = Seq[(Tile, Tile)]((byteArrayTile, null), (null, byteArrayTile), (byteArrayTile, byteArrayTile)).toDF("tile1", "tile2")
- val exploded2 = df2.select(explode_tiles($"tile1", $"tile2"))
+ val exploded2 = df2.select(rf_explode_tiles($"tile1", $"tile2"))
assert(exploded2.count === byteArrayTile.size * 3)
}
@@ -77,7 +79,7 @@ class ExplodeSpec extends TestEnvironment with TestData {
// Create a tile with a single (wierd) no-data value
val tile: Tile = UShortArrayTile(rangeArray(9, _.toShort), 3, 3, 5.toShort)
val cells = Seq(tile).toDF("tile")
- .select(explode_tiles($"tile"))
+ .select(rf_explode_tiles($"tile"))
.select($"tile".as[Double])
.collect()
@@ -87,7 +89,7 @@ class ExplodeSpec extends TestEnvironment with TestData {
it("should handle user-defined NoData values in tile sampler") {
val tiles = allTileTypes.filter(t ⇒ !t.isInstanceOf[BitArrayTile]).map(_.withNoData(Some(3)))
val cells = tiles.toDF("tile")
- .select(explode_tiles($"tile"))
+ .select(rf_explode_tiles($"tile"))
.select($"tile".as[Double])
.collect()
cells.count(_.isNaN) should be(tiles.size)
@@ -103,53 +105,92 @@ class ExplodeSpec extends TestEnvironment with TestData {
val tile = FloatConstantTile(1.1f, 10, 10, FloatCellType)
val df = Seq[Tile](tile).toDF("tile")
- val arrayDF = df.select(tile_to_array_double($"tile").as[Array[Double]])
+ val arrayDF = df.select(rf_tile_to_array_double($"tile").as[Array[Double]])
arrayDF.first().sum should be (110.0 +- 0.0001)
}
it("should convert an array into a tile") {
- val tile = FloatConstantTile(1.1f, 10, 10, FloatCellType)
+ val tile = TestData.randomTile(10, 10, FloatCellType)
val df = Seq[Tile](tile, null).toDF("tile")
- val arrayDF = df.withColumn("tileArray", tile_to_array_double($"tile"))
+ val arrayDF = df.withColumn("tileArray", rf_tile_to_array_double($"tile"))
- val back = arrayDF.withColumn("backToTile", array_to_tile($"tileArray", 10, 10))
+ val back = arrayDF.withColumn("backToTile", rf_array_to_tile($"tileArray", 10, 10))
val result = back.select($"backToTile".as[Tile]).first
assert(result.toArrayDouble() === tile.toArrayDouble())
- val hasNoData = back.withColumn("with_no_data", with_no_data($"backToTile", 0))
+ // Same round trip, but with SQL expression for rf_array_to_tile
+ val resultSql = arrayDF.selectExpr("rf_array_to_tile(tileArray, 10, 10) as backToTile").as[Tile].first
+
+ assert(resultSql.toArrayDouble() === tile.toArrayDouble())
- val result2 = hasNoData.select($"with_no_data".as[Tile]).first
+ val hasNoData = back.withColumn("withNoData", rf_with_no_data($"backToTile", 0))
+
+ val result2 = hasNoData.select($"withNoData".as[Tile]).first
assert(result2.cellType.asInstanceOf[UserDefinedNoData[_]].noDataValue === 0)
}
it("should reassemble single exploded tile") {
- val df = Seq[Tile](byteArrayTile).toDF("tile")
- .select(explode_tiles($"tile"))
+ val tile = TestData.randomTile(10, 10, FloatCellType)
+ val df = Seq[Tile](tile).toDF("tile")
+ .select(rf_explode_tiles($"tile"))
- val assembled = df.agg(assemble_tile(
+ val assembled = df.agg(
+ rf_assemble_tile(
COLUMN_INDEX_COLUMN,
ROW_INDEX_COLUMN,
TILE_COLUMN,
- 3, 3, byteArrayTile.cellType
+ 10, 10, tile.cellType
)).as[Tile]
val result = assembled.first()
- assert(result === byteArrayTile)
+ assert(result === tile)
+
+ val assembledSqlExpr = df.selectExpr("rf_assemble_tile(column_index, row_index, tile, 10, 10)")
+
+ val resultSql = assembledSqlExpr.as[Tile].first()
+ assert(resultSql === tile)
+
+ checkDocs("rf_assemble_tile")
+ }
+
+ it("should reassemble single exploded tile with user-defined nodata") {
+ val ct = FloatUserDefinedNoDataCellType(-99)
+ val tile = TestData.injectND(3)(TestData.randomTile(5, 5, ct))
+ val df = Seq[Tile](tile).toDF("tile")
+ .select(rf_explode_tiles($"tile"))
+
+ val assembled = df.agg(rf_assemble_tile(
+ COLUMN_INDEX_COLUMN,
+ ROW_INDEX_COLUMN,
+ TILE_COLUMN,
+ 5, 5, ct
+ )).as[Tile]
+
+ val result = assembled.first()
+ assert(result === tile)
+
+ // and with SQL API
+ logger.info(df.schema.treeString)
+
+ val assembledSqlExpr = df.selectExpr(s"rf_convert_cell_type(rf_assemble_tile(column_index, row_index, tile, 5, 5), '${ct.toString()}') as tile")
+
+ val resultSql = assembledSqlExpr.as[Tile].first()
+ assert(resultSql === tile)
+ assert(resultSql.cellType === ct)
}
it("should reassemble multiple exploded tiles") {
val image = sampleSmallGeoTiff
- val tinyTiles = image.projectedRaster.toRF(10, 10)
+ val tinyTiles = image.projectedRaster.toLayer(10, 10)
- val exploded = tinyTiles.select(tinyTiles.spatialKeyColumn, explode_tiles(tinyTiles.tileColumns.head))
-
- //exploded.printSchema()
+ val exploded = tinyTiles.select(tinyTiles.spatialKeyColumn, rf_explode_tiles(tinyTiles.tileColumns.head))
val assembled = exploded.groupBy(tinyTiles.spatialKeyColumn)
- .agg(assemble_tile(
+ .agg(
+ rf_assemble_tile(
COLUMN_INDEX_COLUMN,
ROW_INDEX_COLUMN,
TILE_COLUMN,
@@ -158,7 +199,7 @@ class ExplodeSpec extends TestEnvironment with TestData {
val tlm = tinyTiles.tileLayerMetadata.left.get
- val rf = assembled.asRF(SPATIAL_KEY_COLUMN, tlm)
+ val rf = assembled.asLayer(SPATIAL_KEY_COLUMN, tlm)
val (cols, rows) = image.tile.dimensions
diff --git a/core/src/test/scala/astraea/spark/rasterframes/ExtensionMethodSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/ExtensionMethodSpec.scala
similarity index 64%
rename from core/src/test/scala/astraea/spark/rasterframes/ExtensionMethodSpec.scala
rename to core/src/test/scala/org/locationtech/rasterframes/ExtensionMethodSpec.scala
index 81c2d9202..eeea68544 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/ExtensionMethodSpec.scala
+++ b/core/src/test/scala/org/locationtech/rasterframes/ExtensionMethodSpec.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea. Inc.
+ * Copyright 2018 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,16 +15,18 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes
-import astraea.spark.rasterframes.util.SubdivideSupport._
+package org.locationtech.rasterframes
+
import geotrellis.proj4.LatLng
import geotrellis.raster.{ByteCellType, GridBounds, TileLayout}
+import geotrellis.spark.tiling.{CRSWorldExtent, LayoutDefinition}
import geotrellis.spark.{KeyBounds, SpatialKey, TileLayerMetadata}
-import geotrellis.spark.tiling.LayoutDefinition
-import geotrellis.spark.tiling.CRSWorldExtent
+import org.apache.spark.sql.Encoders
+import org.locationtech.rasterframes.util.SubdivideSupport
/**
* Tests miscellaneous extension methods.
@@ -32,20 +34,20 @@ import geotrellis.spark.tiling.CRSWorldExtent
* @since 3/20/18
*/
//noinspection ScalaUnusedSymbol
-class ExtensionMethodSpec extends TestEnvironment with TestData {
- lazy val rf = sampleTileLayerRDD.toRF
+class ExtensionMethodSpec extends TestEnvironment with TestData with SubdivideSupport {
+ lazy val rf = sampleTileLayerRDD.toLayer
describe("DataFrame exention methods") {
it("should maintain original type") {
val df = rf.withPrefixedColumnNames("_foo_")
- "val rf2: RasterFrame = df" should compile
+ "val rf2: RasterFrameLayer = df" should compile
}
it("should provide tagged column access") {
val df = rf.drop("tile")
"val Some(col) = df.spatialKeyColumn" should compile
}
}
- describe("RasterFrame exention methods") {
+ describe("RasterFrameLayer exention methods") {
it("should provide spatial key column") {
noException should be thrownBy {
rf.spatialKeyColumn
@@ -54,14 +56,28 @@ class ExtensionMethodSpec extends TestEnvironment with TestData {
}
}
describe("Miscellaneous extensions") {
+ import spark.implicits._
+
+ it("should find multiple extent columns") {
+ val df = Seq((extent, "fred", extent, 34.0)).toDF("e1", "s", "e2", "n")
+ df.extentColumns.size should be(2)
+ }
+
+ it("should find multiple crs columns") {
+ // Not sure why implicit resolution isn't handling this properly.
+ implicit val enc = Encoders.tuple(crsEncoder, Encoders.STRING, crsEncoder, Encoders.scalaDouble)
+ val df = Seq((pe.crs, "fred", pe.crs, 34.0)).toDF("c1", "s", "c2", "n")
+ df.crsColumns.size should be(2)
+ }
+
it("should split TileLayout") {
val tl1 = TileLayout(2, 3, 10, 10)
assert(tl1.subdivide(0) === tl1)
assert(tl1.subdivide(1) === tl1)
assert(tl1.subdivide(2) === TileLayout(4, 6, 5, 5))
assertThrows[IllegalArgumentException](tl1.subdivide(-1))
-
}
+
it("should split KeyBounds[SpatialKey]") {
val grid = GridBounds(0, 0, 9, 9)
val kb = KeyBounds(grid)
@@ -76,10 +92,10 @@ class ExtensionMethodSpec extends TestEnvironment with TestData {
it("should split key") {
val s1 = SpatialKey(0, 0).subdivide(2)
- assert(s1 === Seq(SpatialKey(0,0), SpatialKey(1,0), SpatialKey(0,1), SpatialKey(1,1)))
+ assert(s1 === Seq(SpatialKey(0, 0), SpatialKey(1, 0), SpatialKey(0, 1), SpatialKey(1, 1)))
val s2 = SpatialKey(2, 3).subdivide(3)
- assert(s2 === Seq(SpatialKey(6,9), SpatialKey(7,9), SpatialKey(8,9), SpatialKey(6,10), SpatialKey(7,10), SpatialKey(8,10), SpatialKey(6,11), SpatialKey(7,11), SpatialKey(8,11)))
+ assert(s2 === Seq(SpatialKey(6, 9), SpatialKey(7, 9), SpatialKey(8, 9), SpatialKey(6, 10), SpatialKey(7, 10), SpatialKey(8, 10), SpatialKey(6, 11), SpatialKey(7, 11), SpatialKey(8, 11)))
}
it("should split TileLayerMetadata[SpatialKey]") {
@@ -91,7 +107,12 @@ class ExtensionMethodSpec extends TestEnvironment with TestData {
val divided = tlm.subdivide(2)
- assert(divided.tileLayout.tileDimensions === (tileSize/2, tileSize/2))
+ assert(divided.tileLayout.tileDimensions === (tileSize / 2, tileSize / 2))
+ }
+
+ it("should render Markdown") {
+ import org.locationtech.rasterframes.util._
+ rf.toMarkdown().count(_ == '|') shouldBe >=(3 * 5)
}
}
}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/JTSSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/GeometryFunctionsSpec.scala
similarity index 52%
rename from core/src/test/scala/astraea/spark/rasterframes/JTSSpec.scala
rename to core/src/test/scala/org/locationtech/rasterframes/GeometryFunctionsSpec.scala
index 52def8620..54321d0dc 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/JTSSpec.scala
+++ b/core/src/test/scala/org/locationtech/rasterframes/GeometryFunctionsSpec.scala
@@ -15,22 +15,27 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes
+package org.locationtech.rasterframes
-import com.vividsolutions.jts.geom._
import geotrellis.proj4.{LatLng, Sinusoidal, WebMercator}
-import geotrellis.vector.{Point ⇒ GTPoint}
+import geotrellis.vector.{Extent, Point => GTPoint}
+import org.locationtech.jts.geom._
+import spray.json.JsNumber
/**
* Test rig for operations providing interop with JTS types.
*
* @since 12/16/17
*/
-class JTSSpec extends TestEnvironment with TestData with StandardColumns {
- describe("JTS interop") {
- val rf = l8Sample(1).projectedRaster.toRF(10, 10).withBounds()
+class GeometryFunctionsSpec extends TestEnvironment with TestData with StandardColumns {
+ import spark.implicits._
+
+ describe("Vector geometry operations") {
+ val rf = l8Sample(1).projectedRaster.toLayer(10, 10).withGeometry()
it("should allow joining and filtering of tiles based on points") {
import spark.implicits._
@@ -43,32 +48,32 @@ class JTSSpec extends TestEnvironment with TestData with StandardColumns {
val locs = coords.toDF("id", "point")
withClue("join with point column") {
- assert(rf.join(locs, st_contains(BOUNDS_COLUMN, $"point")).count === coords.length)
- assert(rf.join(locs, st_intersects(BOUNDS_COLUMN, $"point")).count === coords.length)
+ assert(rf.join(locs, st_contains(GEOMETRY_COLUMN, $"point")).count === coords.length)
+ assert(rf.join(locs, st_intersects(GEOMETRY_COLUMN, $"point")).count === coords.length)
}
withClue("point literal") {
val point = coords.head._2
- assert(rf.filter(st_contains(BOUNDS_COLUMN, geomLit(point))).count === 1)
- assert(rf.filter(st_intersects(BOUNDS_COLUMN, geomLit(point))).count === 1)
- assert(rf.filter(BOUNDS_COLUMN intersects point).count === 1)
- assert(rf.filter(BOUNDS_COLUMN intersects GTPoint(point)).count === 1)
- assert(rf.filter(BOUNDS_COLUMN containsGeom point).count === 1)
+ assert(rf.filter(st_contains(GEOMETRY_COLUMN, geomLit(point))).count === 1)
+ assert(rf.filter(st_intersects(GEOMETRY_COLUMN, geomLit(point))).count === 1)
+ assert(rf.filter(GEOMETRY_COLUMN intersects point).count === 1)
+ assert(rf.filter(GEOMETRY_COLUMN intersects GTPoint(point)).count === 1)
+ assert(rf.filter(GEOMETRY_COLUMN containsGeom point).count === 1)
}
withClue("exercise predicates") {
val point = geomLit(coords.head._2)
- assert(rf.filter(st_covers(BOUNDS_COLUMN, point)).count === 1)
- assert(rf.filter(st_crosses(BOUNDS_COLUMN, point)).count === 0)
- assert(rf.filter(st_disjoint(BOUNDS_COLUMN, point)).count === rf.count - 1)
- assert(rf.filter(st_overlaps(BOUNDS_COLUMN, point)).count === 0)
- assert(rf.filter(st_touches(BOUNDS_COLUMN, point)).count === 0)
- assert(rf.filter(st_within(BOUNDS_COLUMN, point)).count === 0)
+ assert(rf.filter(st_covers(GEOMETRY_COLUMN, point)).count === 1)
+ assert(rf.filter(st_crosses(GEOMETRY_COLUMN, point)).count === 0)
+ assert(rf.filter(st_disjoint(GEOMETRY_COLUMN, point)).count === rf.count - 1)
+ assert(rf.filter(st_overlaps(GEOMETRY_COLUMN, point)).count === 0)
+ assert(rf.filter(st_touches(GEOMETRY_COLUMN, point)).count === 0)
+ assert(rf.filter(st_within(GEOMETRY_COLUMN, point)).count === 0)
}
}
it("should allow construction of geometry literals") {
- import JTS._
+ import GeomData._
assert(dfBlank.select(geomLit(point)).first === point)
assert(dfBlank.select(geomLit(line)).first === line)
assert(dfBlank.select(geomLit(poly)).first === poly)
@@ -80,8 +85,8 @@ class JTSSpec extends TestEnvironment with TestData with StandardColumns {
it("should provide a means of getting a bounding box") {
import spark.implicits._
- val boxed = rf.select(BOUNDS_COLUMN, envelope(BOUNDS_COLUMN) as "env")
- assert(boxed.select($"env".as[Envelope]).first.getArea > 0)
+ val boxed = rf.select(GEOMETRY_COLUMN, st_extent(GEOMETRY_COLUMN) as "extent")
+ assert(boxed.select($"extent".as[Extent]).first.area > 0)
assert(boxed.toDF("bounds", "bbox").select("bbox.*").schema.length === 4)
}
@@ -108,9 +113,9 @@ class JTSSpec extends TestEnvironment with TestData with StandardColumns {
val df = Seq((latLng, webMercator)).toDF("ll", "wm")
val rp = df.select(
- reproject_geometry($"ll", LatLng, WebMercator) as "wm2",
- reproject_geometry($"wm", WebMercator, LatLng) as "ll2",
- reproject_geometry(reproject_geometry($"ll", LatLng, Sinusoidal), Sinusoidal, WebMercator) as "wm3"
+ st_reproject($"ll", LatLng, WebMercator) as "wm2",
+ st_reproject($"wm", WebMercator, LatLng) as "ll2",
+ st_reproject(st_reproject($"ll", LatLng, Sinusoidal), Sinusoidal, WebMercator) as "wm3"
).as[(Geometry, Geometry, Geometry)]
@@ -123,9 +128,41 @@ class JTSSpec extends TestEnvironment with TestData with StandardColumns {
df.createOrReplaceTempView("geom")
- val wm4 = sql("SELECT rf_reproject_geometry(ll, '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs', 'EPSG:3857') AS wm4 from geom")
+ val wm4 = sql("SELECT st_reproject(ll, '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs', 'EPSG:3857') AS wm4 from geom")
.as[Geometry].first()
wm4 should matchGeom(webMercator, 0.00001)
+
+ // TODO: See comment in `org.locationtech.rasterframes.expressions.register` for
+ // TODO: what needs to happen to support this.
+ //checkDocs("st_reproject")
}
}
+
+ it("should rasterize geometry") {
+ val rf = l8Sample(1).projectedRaster.toLayer.withGeometry()
+ val df = GeomData.features.map(f ⇒ (
+ f.geom.reproject(LatLng, rf.crs).jtsGeom,
+ f.data.fields("id").asInstanceOf[JsNumber].value.intValue()
+ )).toDF("geom", "__fid__")
+
+ val toRasterize = rf.crossJoin(df)
+
+ val tlm = rf.tileLayerMetadata.merge
+
+ val (cols, rows) = tlm.layout.tileLayout.tileDimensions
+
+ val rasterized = toRasterize.withColumn("rasterized", rf_rasterize($"geom", GEOMETRY_COLUMN, $"__fid__", cols, rows))
+
+ assert(rasterized.count() === df.count() * rf.count())
+ assert(rasterized.select(rf_dimensions($"rasterized")).distinct().count() === 1)
+ val pixelCount = rasterized.select(rf_agg_data_cells($"rasterized")).first()
+ assert(pixelCount < cols * rows)
+
+
+ toRasterize.createOrReplaceTempView("stuff")
+ val viaSQL = sql(s"select rf_rasterize(geom, geometry, __fid__, $cols, $rows) as rasterized from stuff")
+ assert(viaSQL.select(rf_agg_data_cells($"rasterized")).first === pixelCount)
+
+ //rasterized.select($"rasterized".as[Tile]).foreach(t ⇒ t.renderPng(ColorMaps.IGBP).write("target/" + t.hashCode() + ".png"))
+ }
}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/MetadataSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/MetadataSpec.scala
similarity index 60%
rename from core/src/test/scala/astraea/spark/rasterframes/MetadataSpec.scala
rename to core/src/test/scala/org/locationtech/rasterframes/MetadataSpec.scala
index 4960f7e65..0f179937a 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/MetadataSpec.scala
+++ b/core/src/test/scala/org/locationtech/rasterframes/MetadataSpec.scala
@@ -1,4 +1,25 @@
-package astraea.spark.rasterframes
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2017 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes
import org.apache.spark.sql.types.MetadataBuilder
@@ -14,7 +35,7 @@ class MetadataSpec extends TestEnvironment with TestData {
describe("Metadata storage") {
it("should serialize and attach metadata") {
- //val rf = sampleGeoTiff.projectedRaster.toRF(128, 128)
+ //val rf = sampleGeoTiff.projectedRaster.toLayer(128, 128)
val df = spark.createDataset(Seq((1, "one"), (2, "two"), (3, "three"))).toDF("num", "str")
val withmeta = df.mapColumnAttribute($"num", attr ⇒ {
attr.withMetadata(sampleMetadata)
diff --git a/core/src/test/scala/astraea/spark/rasterframes/RasterFrameSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/RasterFrameSpec.scala
similarity index 79%
rename from core/src/test/scala/astraea/spark/rasterframes/RasterFrameSpec.scala
rename to core/src/test/scala/org/locationtech/rasterframes/RasterFrameSpec.scala
index 984da98e7..e77a0fecc 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/RasterFrameSpec.scala
+++ b/core/src/test/scala/org/locationtech/rasterframes/RasterFrameSpec.scala
@@ -1,11 +1,32 @@
-package astraea.spark.rasterframes
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes
import java.sql.Timestamp
import java.time.ZonedDateTime
-import astraea.spark.rasterframes.util._
+import org.locationtech.rasterframes.util._
import geotrellis.proj4.LatLng
import geotrellis.raster.render.{ColorMap, ColorRamp}
import geotrellis.raster.{ProjectedRaster, Tile, TileFeature, TileLayout, UByteCellType}
@@ -14,11 +35,12 @@ import geotrellis.spark.tiling._
import geotrellis.vector.{Extent, ProjectedExtent}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{SQLContext, SparkSession}
+import org.locationtech.rasterframes.model.TileDimensions
import scala.util.control.NonFatal
/**
- * RasterFrame test rig.
+ * RasterFrameLayer test rig.
*
* @since 7/10/17
*/
@@ -52,12 +74,12 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys
}
}
- describe("RasterFrame") {
+ describe("RasterFrameLayer") {
it("should implicitly convert from spatial layer type") {
val tileLayerRDD = TestData.randomSpatialTileLayerRDD(20, 20, 2, 2)
- val rf = tileLayerRDD.toRF
+ val rf = tileLayerRDD.toLayer
assert(rf.tileColumns.nonEmpty)
assert(rf.spatialKeyColumn.columnName == "spatial_key")
@@ -66,16 +88,14 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys
assert(rf.schema.head.metadata.json.contains("tileLayout"))
assert(
- rf.select(tile_dimensions($"tile"))
- .as[Tuple1[(Int, Int)]]
- .map(_._1)
+ rf.select(rf_dimensions($"tile"))
.collect()
- .forall(_ == (10, 10))
+ .forall(_ == TileDimensions(10, 10))
)
assert(rf.count() === 4)
- val cols = tileLayerRDD.toRF("foo").columns
+ val cols = tileLayerRDD.toLayer("foo").columns
assert(!cols.contains("tile"))
assert(cols.contains("foo"))
}
@@ -84,7 +104,7 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys
val tileLayerRDD = TestData.randomSpatioTemporalTileLayerRDD(20, 20, 2, 2)
- val rf = tileLayerRDD.toRF
+ val rf = tileLayerRDD.toLayer
try {
assert(rf.tileColumns.nonEmpty)
@@ -93,11 +113,10 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys
}
catch {
case NonFatal(ex) ⇒
- rf.printSchema()
println(rf.schema.prettyJson)
throw ex
}
- val cols = tileLayerRDD.toRF("foo").columns
+ val cols = tileLayerRDD.toLayer("foo").columns
assert(!cols.contains("tile"))
assert(cols.contains("foo"))
}
@@ -116,7 +135,7 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys
val tileLayerRDD = TileFeatureLayerRDD(tileRDD, metadata)
- val rf = tileLayerRDD.toRF
+ val rf = tileLayerRDD.toLayer
assert(rf.columns.toSet === Set(SPATIAL_KEY_COLUMN, TILE_COLUMN, TILE_FEATURE_DATA_COLUMN).map(_.columnName))
}
@@ -135,14 +154,14 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys
val tileLayerRDD = TileFeatureLayerRDD(tileRDD, metadata)
- val rf = tileLayerRDD.toRF
+ val rf = tileLayerRDD.toLayer
assert(rf.columns.toSet === Set(SPATIAL_KEY_COLUMN, TEMPORAL_KEY_COLUMN, TILE_COLUMN, TILE_FEATURE_DATA_COLUMN).map(_.columnName))
}
it("should support adding a timestamp column") {
val now = ZonedDateTime.now()
- val rf = sampleGeoTiff.projectedRaster.toRF(256, 256)
+ val rf = sampleGeoTiff.projectedRaster.toLayer(256, 256)
val wt = rf.addTemporalComponent(now)
val goodie = wt.withTimestamp()
assert(goodie.columns.contains("timestamp"))
@@ -153,7 +172,7 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys
}
it("should support spatial joins") {
- val rf = sampleGeoTiff.projectedRaster.toRF(256, 256)
+ val rf = sampleGeoTiff.projectedRaster.toLayer(256, 256)
val wt = rf.addTemporalComponent(TemporalKey(34))
@@ -167,11 +186,11 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys
}
it("should have correct schema on inner spatial joins") {
- val left = sampleGeoTiff.projectedRaster.toRF(256, 256)
+ val left = sampleGeoTiff.projectedRaster.toLayer(256, 256)
.addTemporalComponent(TemporalKey(34))
val right = left.withColumnRenamed(left.tileColumns.head.columnName, "rightTile")
- .asRF
+ .asLayer
val joined = left.spatialJoin(right)
// since right is a copy of left, should not drop any rows with inner join
@@ -180,22 +199,25 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys
// Should use left's key column names
assert(joined.spatialKeyColumn.columnName === left.spatialKeyColumn.columnName)
assert(joined.temporalKeyColumn.map(_.columnName) === left.temporalKeyColumn.map(_.columnName))
-
+ assert(joined.tileColumns.size === 2)
+ assert(joined.notTileColumns.size === 2)
+ assert(joined.tileColumns.toSet === joined.tileColumns.toSet)
+ assert(joined.tileColumns.toSet !== joined.notTileColumns.toSet)
}
- it("should convert a GeoTiff to RasterFrame") {
+ it("should convert a GeoTiff to RasterFrameLayer") {
val praster: ProjectedRaster[Tile] = sampleGeoTiff.projectedRaster
val (cols, rows) = praster.raster.dimensions
val layoutCols = math.ceil(cols / 128.0).toInt
val layoutRows = math.ceil(rows / 128.0).toInt
- assert(praster.toRF.count() === 1)
- assert(praster.toRF(128, 128).count() === (layoutCols * layoutRows))
+ assert(praster.toLayer.count() === 1)
+ assert(praster.toLayer(128, 128).count() === (layoutCols * layoutRows))
}
it("should provide TileLayerMetadata[SpatialKey]") {
- val rf = sampleGeoTiff.projectedRaster.toRF(256, 256)
+ val rf = sampleGeoTiff.projectedRaster.toLayer(256, 256)
val tlm = rf.tileLayerMetadata.merge
val bounds = tlm.bounds.get
assert(bounds === KeyBounds(SpatialKey(0, 0), SpatialKey(3, 1)))
@@ -203,7 +225,7 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys
it("should provide TileLayerMetadata[SpaceTimeKey]") {
val now = ZonedDateTime.now()
- val rf = sampleGeoTiff.projectedRaster.toRF(256, 256, now)
+ val rf = sampleGeoTiff.projectedRaster.toLayer(256, 256, now)
val tlm = rf.tileLayerMetadata.merge
val bounds = tlm.bounds.get
assert(bounds._1 === SpaceTimeKey(0, 0, now))
@@ -213,7 +235,7 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys
// it("should clip TileLayerMetadata extent") {
// val tiled = sampleTileLayerRDD
//
-// val rf = tiled.reproject(LatLng, tiled.metadata.layout)._2.toRF
+// val rf = tiled.reproject(LatLng, tiled.metadata.layout)._2.toLayer
//
// val worldish = Extent(-179, -89, 179, 89)
// val areaish = Extent(-90, 30, -81, 40)
@@ -240,14 +262,14 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys
}
it("shouldn't clip already clipped extents") {
- val rf = TestData.randomSpatialTileLayerRDD(1024, 1024, 8, 8).toRF
+ val rf = TestData.randomSpatialTileLayerRDD(1024, 1024, 8, 8).toLayer
val expected = rf.tileLayerMetadata.merge.extent
val computed = rf.clipLayerExtent.tileLayerMetadata.merge.extent
basicallySame(expected, computed)
val pr = sampleGeoTiff.projectedRaster
- val rf2 = pr.toRF(256, 256)
+ val rf2 = pr.toLayer(256, 256)
val expected2 = rf2.tileLayerMetadata.merge.extent
val computed2 = rf2.clipLayerExtent.tileLayerMetadata.merge.extent
basicallySame(expected2, computed2)
@@ -272,7 +294,7 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys
}
it("should rasterize with a spatiotemporal key") {
- val rf = TestData.randomSpatioTemporalTileLayerRDD(20, 20, 2, 2).toRF
+ val rf = TestData.randomSpatioTemporalTileLayerRDD(20, 20, 2, 2).toLayer
noException shouldBe thrownBy {
rf.toRaster($"tile", 128, 128)
@@ -280,8 +302,8 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys
}
it("should maintain metadata after all spatial join operations") {
- val rf1 = TestData.randomSpatioTemporalTileLayerRDD(20, 20, 2, 2).toRF
- val rf2 = TestData.randomSpatioTemporalTileLayerRDD(20, 20, 2, 2).toRF
+ val rf1 = TestData.randomSpatioTemporalTileLayerRDD(20, 20, 2, 2).toLayer
+ val rf2 = TestData.randomSpatioTemporalTileLayerRDD(20, 20, 2, 2).toLayer
val joinTypes = Seq("inner", "outer", "fullouter", "left_outer", "right_outer", "leftsemi")
forEvery(joinTypes) { jt ⇒
@@ -293,9 +315,9 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys
it("should rasterize multiband") {
withClue("Landsat") {
- val blue = TestData.l8Sample(1).projectedRaster.toRF.withRFColumnRenamed("tile", "blue")
- val green = TestData.l8Sample(2).projectedRaster.toRF.withRFColumnRenamed("tile", "green")
- val red = TestData.l8Sample(3).projectedRaster.toRF.withRFColumnRenamed("tile", "red")
+ val blue = TestData.l8Sample(1).projectedRaster.toLayer.withRFColumnRenamed("tile", "blue")
+ val green = TestData.l8Sample(2).projectedRaster.toLayer.withRFColumnRenamed("tile", "green")
+ val red = TestData.l8Sample(3).projectedRaster.toLayer.withRFColumnRenamed("tile", "red")
val joined = blue.spatialJoin(green).spatialJoin(red)
@@ -306,9 +328,9 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys
}
}
withClue("NAIP") {
- val red = TestData.naipSample(1).projectedRaster.toRF.withRFColumnRenamed("tile", "red")
- val green = TestData.naipSample(2).projectedRaster.toRF.withRFColumnRenamed("tile", "green")
- val blue = TestData.naipSample(3).projectedRaster.toRF.withRFColumnRenamed("tile", "blue")
+ val red = TestData.naipSample(1).projectedRaster.toLayer.withRFColumnRenamed("tile", "red")
+ val green = TestData.naipSample(2).projectedRaster.toLayer.withRFColumnRenamed("tile", "green")
+ val blue = TestData.naipSample(3).projectedRaster.toLayer.withRFColumnRenamed("tile", "blue")
val joined = blue.spatialJoin(green).spatialJoin(red)
noException shouldBe thrownBy {
@@ -323,7 +345,7 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys
// 774 × 500
val praster: ProjectedRaster[Tile] = sampleGeoTiff.projectedRaster
val (cols, rows) = praster.raster.dimensions
- val rf = praster.toRF(64, 64)
+ val rf = praster.toLayer(64, 64)
val raster = rf.toRaster($"tile", cols, rows)
render(raster.tile, "normal")
@@ -344,7 +366,7 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys
it("shouldn't restitch raster that's has derived tiles") {
val praster: ProjectedRaster[Tile] = sampleGeoTiff.projectedRaster
- val rf = praster.toRF(64, 64)
+ val rf = praster.toLayer(64, 64)
val equalizer = udf((t: Tile) => t.equalize())
@@ -352,13 +374,13 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys
intercept[IllegalArgumentException] {
// spatial_key is lost
- equalized.asRF.toRaster($"equalized", 128, 128)
+ equalized.asLayer.toRaster($"equalized", 128, 128)
}
}
it("should fetch CRS") {
val praster: ProjectedRaster[Tile] = sampleGeoTiff.projectedRaster
- val rf = praster.toRF
+ val rf = praster.toLayer
assert(rf.crs === praster.crs)
}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/RasterFunctionsSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/RasterFunctionsSpec.scala
similarity index 55%
rename from core/src/test/scala/astraea/spark/rasterframes/RasterFunctionsSpec.scala
rename to core/src/test/scala/org/locationtech/rasterframes/RasterFunctionsSpec.scala
index da2ab9c56..caccd74ca 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/RasterFunctionsSpec.scala
+++ b/core/src/test/scala/org/locationtech/rasterframes/RasterFunctionsSpec.scala
@@ -19,22 +19,22 @@
*
*/
-package astraea.spark.rasterframes
-import astraea.spark.rasterframes.TestData.injectND
-import astraea.spark.rasterframes.expressions.accessors.ExtractTile
-import astraea.spark.rasterframes.stats.{CellHistogram, CellStatistics, LocalCellStatistics}
-import astraea.spark.rasterframes.tiles.ProjectedRasterTile
+package org.locationtech.rasterframes
+
import geotrellis.proj4.LatLng
import geotrellis.raster
import geotrellis.raster.testkit.RasterMatchers
-import geotrellis.raster.{ArrayTile, BitCellType, ByteUserDefinedNoDataCellType, DoubleConstantNoDataCellType, ShortConstantNoDataCellType, Tile, UByteConstantNoDataCellType}
+import geotrellis.raster._
import geotrellis.vector.Extent
-import org.apache.spark.sql.{AnalysisException, Encoders}
+import org.apache.spark.sql.Encoders
import org.apache.spark.sql.functions._
-import org.scalatest.{FunSpec, Matchers}
+import org.locationtech.rasterframes.expressions.accessors.ExtractTile
+import org.locationtech.rasterframes.model.TileDimensions
+import org.locationtech.rasterframes.ref.{RasterRef, RasterSource}
+import org.locationtech.rasterframes.stats._
+import org.locationtech.rasterframes.tiles.ProjectedRasterTile
-class RasterFunctionsSpec extends FunSpec
- with TestEnvironment with Matchers with RasterMatchers {
+class RasterFunctionsSpec extends TestEnvironment with RasterMatchers {
import spark.implicits._
val extent = Extent(10, 20, 30, 40)
@@ -51,66 +51,95 @@ class RasterFunctionsSpec extends FunSpec
lazy val three = TestData.projectedRasterTile(cols, rows, 3, extent, crs, ct)
lazy val six = ProjectedRasterTile(three * two, three.extent, three.crs)
lazy val nd = TestData.projectedRasterTile(cols, rows, -2, extent, crs, ct)
- lazy val randTile = TestData.projectedRasterTile(cols, rows, scala.util.Random.nextInt(), extent, crs, ct)
- lazy val randNDTile = TestData.injectND(numND)(randTile)
+ lazy val randPRT = TestData.projectedRasterTile(cols, rows, scala.util.Random.nextInt(), extent, crs, ct)
+ lazy val randNDPRT = TestData.injectND(numND)(randPRT)
lazy val randDoubleTile = TestData.projectedRasterTile(cols, rows, scala.util.Random.nextGaussian(), extent, crs, DoubleConstantNoDataCellType)
lazy val randDoubleNDTile = TestData.injectND(numND)(randDoubleTile)
lazy val randPositiveDoubleTile = TestData.projectedRasterTile(cols, rows, scala.util.Random.nextDouble() + 1e-6, extent, crs, DoubleConstantNoDataCellType)
- val expectedRandNoData: Long = numND * tileCount
+ val expectedRandNoData: Long = numND * tileCount.toLong
val expectedRandData: Long = cols * rows * tileCount - expectedRandNoData
- lazy val randNDTilesWithNull = Seq.fill[Tile](tileCount)(injectND(numND)(
+ lazy val randNDTilesWithNull = Seq.fill[Tile](tileCount)(TestData.injectND(numND)(
TestData.randomTile(cols, rows, UByteConstantNoDataCellType)
)).map(ProjectedRasterTile(_, extent, crs)) :+ null
+ def lazyPRT = RasterRef(RasterSource(TestData.l8samplePath), 0, None).tile
+
implicit val pairEnc = Encoders.tuple(ProjectedRasterTile.prtEncoder, ProjectedRasterTile.prtEncoder)
implicit val tripEnc = Encoders.tuple(ProjectedRasterTile.prtEncoder, ProjectedRasterTile.prtEncoder, ProjectedRasterTile.prtEncoder)
+ describe("constant tile generation operations") {
+ val dim = 2
+ val rows = 2
+
+ it("should create a ones tile") {
+ val df = (0 until rows).toDF("id")
+ .withColumn("const", rf_make_ones_tile(dim, dim, IntConstantNoDataCellType))
+ val result = df.select(rf_tile_sum($"const") as "ts").agg(sum("ts")).as[Double].first()
+ result should be (dim * dim * rows)
+ }
+
+ it("should create a zeros tile") {
+ val df = (0 until rows).toDF("id")
+ .withColumn("const", rf_make_zeros_tile(dim, dim, FloatConstantNoDataCellType))
+ val result = df.select(rf_tile_sum($"const") as "ts").agg(sum("ts")).as[Double].first()
+ result should be (0)
+ }
+
+ it("should create an arbitrary constant tile") {
+ val value = 4
+ val df = (0 until rows).toDF("id")
+ .withColumn("const", rf_make_constant_tile(value, dim, dim, ByteConstantNoDataCellType))
+ val result = df.select(rf_tile_sum($"const") as "ts").agg(sum("ts")).as[Double].first()
+ result should be (dim * dim * rows * value)
+ }
+ }
+
describe("arithmetic tile operations") {
it("should local_add") {
val df = Seq((one, two)).toDF("one", "two")
- val maybeThree = df.select(local_add($"one", $"two")).as[ProjectedRasterTile]
+ val maybeThree = df.select(rf_local_add($"one", $"two")).as[ProjectedRasterTile]
assertEqual(maybeThree.first(), three)
assertEqual(df.selectExpr("rf_local_add(one, two)").as[ProjectedRasterTile].first(), three)
- val maybeThreeTile = df.select(local_add(ExtractTile($"one"), ExtractTile($"two"))).as[Tile]
+ val maybeThreeTile = df.select(rf_local_add(ExtractTile($"one"), ExtractTile($"two"))).as[Tile]
assertEqual(maybeThreeTile.first(), three.toArrayTile())
checkDocs("rf_local_add")
}
- it("should local_subtract") {
+ it("should rf_local_subtract") {
val df = Seq((three, two)).toDF("three", "two")
- val maybeOne = df.select(local_subtract($"three", $"two")).as[ProjectedRasterTile]
+ val maybeOne = df.select(rf_local_subtract($"three", $"two")).as[ProjectedRasterTile]
assertEqual(maybeOne.first(), one)
assertEqual(df.selectExpr("rf_local_subtract(three, two)").as[ProjectedRasterTile].first(), one)
val maybeOneTile =
- df.select(local_subtract(ExtractTile($"three"), ExtractTile($"two"))).as[Tile]
+ df.select(rf_local_subtract(ExtractTile($"three"), ExtractTile($"two"))).as[Tile]
assertEqual(maybeOneTile.first(), one.toArrayTile())
checkDocs("rf_local_subtract")
}
- it("should local_multiply") {
+ it("should rf_local_multiply") {
val df = Seq((three, two)).toDF("three", "two")
- val maybeSix = df.select(local_multiply($"three", $"two")).as[ProjectedRasterTile]
+ val maybeSix = df.select(rf_local_multiply($"three", $"two")).as[ProjectedRasterTile]
assertEqual(maybeSix.first(), six)
assertEqual(df.selectExpr("rf_local_multiply(three, two)").as[ProjectedRasterTile].first(), six)
val maybeSixTile =
- df.select(local_multiply(ExtractTile($"three"), ExtractTile($"two"))).as[Tile]
+ df.select(rf_local_multiply(ExtractTile($"three"), ExtractTile($"two"))).as[Tile]
assertEqual(maybeSixTile.first(), six.toArrayTile())
checkDocs("rf_local_multiply")
}
- it("should local_divide") {
+ it("should rf_local_divide") {
val df = Seq((six, two)).toDF("six", "two")
- val maybeThree = df.select(local_divide($"six", $"two")).as[ProjectedRasterTile]
+ val maybeThree = df.select(rf_local_divide($"six", $"two")).as[ProjectedRasterTile]
assertEqual(maybeThree.first(), three)
assertEqual(df.selectExpr("rf_local_divide(six, two)").as[ProjectedRasterTile].first(), three)
@@ -119,151 +148,176 @@ class RasterFunctionsSpec extends FunSpec
.as[ProjectedRasterTile].first(), six)
val maybeThreeTile =
- df.select(local_divide(ExtractTile($"six"), ExtractTile($"two"))).as[Tile]
+ df.select(rf_local_divide(ExtractTile($"six"), ExtractTile($"two"))).as[Tile]
assertEqual(maybeThreeTile.first(), three.toArrayTile())
checkDocs("rf_local_divide")
}
}
describe("scalar tile operations") {
- it("should local_add") {
+ it("should rf_local_add") {
val df = Seq(one).toDF("one")
- val maybeThree = df.select(local_add($"one", 2)).as[ProjectedRasterTile]
+ val maybeThree = df.select(rf_local_add($"one", 2)).as[ProjectedRasterTile]
assertEqual(maybeThree.first(), three)
- val maybeThreeD = df.select(local_add($"one", 2.1)).as[ProjectedRasterTile]
+ val maybeThreeD = df.select(rf_local_add($"one", 2.1)).as[ProjectedRasterTile]
assertEqual(maybeThreeD.first(), three.convert(DoubleConstantNoDataCellType).localAdd(0.1))
- val maybeThreeTile = df.select(local_add(ExtractTile($"one"), 2)).as[Tile]
+ val maybeThreeTile = df.select(rf_local_add(ExtractTile($"one"), 2)).as[Tile]
assertEqual(maybeThreeTile.first(), three.toArrayTile())
}
- it("should local_subtract") {
+ it("should rf_local_subtract") {
val df = Seq(three).toDF("three")
- val maybeOne = df.select(local_subtract($"three", 2)).as[ProjectedRasterTile]
+ val maybeOne = df.select(rf_local_subtract($"three", 2)).as[ProjectedRasterTile]
assertEqual(maybeOne.first(), one)
- val maybeOneD = df.select(local_subtract($"three", 2.0)).as[ProjectedRasterTile]
+ val maybeOneD = df.select(rf_local_subtract($"three", 2.0)).as[ProjectedRasterTile]
assertEqual(maybeOneD.first(), one)
- val maybeOneTile = df.select(local_subtract(ExtractTile($"three"), 2)).as[Tile]
+ val maybeOneTile = df.select(rf_local_subtract(ExtractTile($"three"), 2)).as[Tile]
assertEqual(maybeOneTile.first(), one.toArrayTile())
}
- it("should local_multiply") {
+ it("should rf_local_multiply") {
val df = Seq(three).toDF("three")
- val maybeSix = df.select(local_multiply($"three", 2)).as[ProjectedRasterTile]
+ val maybeSix = df.select(rf_local_multiply($"three", 2)).as[ProjectedRasterTile]
assertEqual(maybeSix.first(), six)
- val maybeSixD = df.select(local_multiply($"three", 2.0)).as[ProjectedRasterTile]
+ val maybeSixD = df.select(rf_local_multiply($"three", 2.0)).as[ProjectedRasterTile]
assertEqual(maybeSixD.first(), six)
- val maybeSixTile = df.select(local_multiply(ExtractTile($"three"), 2)).as[Tile]
+ val maybeSixTile = df.select(rf_local_multiply(ExtractTile($"three"), 2)).as[Tile]
assertEqual(maybeSixTile.first(), six.toArrayTile())
}
- it("should local_divide") {
+ it("should rf_local_divide") {
val df = Seq(six).toDF("six")
- val maybeThree = df.select(local_divide($"six", 2)).as[ProjectedRasterTile]
+ val maybeThree = df.select(rf_local_divide($"six", 2)).as[ProjectedRasterTile]
assertEqual(maybeThree.first(), three)
- val maybeThreeD = df.select(local_divide($"six", 2.0)).as[ProjectedRasterTile]
+ val maybeThreeD = df.select(rf_local_divide($"six", 2.0)).as[ProjectedRasterTile]
assertEqual(maybeThreeD.first(), three)
- val maybeThreeTile = df.select(local_divide(ExtractTile($"six"), 2)).as[Tile]
+ val maybeThreeTile = df.select(rf_local_divide(ExtractTile($"six"), 2)).as[Tile]
assertEqual(maybeThreeTile.first(), three.toArrayTile())
}
}
describe("tile comparison relations") {
- it("should evaluate local_less") {
+ it("should evaluate rf_local_less") {
val df = Seq((two, three, six)).toDF("two", "three", "six")
- df.select(tile_sum(local_less($"two", 6))).first() should be(100.0)
- df.select(tile_sum(local_less($"two", 1.9))).first() should be(0.0)
- df.select(tile_sum(local_less($"two", 2))).first() should be(0.0)
- df.select(tile_sum(local_less($"three", $"two"))).first() should be(0.0)
- df.select(tile_sum(local_less($"three", $"three"))).first() should be(0.0)
- df.select(tile_sum(local_less($"three", $"six"))).first() should be(100.0)
+ df.select(rf_tile_sum(rf_local_less($"two", 6))).first() should be(100.0)
+ df.select(rf_tile_sum(rf_local_less($"two", 1.9))).first() should be(0.0)
+ df.select(rf_tile_sum(rf_local_less($"two", 2))).first() should be(0.0)
+ df.select(rf_tile_sum(rf_local_less($"three", $"two"))).first() should be(0.0)
+ df.select(rf_tile_sum(rf_local_less($"three", $"three"))).first() should be(0.0)
+ df.select(rf_tile_sum(rf_local_less($"three", $"six"))).first() should be(100.0)
df.selectExpr("rf_tile_sum(rf_local_less(two, 6))").as[Double].first() should be(100.0)
df.selectExpr("rf_tile_sum(rf_local_less(three, three))").as[Double].first() should be(0.0)
checkDocs("rf_local_less")
}
- it("should evaluate local_less_equal") {
+ it("should evaluate rf_local_less_equal") {
val df = Seq((two, three, six)).toDF("two", "three", "six")
- df.select(tile_sum(local_less_equal($"two", 6))).first() should be(100.0)
- df.select(tile_sum(local_less_equal($"two", 1.9))).first() should be(0.0)
- df.select(tile_sum(local_less_equal($"two", 2))).first() should be(100.0)
- df.select(tile_sum(local_less_equal($"three", $"two"))).first() should be(0.0)
- df.select(tile_sum(local_less_equal($"three", $"three"))).first() should be(100.0)
- df.select(tile_sum(local_less_equal($"three", $"six"))).first() should be(100.0)
+ df.select(rf_tile_sum(rf_local_less_equal($"two", 6))).first() should be(100.0)
+ df.select(rf_tile_sum(rf_local_less_equal($"two", 1.9))).first() should be(0.0)
+ df.select(rf_tile_sum(rf_local_less_equal($"two", 2))).first() should be(100.0)
+ df.select(rf_tile_sum(rf_local_less_equal($"three", $"two"))).first() should be(0.0)
+ df.select(rf_tile_sum(rf_local_less_equal($"three", $"three"))).first() should be(100.0)
+ df.select(rf_tile_sum(rf_local_less_equal($"three", $"six"))).first() should be(100.0)
df.selectExpr("rf_tile_sum(rf_local_less_equal(two, 6))").as[Double].first() should be(100.0)
df.selectExpr("rf_tile_sum(rf_local_less_equal(three, three))").as[Double].first() should be(100.0)
checkDocs("rf_local_less_equal")
}
- it("should evaluate local_greater") {
+ it("should evaluate rf_local_greater") {
val df = Seq((two, three, six)).toDF("two", "three", "six")
- df.select(tile_sum(local_greater($"two", 6))).first() should be(0.0)
- df.select(tile_sum(local_greater($"two", 1.9))).first() should be(100.0)
- df.select(tile_sum(local_greater($"two", 2))).first() should be(0.0)
- df.select(tile_sum(local_greater($"three", $"two"))).first() should be(100.0)
- df.select(tile_sum(local_greater($"three", $"three"))).first() should be(0.0)
- df.select(tile_sum(local_greater($"three", $"six"))).first() should be(0.0)
+ df.select(rf_tile_sum(rf_local_greater($"two", 6))).first() should be(0.0)
+ df.select(rf_tile_sum(rf_local_greater($"two", 1.9))).first() should be(100.0)
+ df.select(rf_tile_sum(rf_local_greater($"two", 2))).first() should be(0.0)
+ df.select(rf_tile_sum(rf_local_greater($"three", $"two"))).first() should be(100.0)
+ df.select(rf_tile_sum(rf_local_greater($"three", $"three"))).first() should be(0.0)
+ df.select(rf_tile_sum(rf_local_greater($"three", $"six"))).first() should be(0.0)
df.selectExpr("rf_tile_sum(rf_local_greater(two, 1.9))").as[Double].first() should be(100.0)
df.selectExpr("rf_tile_sum(rf_local_greater(three, three))").as[Double].first() should be(0.0)
checkDocs("rf_local_greater")
}
- it("should evaluate local_greater_equal") {
+ it("should evaluate rf_local_greater_equal") {
val df = Seq((two, three, six)).toDF("two", "three", "six")
- df.select(tile_sum(local_greater_equal($"two", 6))).first() should be(0.0)
- df.select(tile_sum(local_greater_equal($"two", 1.9))).first() should be(100.0)
- df.select(tile_sum(local_greater_equal($"two", 2))).first() should be(100.0)
- df.select(tile_sum(local_greater_equal($"three", $"two"))).first() should be(100.0)
- df.select(tile_sum(local_greater_equal($"three", $"three"))).first() should be(100.0)
- df.select(tile_sum(local_greater_equal($"three", $"six"))).first() should be(0.0)
+ df.select(rf_tile_sum(rf_local_greater_equal($"two", 6))).first() should be(0.0)
+ df.select(rf_tile_sum(rf_local_greater_equal($"two", 1.9))).first() should be(100.0)
+ df.select(rf_tile_sum(rf_local_greater_equal($"two", 2))).first() should be(100.0)
+ df.select(rf_tile_sum(rf_local_greater_equal($"three", $"two"))).first() should be(100.0)
+ df.select(rf_tile_sum(rf_local_greater_equal($"three", $"three"))).first() should be(100.0)
+ df.select(rf_tile_sum(rf_local_greater_equal($"three", $"six"))).first() should be(0.0)
df.selectExpr("rf_tile_sum(rf_local_greater_equal(two, 1.9))").as[Double].first() should be(100.0)
df.selectExpr("rf_tile_sum(rf_local_greater_equal(three, three))").as[Double].first() should be(100.0)
checkDocs("rf_local_greater_equal")
}
- it("should evaluate local_equal") {
+ it("should evaluate rf_local_equal") {
val df = Seq((two, three, three)).toDF("two", "threeA", "threeB")
- df.select(tile_sum(local_equal($"two", 2))).first() should be(100.0)
- df.select(tile_sum(local_equal($"two", 2.1))).first() should be(0.0)
- df.select(tile_sum(local_equal($"two", $"threeA"))).first() should be(0.0)
- df.select(tile_sum(local_equal($"threeA", $"threeB"))).first() should be(100.0)
+ df.select(rf_tile_sum(rf_local_equal($"two", 2))).first() should be(100.0)
+ df.select(rf_tile_sum(rf_local_equal($"two", 2.1))).first() should be(0.0)
+ df.select(rf_tile_sum(rf_local_equal($"two", $"threeA"))).first() should be(0.0)
+ df.select(rf_tile_sum(rf_local_equal($"threeA", $"threeB"))).first() should be(100.0)
df.selectExpr("rf_tile_sum(rf_local_equal(two, 1.9))").as[Double].first() should be(0.0)
df.selectExpr("rf_tile_sum(rf_local_equal(threeA, threeB))").as[Double].first() should be(100.0)
checkDocs("rf_local_equal")
}
- it("should evaluate local_unequal") {
+ it("should evaluate rf_local_unequal") {
val df = Seq((two, three, three)).toDF("two", "threeA", "threeB")
- df.select(tile_sum(local_unequal($"two", 2))).first() should be(0.0)
- df.select(tile_sum(local_unequal($"two", 2.1))).first() should be(100.0)
- df.select(tile_sum(local_unequal($"two", $"threeA"))).first() should be(100.0)
- df.select(tile_sum(local_unequal($"threeA", $"threeB"))).first() should be(0.0)
+ df.select(rf_tile_sum(rf_local_unequal($"two", 2))).first() should be(0.0)
+ df.select(rf_tile_sum(rf_local_unequal($"two", 2.1))).first() should be(100.0)
+ df.select(rf_tile_sum(rf_local_unequal($"two", $"threeA"))).first() should be(100.0)
+ df.select(rf_tile_sum(rf_local_unequal($"threeA", $"threeB"))).first() should be(0.0)
df.selectExpr("rf_tile_sum(rf_local_unequal(two, 1.9))").as[Double].first() should be(100.0)
df.selectExpr("rf_tile_sum(rf_local_unequal(threeA, threeB))").as[Double].first() should be(0.0)
checkDocs("rf_local_unequal")
}
}
+ describe("raster metadata") {
+ it("should get the TileDimensions of a Tile") {
+ val t = Seq(randPRT).toDF("tile").select(rf_dimensions($"tile")).first()
+ t should be (TileDimensions(randPRT.dimensions))
+ checkDocs("rf_dimensions")
+ }
+ it("should get the Extent of a ProjectedRasterTile") {
+ val e = Seq(randPRT).toDF("tile").select(rf_extent($"tile")).first()
+ e should be (extent)
+ checkDocs("rf_extent")
+ }
+
+ it("should get the CRS of a ProjectedRasterTile") {
+ val e = Seq(randPRT).toDF("tile").select(rf_crs($"tile")).first()
+ e should be (crs)
+ checkDocs("rf_crs")
+ }
+
+ it("should get the Geometry of a ProjectedRasterTile") {
+ val g = Seq(randPRT).toDF("tile").select(rf_geometry($"tile")).first()
+ g should be (extent.jtsGeom)
+ checkDocs("rf_geometry")
+ }
+ }
+
describe("per-tile stats") {
it("should compute data cell counts") {
val df = Seq(TestData.injectND(numND)(two)).toDF("two")
- df.select(data_cells($"two")).first() shouldBe (cols * rows - numND).toLong
+ df.select(rf_data_cells($"two")).first() shouldBe (cols * rows - numND).toLong
val df2 = randNDTilesWithNull.toDF("tile")
- df2.select(data_cells($"tile") as "cells")
+ df2.select(rf_data_cells($"tile") as "cells")
.agg(sum("cells"))
.as[Long]
.first() should be (expectedRandData)
@@ -272,10 +326,10 @@ class RasterFunctionsSpec extends FunSpec
}
it("should compute no-data cell counts") {
val df = Seq(TestData.injectND(numND)(two)).toDF("two")
- df.select(no_data_cells($"two")).first() should be(numND)
+ df.select(rf_no_data_cells($"two")).first() should be(numND)
val df2 = randNDTilesWithNull.toDF("tile")
- df2.select(no_data_cells($"tile") as "cells")
+ df2.select(rf_no_data_cells($"tile") as "cells")
.agg(sum("cells"))
.as[Long]
.first() should be (expectedRandNoData)
@@ -284,40 +338,56 @@ class RasterFunctionsSpec extends FunSpec
}
it("should detect no-data tiles") {
val df = Seq(nd).toDF("nd")
- df.select(is_no_data_tile($"nd")).first() should be(true)
+ df.select(rf_is_no_data_tile($"nd")).first() should be(true)
val df2 = Seq(two).toDF("not_nd")
- df2.select(is_no_data_tile($"not_nd")).first() should be(false)
+ df2.select(rf_is_no_data_tile($"not_nd")).first() should be(false)
checkDocs("rf_is_no_data_tile")
}
+
+ it("should evaluate exists and for_all") {
+ val df0 = Seq(zero).toDF("tile")
+ df0.select(rf_exists($"tile")).first() should be(false)
+ df0.select(rf_for_all($"tile")).first() should be(false)
+
+ Seq(one).toDF("tile").select(rf_exists($"tile")).first() should be(true)
+ Seq(one).toDF("tile").select(rf_for_all($"tile")).first() should be(true)
+
+ val dfNd = Seq(TestData.injectND(1)(one)).toDF("tile")
+ dfNd.select(rf_exists($"tile")).first() should be(true)
+ dfNd.select(rf_for_all($"tile")).first() should be(false)
+
+ checkDocs("rf_exists")
+ checkDocs("rf_for_all")
+ }
it("should find the minimum cell value") {
- val min = randNDTile.toArray().filter(c => raster.isData(c)).min.toDouble
- val df = Seq(randNDTile).toDF("rand")
- df.select(tile_min($"rand")).first() should be(min)
+ val min = randNDPRT.toArray().filter(c => raster.isData(c)).min.toDouble
+ val df = Seq(randNDPRT).toDF("rand")
+ df.select(rf_tile_min($"rand")).first() should be(min)
df.selectExpr("rf_tile_min(rand)").as[Double].first() should be(min)
checkDocs("rf_tile_min")
}
it("should find the maximum cell value") {
- val max = randNDTile.toArray().filter(c => raster.isData(c)).max.toDouble
- val df = Seq(randNDTile).toDF("rand")
- df.select(tile_max($"rand")).first() should be(max)
+ val max = randNDPRT.toArray().filter(c => raster.isData(c)).max.toDouble
+ val df = Seq(randNDPRT).toDF("rand")
+ df.select(rf_tile_max($"rand")).first() should be(max)
df.selectExpr("rf_tile_max(rand)").as[Double].first() should be(max)
checkDocs("rf_tile_max")
}
it("should compute the tile mean cell value") {
- val values = randNDTile.toArray().filter(c => raster.isData(c))
+ val values = randNDPRT.toArray().filter(c => raster.isData(c))
val mean = values.sum.toDouble / values.length
- val df = Seq(randNDTile).toDF("rand")
- df.select(tile_mean($"rand")).first() should be(mean)
+ val df = Seq(randNDPRT).toDF("rand")
+ df.select(rf_tile_mean($"rand")).first() should be(mean)
df.selectExpr("rf_tile_mean(rand)").as[Double].first() should be(mean)
checkDocs("rf_tile_mean")
}
it("should compute the tile summary statistics") {
- val values = randNDTile.toArray().filter(c => raster.isData(c))
+ val values = randNDPRT.toArray().filter(c => raster.isData(c))
val mean = values.sum.toDouble / values.length
- val df = Seq(randNDTile).toDF("rand")
- val stats = df.select(tile_stats($"rand")).first()
+ val df = Seq(randNDPRT).toDF("rand")
+ val stats = df.select(rf_tile_stats($"rand")).first()
stats.mean should be (mean +- 0.00001)
val stats2 = df.selectExpr("rf_tile_stats(rand) as stats")
@@ -325,7 +395,7 @@ class RasterFunctionsSpec extends FunSpec
.first()
stats2 should be (stats)
- df.select(tile_stats($"rand") as "stats")
+ df.select(rf_tile_stats($"rand") as "stats")
.select($"stats.mean").as[Double]
.first() should be(mean +- 0.00001)
df.selectExpr("rf_tile_stats(rand) as stats")
@@ -334,7 +404,7 @@ class RasterFunctionsSpec extends FunSpec
val df2 = randNDTilesWithNull.toDF("tile")
df2
- .select(tile_stats($"tile")("data_cells") as "cells")
+ .select(rf_tile_stats($"tile")("data_cells") as "cells")
.agg(sum("cells"))
.as[Long]
.first() should be (expectedRandData)
@@ -343,8 +413,8 @@ class RasterFunctionsSpec extends FunSpec
}
it("should compute the tile histogram") {
- val df = Seq(randNDTile).toDF("rand")
- val h1 = df.select(tile_histogram($"rand")).first()
+ val df = Seq(randNDPRT).toDF("rand")
+ val h1 = df.select(rf_tile_histogram($"rand")).first()
val h2 = df.selectExpr("rf_tile_histogram(rand) as hist")
.select($"hist".as[CellHistogram])
@@ -359,14 +429,14 @@ class RasterFunctionsSpec extends FunSpec
describe("aggregate statistics") {
it("should count data cells") {
val df = randNDTilesWithNull.filter(_ != null).toDF("tile")
- df.select(agg_data_cells($"tile")).first() should be (expectedRandData)
+ df.select(rf_agg_data_cells($"tile")).first() should be (expectedRandData)
df.selectExpr("rf_agg_data_cells(tile)").as[Long].first() should be (expectedRandData)
checkDocs("rf_agg_data_cells")
}
it("should count no-data cells") {
val df = randNDTilesWithNull.toDF("tile")
- df.select(agg_no_data_cells($"tile")).first() should be (expectedRandNoData)
+ df.select(rf_agg_no_data_cells($"tile")).first() should be (expectedRandNoData)
df.selectExpr("rf_agg_no_data_cells(tile)").as[Long].first() should be (expectedRandNoData)
checkDocs("rf_agg_no_data_cells")
}
@@ -375,7 +445,7 @@ class RasterFunctionsSpec extends FunSpec
val df = randNDTilesWithNull.toDF("tile")
df
- .select(agg_stats($"tile") as "stats")
+ .select(rf_agg_stats($"tile") as "stats")
.select("stats.data_cells", "stats.no_data_cells")
.as[(Long, Long)]
.first() should be ((expectedRandData, expectedRandNoData))
@@ -389,7 +459,7 @@ class RasterFunctionsSpec extends FunSpec
it("should compute a aggregate histogram") {
val df = randNDTilesWithNull.toDF("tile")
- val hist1 = df.select(agg_approx_histogram($"tile")).first()
+ val hist1 = df.select(rf_agg_approx_histogram($"tile")).first()
val hist2 = df.selectExpr("rf_agg_approx_histogram(tile) as hist")
.select($"hist".as[CellHistogram])
.first()
@@ -399,7 +469,7 @@ class RasterFunctionsSpec extends FunSpec
it("should compute local statistics") {
val df = randNDTilesWithNull.toDF("tile")
- val stats1 = df.select(agg_local_stats($"tile"))
+ val stats1 = df.select(rf_agg_local_stats($"tile"))
.first()
val stats2 = df.selectExpr("rf_agg_local_stats(tile) as stats")
.select($"stats".as[LocalCellStatistics])
@@ -411,42 +481,42 @@ class RasterFunctionsSpec extends FunSpec
it("should compute local min") {
val df = Seq(two, three, one, six).toDF("tile")
- df.select(agg_local_min($"tile")).first() should be(one.toArrayTile())
+ df.select(rf_agg_local_min($"tile")).first() should be(one.toArrayTile())
df.selectExpr("rf_agg_local_min(tile)").as[Tile].first() should be(one.toArrayTile())
checkDocs("rf_agg_local_min")
}
it("should compute local max") {
val df = Seq(two, three, one, six).toDF("tile")
- df.select(agg_local_max($"tile")).first() should be(six.toArrayTile())
+ df.select(rf_agg_local_max($"tile")).first() should be(six.toArrayTile())
df.selectExpr("rf_agg_local_max(tile)").as[Tile].first() should be(six.toArrayTile())
checkDocs("rf_agg_local_max")
}
it("should compute local data cell counts") {
- val df = Seq(two, randNDTile, nd).toDF("tile")
- val t1 = df.select(agg_local_data_cells($"tile")).first()
+ val df = Seq(two, randNDPRT, nd).toDF("tile")
+ val t1 = df.select(rf_agg_local_data_cells($"tile")).first()
val t2 = df.selectExpr("rf_agg_local_data_cells(tile) as cnt").select($"cnt".as[Tile]).first()
t1 should be (t2)
checkDocs("rf_agg_local_data_cells")
}
it("should compute local no-data cell counts") {
- val df = Seq(two, randNDTile, nd).toDF("tile")
- val t1 = df.select(agg_local_no_data_cells($"tile")).first()
+ val df = Seq(two, randNDPRT, nd).toDF("tile")
+ val t1 = df.select(rf_agg_local_no_data_cells($"tile")).first()
val t2 = df.selectExpr("rf_agg_local_no_data_cells(tile) as cnt").select($"cnt".as[Tile]).first()
t1 should be (t2)
- val t3 = df.select(local_add(agg_local_data_cells($"tile"), agg_local_no_data_cells($"tile"))).first()
+ val t3 = df.select(rf_local_add(rf_agg_local_data_cells($"tile"), rf_agg_local_no_data_cells($"tile"))).first()
t3 should be(three.toArrayTile())
checkDocs("rf_agg_local_no_data_cells")
}
}
describe("analytical transformations") {
- it("should compute normalized_difference") {
+ it("should compute rf_normalized_difference") {
val df = Seq((three, two)).toDF("three", "two")
- df.select(tile_to_array_double(normalized_difference($"three", $"two")))
+ df.select(rf_tile_to_array_double(rf_normalized_difference($"three", $"two")))
.first()
.forall(_ == 0.2) shouldBe true
@@ -459,18 +529,18 @@ class RasterFunctionsSpec extends FunSpec
}
it("should mask one tile against another") {
- val df = Seq[Tile](randTile).toDF("tile")
+ val df = Seq[Tile](randPRT).toDF("tile")
val withMask = df.withColumn("mask",
- convert_cell_type(
- local_greater($"tile", 50),
+ rf_convert_cell_type(
+ rf_local_greater($"tile", 50),
"uint8")
)
val withMasked = withMask.withColumn("masked",
- mask($"tile", $"mask"))
+ rf_mask($"tile", $"mask"))
- val result = withMasked.agg(agg_no_data_cells($"tile") < agg_no_data_cells($"masked")).as[Boolean]
+ val result = withMasked.agg(rf_agg_no_data_cells($"tile") < rf_agg_no_data_cells($"masked")).as[Boolean]
result.first() should be(true)
@@ -478,22 +548,22 @@ class RasterFunctionsSpec extends FunSpec
}
it("should inverse mask one tile against another") {
- val df = Seq[Tile](randTile).toDF("tile")
+ val df = Seq[Tile](randPRT).toDF("tile")
- val baseND = df.select(agg_no_data_cells($"tile")).first()
+ val baseND = df.select(rf_agg_no_data_cells($"tile")).first()
val withMask = df.withColumn("mask",
- convert_cell_type(
- local_greater($"tile", 50),
+ rf_convert_cell_type(
+ rf_local_greater($"tile", 50),
"uint8"
)
)
val withMasked = withMask
- .withColumn("masked", mask($"tile", $"mask"))
- .withColumn("inv_masked", inverse_mask($"tile", $"mask"))
+ .withColumn("masked", rf_mask($"tile", $"mask"))
+ .withColumn("inv_masked", rf_inverse_mask($"tile", $"mask"))
- val result = withMasked.agg(agg_no_data_cells($"masked") + agg_no_data_cells($"inv_masked")).as[Long]
+ val result = withMasked.agg(rf_agg_no_data_cells($"masked") + rf_agg_no_data_cells($"inv_masked")).as[Long]
result.first() should be(tileSize + baseND)
@@ -501,29 +571,50 @@ class RasterFunctionsSpec extends FunSpec
}
it("should mask tile by another identified by specified value") {
- val df = Seq[Tile](randTile).toDF("tile")
+ val df = Seq[Tile](randPRT).toDF("tile")
val mask_value = 4
val withMask = df.withColumn("mask",
- local_multiply(convert_cell_type(
- local_greater($"tile", 50),
+ rf_local_multiply(rf_convert_cell_type(
+ rf_local_greater($"tile", 50),
"uint8"),
lit(mask_value)
)
)
val withMasked = withMask.withColumn("masked",
- mask_by_value($"tile", $"mask", lit(mask_value)))
+ rf_mask_by_value($"tile", $"mask", lit(mask_value)))
- val result = withMasked.agg(agg_no_data_cells($"tile") < agg_no_data_cells($"masked")).as[Boolean]
+ val result = withMasked.agg(rf_agg_no_data_cells($"tile") < rf_agg_no_data_cells($"masked")).as[Boolean]
result.first() should be(true)
checkDocs("rf_mask_by_value")
}
+ it("should inverse mask tile by another identified by specified value") {
+ val df = Seq[Tile](randPRT).toDF("tile")
+ val mask_value = 4
+
+ val withMask = df.withColumn("mask",
+ rf_local_multiply(rf_convert_cell_type(
+ rf_local_greater($"tile", 50),
+ "uint8"),
+ lit(mask_value)
+ )
+ )
+
+ val withMasked = withMask.withColumn("masked",
+ rf_inverse_mask_by_value($"tile", $"mask", lit(mask_value)))
+
+ val result = withMasked.agg(rf_agg_no_data_cells($"tile") < rf_agg_no_data_cells($"masked")).as[Boolean]
+
+ result.first() should be(true)
+ checkDocs("rf_inverse_mask_by_value")
+ }
+
it("should render ascii art") {
val df = Seq[Tile](ProjectedRasterTile(TestData.l8Labels)).toDF("tile")
- val r1 = df.select(render_ascii($"tile"))
+ val r1 = df.select(rf_render_ascii($"tile"))
val r2 = df.selectExpr("rf_render_ascii(tile)").as[String]
r1.first() should be(r2.first())
checkDocs("rf_render_ascii")
@@ -531,7 +622,7 @@ class RasterFunctionsSpec extends FunSpec
it("should render cells as matrix") {
val df = Seq(randDoubleNDTile).toDF("tile")
- val r1 = df.select(render_matrix($"tile"))
+ val r1 = df.select(rf_render_matrix($"tile"))
val r2 = df.selectExpr("rf_render_matrix(tile)").as[String]
r1.first() should be(r2.first())
checkDocs("rf_render_matrix")
@@ -545,9 +636,9 @@ class RasterFunctionsSpec extends FunSpec
val df = Seq((three_plus, three_less, three)).toDF("three_plus", "three_less", "three")
- assertEqual(df.select(round($"three")).as[ProjectedRasterTile].first(), three)
- assertEqual(df.select(round($"three_plus")).as[ProjectedRasterTile].first(), three_double)
- assertEqual(df.select(round($"three_less")).as[ProjectedRasterTile].first(), three_double)
+ assertEqual(df.select(rf_round($"three")).as[ProjectedRasterTile].first(), three)
+ assertEqual(df.select(rf_round($"three_plus")).as[ProjectedRasterTile].first(), three_double)
+ assertEqual(df.select(rf_round($"three_less")).as[ProjectedRasterTile].first(), three_double)
assertEqual(df.selectExpr("rf_round(three)").as[ProjectedRasterTile].first(), three)
assertEqual(df.selectExpr("rf_round(three_plus)").as[ProjectedRasterTile].first(), three_double)
@@ -556,27 +647,36 @@ class RasterFunctionsSpec extends FunSpec
checkDocs("rf_round")
}
+ it("should abs cell values") {
+ val minus = one.mapTile(t => t.convert(IntConstantNoDataCellType) * -1)
+ val df = Seq((minus, one)).toDF("minus", "one")
+
+ assertEqual(df.select(rf_abs($"minus").as[ProjectedRasterTile]).first(), one)
+
+ checkDocs("rf_abs")
+ }
+
it("should take logarithms positive cell values"){
- // log10 1000 == 3
+ // rf_log10 1000 == 3
val thousand = TestData.projectedRasterTile(cols, rows, 1000, extent, crs, ShortConstantNoDataCellType)
val threesDouble = TestData.projectedRasterTile(cols, rows, 3.0, extent, crs, DoubleConstantNoDataCellType)
val zerosDouble = TestData.projectedRasterTile(cols, rows, 0.0, extent, crs, DoubleConstantNoDataCellType)
val df1 = Seq(thousand).toDF("tile")
- assertEqual(df1.select(log10($"tile")).as[ProjectedRasterTile].first(), threesDouble)
+ assertEqual(df1.select(rf_log10($"tile")).as[ProjectedRasterTile].first(), threesDouble)
- // ln random tile == log10 random tile / log10(e); random tile square to ensure all positive cell values
+ // ln random tile == rf_log10 random tile / rf_log10(e); random tile square to ensure all positive cell values
val df2 = Seq(randPositiveDoubleTile).toDF("tile")
val log10e = math.log10(math.E)
- assertEqual(df2.select(log($"tile")).as[ProjectedRasterTile].first(),
- df2.select(log10($"tile")).as[ProjectedRasterTile].first() / log10e)
+ assertEqual(df2.select(rf_log($"tile")).as[ProjectedRasterTile].first(),
+ df2.select(rf_log10($"tile")).as[ProjectedRasterTile].first() / log10e)
lazy val maybeZeros = df2
.selectExpr(s"rf_local_subtract(rf_log(tile), rf_local_divide(rf_log10(tile), ${log10e}))")
.as[ProjectedRasterTile].first()
assertEqual(maybeZeros, zerosDouble)
- // log1p for zeros should be ln(1)
+ // rf_log1p for zeros should be ln(1)
val ln1 = math.log1p(0.0)
val df3 = Seq(zero).toDF("tile")
val maybeLn1 = df3.selectExpr(s"rf_log1p(tile)").as[ProjectedRasterTile].first()
@@ -594,42 +694,42 @@ class RasterFunctionsSpec extends FunSpec
// tile zeros ==> -Infinity
val df_0 = Seq(zero).toDF("tile")
- assertEqual(df_0.select(log($"tile")).as[ProjectedRasterTile].first(), ni_float)
- assertEqual(df_0.select(log10($"tile")).as[ProjectedRasterTile].first(), ni_float)
- assertEqual(df_0.select(log2($"tile")).as[ProjectedRasterTile].first(), ni_float)
- // log1p of zeros should be 0.
- assertEqual(df_0.select(log1p($"tile")).as[ProjectedRasterTile].first(), zero_float)
+ assertEqual(df_0.select(rf_log($"tile")).as[ProjectedRasterTile].first(), ni_float)
+ assertEqual(df_0.select(rf_log10($"tile")).as[ProjectedRasterTile].first(), ni_float)
+ assertEqual(df_0.select(rf_log2($"tile")).as[ProjectedRasterTile].first(), ni_float)
+ // rf_log1p of zeros should be 0.
+ assertEqual(df_0.select(rf_log1p($"tile")).as[ProjectedRasterTile].first(), zero_float)
// tile negative values ==> NaN
assert(df_0.selectExpr("rf_log(rf_local_subtract(tile, 42))").as[ProjectedRasterTile].first().isNoDataTile)
assert(df_0.selectExpr("rf_log2(rf_local_subtract(tile, 42))").as[ProjectedRasterTile].first().isNoDataTile)
- assert(df_0.select(log1p(local_subtract($"tile", 42))).as[ProjectedRasterTile].first().isNoDataTile)
- assert(df_0.select(log10(local_subtract($"tile", lit(0.01)))).as[ProjectedRasterTile].first().isNoDataTile)
+ assert(df_0.select(rf_log1p(rf_local_subtract($"tile", 42))).as[ProjectedRasterTile].first().isNoDataTile)
+ assert(df_0.select(rf_log10(rf_local_subtract($"tile", lit(0.01)))).as[ProjectedRasterTile].first().isNoDataTile)
}
it("should take exponential") {
val df = Seq(six).toDF("tile")
- // exp inverses log
+ // rf_exp inverses rf_log
assertEqual(
- df.select(exp(log($"tile"))).as[ProjectedRasterTile].first(),
+ df.select(rf_exp(rf_log($"tile"))).as[ProjectedRasterTile].first(),
six
)
// base 2
assertEqual(
- df.select(exp2(log2($"tile"))).as[ProjectedRasterTile].first(),
+ df.select(rf_exp2(rf_log2($"tile"))).as[ProjectedRasterTile].first(),
six)
// base 10
assertEqual(
- df.select(exp10(log10($"tile"))).as[ProjectedRasterTile].first(),
+ df.select(rf_exp10(rf_log10($"tile"))).as[ProjectedRasterTile].first(),
six)
// plus/minus 1
assertEqual(
- df.select(expm1(log1p($"tile"))).as[ProjectedRasterTile].first(),
+ df.select(rf_expm1(rf_log1p($"tile"))).as[ProjectedRasterTile].first(),
six)
// SQL
@@ -647,7 +747,7 @@ class RasterFunctionsSpec extends FunSpec
df.selectExpr("rf_exp2(rf_log2(tile))").as[ProjectedRasterTile].first(),
six)
- // SQL expm1
+ // SQL rf_expm1
assertEqual(
df.selectExpr("rf_expm1(rf_log1p(tile))").as[ProjectedRasterTile].first(),
six)
@@ -678,11 +778,11 @@ class RasterFunctionsSpec extends FunSpec
def df = Seq(lowRes).toDF("tile")
- val maybeUp = df.select(resample($"tile", lit(2))).as[ProjectedRasterTile].first()
+ val maybeUp = df.select(rf_resample($"tile", lit(2))).as[ProjectedRasterTile].first()
assertEqual(maybeUp, upsampled)
def df2 = Seq((lowRes, fourByFour)).toDF("tile1", "tile2")
- val maybeUpShape = df2.select(resample($"tile1", $"tile2")).as[ProjectedRasterTile].first()
+ val maybeUpShape = df2.select(rf_resample($"tile1", $"tile2")).as[ProjectedRasterTile].first()
assertEqual(maybeUpShape, upsampled)
// Downsample by double argument < 1
diff --git a/core/src/test/scala/org/locationtech/rasterframes/RasterJoinSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/RasterJoinSpec.scala
new file mode 100644
index 000000000..b2cd5d8ce
--- /dev/null
+++ b/core/src/test/scala/org/locationtech/rasterframes/RasterJoinSpec.scala
@@ -0,0 +1,168 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes
+
+import geotrellis.raster.resample.Bilinear
+import geotrellis.raster.testkit.RasterMatchers
+import geotrellis.raster.{IntConstantNoDataCellType, Raster, Tile}
+import org.apache.spark.sql.functions._
+import org.locationtech.rasterframes.expressions.aggregates.TileRasterizerAggregate
+import org.locationtech.rasterframes.expressions.aggregates.TileRasterizerAggregate.ProjectedRasterDefinition
+import org.locationtech.rasterframes.model.TileDimensions
+
+
+class RasterJoinSpec extends TestEnvironment with TestData with RasterMatchers {
+ import spark.implicits._
+ describe("Raster join between two DataFrames") {
+ val b4nativeTif = readSingleband("L8-B4-Elkton-VA.tiff")
+ // Same data, reprojected to EPSG:4326
+ val b4warpedTif = readSingleband("L8-B4-Elkton-VA-4326.tiff")
+
+ val b4nativeRf = b4nativeTif.toDF(TileDimensions(10, 10))
+ val b4warpedRf = b4warpedTif.toDF(TileDimensions(10, 10))
+ .withColumnRenamed("tile", "tile2")
+
+ it("should join the same scene correctly") {
+
+ val b4nativeRfPrime = b4nativeTif.toDF(TileDimensions(10, 10))
+ .withColumnRenamed("tile", "tile2")
+ val joined = b4nativeRf.rasterJoin(b4nativeRfPrime)
+
+ joined.count() should be (b4nativeRf.count())
+
+ val measure = joined.select(
+ rf_tile_mean(rf_local_subtract($"tile", $"tile2")) as "diff_mean",
+ rf_tile_stats(rf_local_subtract($"tile", $"tile2")).getField("variance") as "diff_var")
+ .as[(Double, Double)]
+ .collect()
+ all (measure) should be ((0.0, 0.0))
+ }
+
+ it("should join same scene in different tile sizes"){
+ val r1prime = b4nativeTif.toDF(TileDimensions(25, 25)).withColumnRenamed("tile", "tile2")
+ r1prime.select(rf_dimensions($"tile2").getField("rows")).as[Int].first() should be (25)
+ val joined = b4nativeRf.rasterJoin(r1prime)
+
+ joined.count() should be (b4nativeRf.count())
+
+ val measure = joined.select(
+ rf_tile_mean(rf_local_subtract($"tile", $"tile2")) as "diff_mean",
+ rf_tile_stats(rf_local_subtract($"tile", $"tile2")).getField("variance") as "diff_var")
+ .as[(Double, Double)]
+ .collect()
+ all (measure) should be ((0.0, 0.0))
+
+ }
+
+ it("should join same scene in two projections, same tile size") {
+
+ // b4warpedRf source data is gdal warped b4nativeRf data; join them together.
+ val joined = b4nativeRf.rasterJoin(b4warpedRf)
+ // create a Raster from tile2 which should be almost equal to b4nativeTif
+ val result = joined.agg(TileRasterizerAggregate(
+ ProjectedRasterDefinition(b4nativeTif.cols, b4nativeTif.rows, b4nativeTif.cellType, b4nativeTif.crs, b4nativeTif.extent, Bilinear),
+ $"crs", $"extent", $"tile2") as "raster"
+ ).select(col("raster").as[Raster[Tile]]).first()
+
+ result.extent shouldBe b4nativeTif.extent
+
+ // Test the overall local difference of the `result` versus the original
+ import geotrellis.raster.mapalgebra.local._
+ val sub = b4nativeTif.extent.buffer(-b4nativeTif.extent.width * 0.01)
+ val diff = Abs(
+ Subtract(
+ result.crop(sub).tile.convert(IntConstantNoDataCellType),
+ b4nativeTif.raster.crop(sub).tile.convert(IntConstantNoDataCellType)
+ )
+ )
+ // DN's within arbitrary threshold. N.B. the range of values in the source raster is (6396, 27835)
+ diff.statisticsDouble.get.mean should be (0.0 +- 200)
+ // Overall signal is preserved
+ val b4nativeStddev = b4nativeTif.tile.statisticsDouble.get.stddev
+ val rel_diff = diff.statisticsDouble.get.mean / b4nativeStddev
+ rel_diff should be (0.0 +- 0.15)
+
+ // Use the tile structure of the `joined` dataframe to argue that the structure of the image is similar between `b4nativeTif` and `joined.tile2`
+ val tile_diffs = joined.select((abs(rf_tile_mean($"tile") - rf_tile_mean($"tile2")) / lit( b4nativeStddev)).alias("z"))
+
+ // Check the 90%-ile z score; recognize there will be some localized areas of larger error
+ tile_diffs.selectExpr("percentile(z, 0.90)").as[Double].first() should be < 0.10
+ // Check the median z score; it is pretty close to zero
+ tile_diffs.selectExpr("percentile(z, 0.50)").as[Double].first() should be < 0.025
+ }
+
+ it("should join multiple RHS tile columns"){
+ // join multiple native CRS bands to the EPSG 4326 RF
+
+ val multibandRf = b4nativeRf
+ .withColumn("t_plus", rf_local_add($"tile", $"tile"))
+ .withColumn("t_mult", rf_local_multiply($"tile", $"tile"))
+ multibandRf.tileColumns.length should be (3)
+
+ val multibandJoin = multibandRf.rasterJoin(b4warpedRf)
+
+ multibandJoin.tileColumns.length should be (4)
+ multibandJoin.count() should be (multibandRf.count())
+ }
+
+ it("should join with heterogeneous LHS CRS and coverages"){
+
+ val df17 = readSingleband("m_3607824_se_17_1_20160620_subset.tif")
+ .toDF(TileDimensions(50, 50))
+ .withColumn("utm", lit(17))
+ // neighboring and slightly overlapping NAIP scene
+ val df18 = readSingleband("m_3607717_sw_18_1_20160620_subset.tif")
+ .toDF(TileDimensions(60, 60))
+ .withColumn("utm", lit(18))
+
+ df17.count() should be (6 * 6) // file is 300 x 300
+ df18.count() should be (5 * 5) // file is 300 x 300
+
+ val df = df17.union(df18)
+ df.count() should be (6 * 6 + 5 * 5)
+ val expectCrs = Array("+proj=utm +zone=17 +datum=NAD83 +units=m +no_defs ", "+proj=utm +zone=18 +datum=NAD83 +units=m +no_defs ")
+ df.select($"crs".getField("crsProj4")).distinct().as[String].collect() should contain theSameElementsAs expectCrs
+
+ // read a third source to join. burned in box that intersects both above subsets; but more so on the df17
+ val box = readSingleband("m_3607_box.tif").toDF(TileDimensions(4,4)).withColumnRenamed("tile", "burned")
+ val joined = df.rasterJoin(box)
+
+ joined.count() should be (df.count)
+
+ val totals = joined.groupBy($"utm").agg(sum(rf_tile_sum($"burned")).alias("burned_total"))
+ val total18 = totals.where($"utm" === 18).select($"burned_total").as[Double].first()
+ val total17 = totals.where($"utm" === 17).select($"burned_total").as[Double].first()
+
+ total18 should be > 0.0
+ total18 should be < total17
+
+
+ }
+
+ it("should pass through ancillary columns") {
+ val left = b4nativeRf.withColumn("left_id", monotonically_increasing_id())
+ val right = b4warpedRf.withColumn("right_id", monotonically_increasing_id())
+ val joined = left.rasterJoin(right)
+ joined.columns should contain allElementsOf Seq("left_id", "right_id_agg")
+ }
+ }
+}
diff --git a/core/src/test/scala/org/locationtech/rasterframes/ReprojectGeometrySpec.scala b/core/src/test/scala/org/locationtech/rasterframes/ReprojectGeometrySpec.scala
new file mode 100644
index 000000000..a58294287
--- /dev/null
+++ b/core/src/test/scala/org/locationtech/rasterframes/ReprojectGeometrySpec.scala
@@ -0,0 +1,122 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes
+
+import geotrellis.proj4.{CRS, LatLng, Sinusoidal, WebMercator}
+import org.apache.spark.sql.Encoders
+import org.locationtech.jts.geom._
+
+/**
+ * Test for geometry reprojection.
+ *
+ * @since 11/29/18
+ */
+class ReprojectGeometrySpec extends TestEnvironment {
+ // Note: Test data copied from ReprojectSpec in GeoTrellis
+ val fact = new GeometryFactory()
+ val llLineString: Geometry = fact.createLineString(Array(
+ new Coordinate(-111.09374999999999, 34.784483415461345),
+ new Coordinate(-111.09374999999999, 43.29919735147067),
+ new Coordinate(-75.322265625, 43.29919735147067),
+ new Coordinate(-75.322265625, 34.784483415461345),
+ new Coordinate(-111.09374999999999, 34.784483415461345)
+ ))
+
+ val wmLineString: Geometry = fact.createLineString(Array(
+ new Coordinate(-12366899.680315234, 4134631.734001753),
+ new Coordinate(-12366899.680315234, 5357624.186564572),
+ new Coordinate(-8384836.254770693, 5357624.186564572),
+ new Coordinate(-8384836.254770693, 4134631.734001753),
+ new Coordinate(-12366899.680315234, 4134631.734001753)
+ ))
+
+ describe("Geometry reprojection") {
+ import spark.implicits._
+
+ it("should handle two literal CRSs") {
+
+ val df = Seq((llLineString, wmLineString)).toDF("ll", "wm")
+
+ val rp = df.select(
+ st_reproject($"ll", LatLng, WebMercator) as "wm2",
+ st_reproject($"wm", WebMercator, LatLng) as "ll2",
+ st_reproject(st_reproject($"ll", LatLng, Sinusoidal), Sinusoidal, WebMercator) as "wm3"
+ ).as[(Geometry, Geometry, Geometry)]
+
+
+ val (wm2, ll2, wm3) = rp.first()
+
+ wm2 should matchGeom(wmLineString, 0.00001)
+ ll2 should matchGeom(llLineString, 0.00001)
+ wm3 should matchGeom(wmLineString, 0.00001)
+ }
+
+ it("should handle one literal crs") {
+ implicit val enc = Encoders.tuple(jtsGeometryEncoder, jtsGeometryEncoder, crsEncoder)
+ val df = Seq((llLineString, wmLineString, LatLng: CRS)).toDF("ll", "wm", "llCRS")
+
+ val rp = df.select(
+ st_reproject($"ll", $"llCRS", WebMercator) as "wm2",
+ st_reproject($"wm", WebMercator, $"llCRS") as "ll2",
+ st_reproject(st_reproject($"ll", $"llCRS", Sinusoidal), Sinusoidal, WebMercator) as "wm3"
+ ).as[(Geometry, Geometry, Geometry)]
+
+
+ val (wm2, ll2, wm3) = rp.first()
+
+ wm2 should matchGeom(wmLineString, 0.00001)
+ ll2 should matchGeom(llLineString, 0.00001)
+ wm3 should matchGeom(wmLineString, 0.00001)
+ }
+
+ it("should accept other geometry types") {
+ val df = Seq(1, 2, 3).toDF("id")
+
+ noException shouldBe thrownBy {
+ df.select(st_reproject(st_makePoint($"id", $"id"), WebMercator, Sinusoidal)).count()
+ }
+ }
+
+ it("should work in SQL") {
+ implicit val enc = Encoders.tuple(jtsGeometryEncoder, jtsGeometryEncoder, crsEncoder)
+ val df = Seq((llLineString, wmLineString, LatLng: CRS)).toDF("ll", "wm", "llCRS")
+ df.createOrReplaceTempView("geom")
+
+ val rp = spark.sql(
+ """
+ | SELECT st_reproject(ll, llCRS, 'EPSG:3857') as wm2,
+ | st_reproject(wm, 'EPSG:3857', llCRS) as ll2,
+ | st_reproject(st_reproject(ll, llCRS, '+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs'),
+ | '+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs', 'EPSG:3857') as wm3
+ | FROM geom
+ """.stripMargin).as[(Geometry, Geometry, Geometry)]
+
+ val (wm2, ll2, wm3) = rp.first()
+
+ wm2 should matchGeom(wmLineString, 0.00001)
+ ll2 should matchGeom(llLineString, 0.00001)
+ wm3 should matchGeom(wmLineString, 0.00001)
+
+ checkDocs("st_reproject")
+ }
+ }
+}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/SpatialKeySpec.scala b/core/src/test/scala/org/locationtech/rasterframes/SpatialKeySpec.scala
similarity index 87%
rename from core/src/test/scala/astraea/spark/rasterframes/SpatialKeySpec.scala
rename to core/src/test/scala/org/locationtech/rasterframes/SpatialKeySpec.scala
index 065e9a5ed..b99b5c48e 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/SpatialKeySpec.scala
+++ b/core/src/test/scala/org/locationtech/rasterframes/SpatialKeySpec.scala
@@ -15,11 +15,12 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes
+package org.locationtech.rasterframes
-import com.vividsolutions.jts.geom.Polygon
import geotrellis.proj4.LatLng
import geotrellis.vector.Point
import org.locationtech.geomesa.curve.Z2SFC
@@ -37,11 +38,11 @@ class SpatialKeySpec extends TestEnvironment with TestData {
describe("Spatial key conversions") {
val raster = sampleGeoTiff.projectedRaster
// Create a raster frame with a single row
- val rf = raster.toRF(raster.tile.cols, raster.tile.rows)
+ val rf = raster.toLayer(raster.tile.cols, raster.tile.rows)
it("should add an extent column") {
val expected = raster.extent.jtsGeom
- val result = rf.withBounds().select($"bounds".as[Polygon]).first
+ val result = rf.withGeometry().select(GEOMETRY_COLUMN).first
assert(result === expected)
}
@@ -64,6 +65,4 @@ class SpatialKeySpec extends TestEnvironment with TestData {
assert(result === expected)
}
}
- // This is to avoid an IntelliJ error
- protected def withFixture(test: Any) = ???
}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/TestData.scala b/core/src/test/scala/org/locationtech/rasterframes/TestData.scala
similarity index 76%
rename from core/src/test/scala/astraea/spark/rasterframes/TestData.scala
rename to core/src/test/scala/org/locationtech/rasterframes/TestData.scala
index 29c06849e..1b6b373e9 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/TestData.scala
+++ b/core/src/test/scala/org/locationtech/rasterframes/TestData.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright (c) 2017. Astraea, Inc.
+ * Copyright 2017 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -14,19 +14,17 @@
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes
+package org.locationtech.rasterframes
import java.net.URI
-import java.nio.file.Paths
+import java.nio.file.{Files, Paths}
import java.time.ZonedDateTime
-import astraea.spark.rasterframes.expressions.tilestats.NoDataCells
-import astraea.spark.rasterframes.model.TileContext
-import astraea.spark.rasterframes.tiles.ProjectedRasterTile
-import astraea.spark.rasterframes.{functions => F}
-import com.vividsolutions.jts.geom.{Coordinate, GeometryFactory}
import geotrellis.proj4.{CRS, LatLng}
import geotrellis.raster
import geotrellis.raster._
@@ -38,6 +36,10 @@ import geotrellis.vector.{Extent, ProjectedExtent}
import org.apache.commons.io.IOUtils
import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSession
+import org.locationtech.jts.geom.{Coordinate, GeometryFactory}
+import org.locationtech.rasterframes.expressions.tilestats.NoDataCells
+import org.locationtech.rasterframes.tiles.ProjectedRasterTile
+import spray.json.JsObject
import scala.reflect.ClassTag
@@ -109,6 +111,12 @@ trait TestData {
require((1 to 11).contains(band), "Invalid band number")
readSingleband(s"L8-B$band-Elkton-VA.tiff")
}
+
+ def l8SamplePath(band: Int) = {
+ require((1 to 11).contains(band), "Invalid band number")
+ getClass.getResource(s"/L8-B$band-Elkton-VA.tiff").toURI
+ }
+
def l8Labels = readSingleband("L8-Labels-Elkton-VA.tiff")
def naipSample(band: Int) = {
@@ -116,22 +124,36 @@ trait TestData {
readSingleband(s"NAIP-VA-b$band.tiff")
}
- def rgbCogSample = readMultiband("LC08_RGB_Norfolk_COG.tiff")
+ def rgbCogSample = readMultiband("LC08_RGB_Norfolk_COG.tiff")
+
+ def rgbCogSamplePath = getClass.getResource("/LC08_RGB_Norfolk_COG.tiff").toURI
def sampleTileLayerRDD(implicit spark: SparkSession): TileLayerRDD[SpatialKey] = {
- val rf = sampleGeoTiff.projectedRaster.toRF(128, 128)
+ val rf = sampleGeoTiff.projectedRaster.toLayer(128, 128)
rf.toTileLayerRDD(rf.tileColumns.head).left.get
}
private val baseCOG = "https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/149/039/LC08_L1TP_149039_20170411_20170415_01_T1/LC08_L1TP_149039_20170411_20170415_01_T1_%s.TIF"
- lazy val remoteCOGSingleband1 = URI.create(baseCOG.format("B1"))
- lazy val remoteCOGSingleband2 = URI.create(baseCOG.format("B2"))
+ lazy val remoteCOGSingleband1: URI = URI.create(baseCOG.format("B1"))
+ lazy val remoteCOGSingleband2: URI = URI.create(baseCOG.format("B2"))
+
+ lazy val remoteCOGMultiband: URI = URI.create("https://s3-us-west-2.amazonaws.com/radiant-nasa-iserv/2014/02/14/IP0201402141023382027S03100E/IP0201402141023382027S03100E-COG.tif")
+
+ lazy val remoteMODIS: URI = URI.create("https://modis-pds.s3.amazonaws.com/MCD43A4.006/31/11/2017158/MCD43A4.A2017158.h31v11.006.2017171203421_B01.TIF")
+ lazy val remoteL8: URI = URI.create("https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/017/033/LC08_L1TP_017033_20181010_20181030_01_T1/LC08_L1TP_017033_20181010_20181030_01_T1_B4.TIF")
+ lazy val remoteHttpMrfPath: URI = URI.create("https://s3.amazonaws.com/s22s-rasterframes-integration-tests/m_3607526_sw_18_1_20160708.mrf")
+ lazy val remoteS3MrfPath: URI = URI.create("s3://naip-analytic/va/2016/100cm/rgbir/37077/m_3707764_sw_18_1_20160708.mrf")
- lazy val remoteCOGMultiband = URI.create("https://s3-us-west-2.amazonaws.com/radiant-nasa-iserv/2014/02/14/IP0201402141023382027S03100E/IP0201402141023382027S03100E-COG.tif")
+ lazy val localSentinel: URI = getClass.getResource("/B01.jp2").toURI
+ lazy val cogPath: URI = getClass.getResource("/LC08_RGB_Norfolk_COG.tiff").toURI
+ lazy val singlebandCogPath: URI = getClass.getResource("/LC08_B7_Memphis_COG.tiff").toURI
+ lazy val nonCogPath: URI = getClass.getResource("/L8-B8-Robinson-IL.tiff").toURI
- lazy val remoteMODIS = URI.create("https://modis-pds.s3.amazonaws.com/MCD43A4.006/31/11/2017158/MCD43A4.A2017158.h31v11.006.2017171203421_B01.TIF")
+ lazy val l8B1SamplePath: URI = l8SamplePath(1)
+ lazy val l8samplePath: URI = getClass.getResource("/L8-B1-Elkton-VA.tiff").toURI
+ lazy val modisConvertedMrfPath: URI = getClass.getResource("/MCD43A4.A2019111.h30v06.006.2019120033434_01.mrf").toURI
- object JTS {
+ object GeomData {
val fact = new GeometryFactory()
val c1 = new Coordinate(1, 2)
val c2 = new Coordinate(3, 4)
@@ -144,6 +166,19 @@ trait TestData {
val mpoly = fact.createMultiPolygon(Array(poly, poly, poly))
val coll = fact.createGeometryCollection(Array(point, line, poly, mpoint, mline, mpoly))
val all = Seq(point, line, poly, mpoint, mline, mpoly, coll)
+ lazy val geoJson = {
+ import scala.collection.JavaConversions._
+ val p = Paths.get(TestData.getClass
+ .getResource("/L8-Labels-Elkton-VA.geojson").toURI)
+ Files.readAllLines(p).mkString("\n")
+ }
+ lazy val features = {
+ import geotrellis.vector.io._
+ import geotrellis.vector.io.json.JsonFeatureCollection
+ import spray.json.DefaultJsonProtocol._
+ import spray.json._
+ GeomData.geoJson.parseGeoJson[JsonFeatureCollection].getAllPolygonFeatures[JsObject]()
+ }
}
}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/TestEnvironment.scala b/core/src/test/scala/org/locationtech/rasterframes/TestEnvironment.scala
similarity index 74%
rename from core/src/test/scala/astraea/spark/rasterframes/TestEnvironment.scala
rename to core/src/test/scala/org/locationtech/rasterframes/TestEnvironment.scala
index aaf173014..87ab2559d 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/TestEnvironment.scala
+++ b/core/src/test/scala/org/locationtech/rasterframes/TestEnvironment.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright (c) 2017. Astraea, Inc.
+ * Copyright 2017 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -14,25 +14,25 @@
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes
+package org.locationtech.rasterframes
import java.nio.file.{Files, Paths}
-import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.stringEnc
-import astraea.spark.rasterframes.ref.RasterSource
-import astraea.spark.rasterframes.ref.RasterSource.ReadCallback
-import astraea.spark.rasterframes.util.toParquetFriendlyColumnName
-import com.vividsolutions.jts.geom.Geometry
+import com.typesafe.scalalogging.LazyLogging
import geotrellis.spark.testkit.{TestEnvironment => GeoTrellisTestEnvironment}
-import geotrellis.util.LazyLogging
import org.apache.spark.SparkContext
import org.apache.spark.sql._
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.types.StructType
+import org.locationtech.jts.geom.Geometry
import org.scalactic.Tolerance
import org.scalatest._
import org.scalatest.matchers.{MatchResult, Matcher}
+import org.locationtech.rasterframes.util._
trait TestEnvironment extends FunSpec with GeoTrellisTestEnvironment
with Matchers with Inspectors with Tolerance with LazyLogging {
@@ -40,11 +40,14 @@ trait TestEnvironment extends FunSpec with GeoTrellisTestEnvironment
override def sparkMaster: String = "local[*]"
override implicit def sc: SparkContext = { _sc.setLogLevel("ERROR"); _sc }
- //p.setProperty(“spark.driver.allowMultipleContexts”, “true”)
lazy val sqlContext: SQLContext = {
- val session = SparkSession.builder.config(_sc.getConf).getOrCreate()
- astraea.spark.rasterframes.WithSQLContextMethods(session.sqlContext).withRasterFrames
+ val session = SparkSession.builder
+ .config(_sc.getConf)
+ .config("spark.sql.crossJoin.enabled", true)
+ .withKryoSerialization
+ .getOrCreate()
+ session.sqlContext.withRasterFrames
}
lazy val sql: String ⇒ DataFrame = sqlContext.sql
@@ -86,6 +89,7 @@ trait TestEnvironment extends FunSpec with GeoTrellisTestEnvironment
def matchGeom(g: Geometry, tolerance: Double) = new GeometryMatcher(g, tolerance)
def checkDocs(name: String): Unit = {
+ import spark.implicits._
val docs = sql(s"DESCRIBE FUNCTION EXTENDED $name").as[String].collect().mkString("\n")
docs should include(name)
docs shouldNot include("not found")
@@ -95,16 +99,5 @@ trait TestEnvironment extends FunSpec with GeoTrellisTestEnvironment
}
object TestEnvironment {
- case class ReadMonitor(ignoreHeader: Boolean = true) extends ReadCallback with LazyLogging {
- var reads: Int = 0
- var total: Long = 0
- override def readRange(source: RasterSource, start: Long, length: Int): Unit = {
- logger.trace(s"Reading $length at $start from $source")
- // Ignore header reads
- if(!ignoreHeader || start > 0) reads += 1
- total += length
- }
- override def toString: String = s"$productPrefix(reads=$reads, total=$total)"
- }
}
\ No newline at end of file
diff --git a/core/src/test/scala/astraea/spark/rasterframes/TileAssemblerSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/TileAssemblerSpec.scala
similarity index 78%
rename from core/src/test/scala/astraea/spark/rasterframes/TileAssemblerSpec.scala
rename to core/src/test/scala/org/locationtech/rasterframes/TileAssemblerSpec.scala
index 29eff421f..757231595 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/TileAssemblerSpec.scala
+++ b/core/src/test/scala/org/locationtech/rasterframes/TileAssemblerSpec.scala
@@ -19,16 +19,15 @@
*
*/
-package astraea.spark.rasterframes
-import astraea.spark.rasterframes.ref.RasterSource
-import astraea.spark.rasterframes.ref.RasterSource.InMemoryRasterSource
+package org.locationtech.rasterframes
+
import com.typesafe.scalalogging.LazyLogging
import geotrellis.proj4.LatLng
import geotrellis.raster._
import geotrellis.raster.render.ColorRamps
import geotrellis.vector.Extent
-import org.apache.spark.sql._
-import org.apache.spark.sql.{functions => F}
+import org.apache.spark.sql.{functions => F, _}
+import org.locationtech.rasterframes.ref.{InMemoryRasterSource, RasterSource}
/**
*
@@ -42,15 +41,15 @@ class TileAssemblerSpec extends TestEnvironment {
it("should reassemble a small scene") {
val raster = TestData.l8Sample(8).projectedRaster
- val rf = raster.toRF(16, 16)
+ val rf = raster.toLayer(16, 16)
val ct = rf.tileLayerMetadata.merge.cellType
val (tileCols, tileRows) = rf.tileLayerMetadata.merge.tileLayout.tileDimensions
- val exploded = rf.select($"spatial_key", explode_tiles($"tile"))
+ val exploded = rf.select($"spatial_key", rf_explode_tiles($"tile"))
val assembled = exploded
.groupBy($"spatial_key")
- .agg(assemble_tile(COLUMN_INDEX_COLUMN, ROW_INDEX_COLUMN, $"tile", tileCols, tileRows, ct))
+ .agg(rf_assemble_tile(COLUMN_INDEX_COLUMN, ROW_INDEX_COLUMN, $"tile", tileCols, tileRows, ct))
assert(
@@ -65,12 +64,13 @@ class TileAssemblerSpec extends TestEnvironment {
val sceneSize = (260, 257)
val rs = InMemoryRasterSource(TestData.randomTile(sceneSize._1, sceneSize._2, ByteConstantNoDataCellType), Extent(10, 20, 30, 40), LatLng)
val df = rs.toDF
- val exploded = df.select($"spatial_index", $"extent", tile_dimensions($"tile") as "tile_dimensions", explode_tiles($"tile"))
+ val exploded = df.select($"spatial_index", $"extent", rf_dimensions($"tile") as "tile_dimensions", rf_explode_tiles($"tile"))
val assembled = exploded
.groupBy($"spatial_index", $"extent", $"tile_dimensions")
.agg(
- convert_cell_type(assemble_tile(COLUMN_INDEX_COLUMN, ROW_INDEX_COLUMN,
+ rf_convert_cell_type(
+ rf_assemble_tile(COLUMN_INDEX_COLUMN, ROW_INDEX_COLUMN,
$"tile", $"tile_dimensions.cols", $"tile_dimensions.rows"), rs.cellType) as "tile"
)
@@ -89,7 +89,7 @@ class TileAssemblerSpec extends TestEnvironment {
val exploded = util.time("exploded") {
df
- .select($"spatial_index", explode_tiles($"tile"))
+ .select($"spatial_index", rf_explode_tiles($"tile"))
.forceCache
}
@@ -98,7 +98,7 @@ class TileAssemblerSpec extends TestEnvironment {
val assembled = util.time("assembled") {
exploded
.groupBy($"spatial_index")
- .agg(assemble_tile(COLUMN_INDEX_COLUMN, ROW_INDEX_COLUMN,
+ .agg(rf_assemble_tile(COLUMN_INDEX_COLUMN, ROW_INDEX_COLUMN,
$"tile", 256, 256,
UShortUserDefinedNoDataCellType(32767)))
.forceCache
@@ -111,8 +111,8 @@ class TileAssemblerSpec extends TestEnvironment {
assert(assembled.count() === df.count())
- val expected = df.select(agg_stats($"tile")).first()
- val result = assembled.select(agg_stats($"tile")).first()
+ val expected = df.select(rf_agg_stats($"tile")).first()
+ val result = assembled.select(rf_agg_stats($"tile")).first()
assert(result.copy(no_data_cells = expected.no_data_cells) === expected)
}
@@ -134,9 +134,9 @@ object TileAssemblerSpec extends LazyLogging {
implicit class WithToDF(val rs: RasterSource) {
def toDF(implicit spark: SparkSession): DataFrame = {
import spark.implicits._
- rs.readAll().left.get
+ rs.readAll()
.zipWithIndex
- .map { case (r, i) ⇒ (i, r.extent, r.tile) }
+ .map { case (r, i) ⇒ (i, r.extent, r.tile.band(0)) }
.toDF("spatial_index", "extent", "tile")
.repartition($"spatial_index")
.forceCache
diff --git a/core/src/test/scala/astraea/spark/rasterframes/TileStatsSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/TileStatsSpec.scala
similarity index 78%
rename from core/src/test/scala/astraea/spark/rasterframes/TileStatsSpec.scala
rename to core/src/test/scala/org/locationtech/rasterframes/TileStatsSpec.scala
index 781b8290d..50920ab1c 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/TileStatsSpec.scala
+++ b/core/src/test/scala/org/locationtech/rasterframes/TileStatsSpec.scala
@@ -15,18 +15,18 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes
+package org.locationtech.rasterframes
-import astraea.spark.rasterframes.TestData.randomTile
-import astraea.spark.rasterframes.TestData.fracTile
-import astraea.spark.rasterframes.expressions.aggstats.LocalMeanAggregate
-import astraea.spark.rasterframes.stats.CellHistogram
import geotrellis.raster._
-import geotrellis.spark._
import geotrellis.raster.mapalgebra.local.{Max, Min}
+import geotrellis.spark._
import org.apache.spark.sql.functions._
+import org.locationtech.rasterframes.TestData.randomTile
+import org.locationtech.rasterframes.stats.CellHistogram
/**
* Test rig associated with computing statistics and other descriptive
@@ -35,21 +35,21 @@ import org.apache.spark.sql.functions._
* @since 9/18/17
*/
class TileStatsSpec extends TestEnvironment with TestData {
- import sqlContext.implicits._
import TestData.injectND
+ import sqlContext.implicits._
describe("computing statistics over tiles") {
//import org.apache.spark.sql.execution.debug._
it("should report dimensions") {
val df = Seq[(Tile, Tile)]((byteArrayTile, byteArrayTile)).toDF("tile1", "tile2")
- val dims = df.select(tile_dimensions($"tile1") as "dims").select("dims.*")
+ val dims = df.select(rf_dimensions($"tile1") as "dims").select("dims.*")
assert(dims.as[(Int, Int)].first() === (3, 3))
assert(dims.schema.head.name === "cols")
val query = sql("""|select dims.* from (
- |select rf_tile_dimensions(tiles) as dims from (
+ |select rf_dimensions(tiles) as dims from (
|select rf_make_constant_tile(1, 10, 10, 'int8raw') as tiles))
|""".stripMargin)
write(query)
@@ -57,7 +57,7 @@ class TileStatsSpec extends TestEnvironment with TestData {
df.repartition(4).createOrReplaceTempView("tmp")
assert(
- sql("select dims.* from (select rf_tile_dimensions(tile2) as dims from tmp)")
+ sql("select dims.* from (select rf_dimensions(tile2) as dims from tmp)")
.as[(Int, Int)]
.first() === (3, 3))
}
@@ -67,20 +67,20 @@ class TileStatsSpec extends TestEnvironment with TestData {
forEvery(ct) { c =>
val expected = CellType.fromName(c)
val tile = randomTile(5, 5, expected)
- val result = Seq(tile).toDF("tile").select(cell_type($"tile")).first()
+ val result = Seq(tile).toDF("tile").select(rf_cell_type($"tile")).first()
result should be(expected)
}
}
// tiles defined for the next few tests
- val tile1 = fracTile(10, 10, 5)
+ val tile1 = TestData.fracTile(10, 10, 5)
val tile2 = ArrayTile(Array(-5, -4, -3, -2, -1, 0, 1, 2, 3), 3, 3)
val tile3 = randomTile(255, 255, IntCellType)
it("should compute accurate item counts") {
val ds = Seq[Tile](tile1, tile2, tile3).toDF("tiles")
val checkedValues = Seq[Double](0, 4, 7, 13, 26)
- val result = checkedValues.map(x => ds.select(tile_histogram($"tiles")).first().itemCount(x))
+ val result = checkedValues.map(x => ds.select(rf_tile_histogram($"tiles")).first().itemCount(x))
forEvery(checkedValues) { x =>
assert((x == 0 && result.head == 4) || result.contains(x - 1))
}
@@ -89,7 +89,7 @@ class TileStatsSpec extends TestEnvironment with TestData {
it("Should compute quantiles") {
val ds = Seq[Tile](tile1, tile2, tile3).toDF("tiles")
val numBreaks = 5
- val breaks = ds.select(tile_histogram($"tiles")).map(_.quantileBreaks(numBreaks)).collect()
+ val breaks = ds.select(rf_tile_histogram($"tiles")).map(_.quantileBreaks(numBreaks)).collect()
assert(breaks(1).length === numBreaks)
assert(breaks(0).apply(2) == 25)
assert(breaks(1).max <= 3 && breaks.apply(1).min >= -5)
@@ -101,7 +101,7 @@ class TileStatsSpec extends TestEnvironment with TestData {
ds.createOrReplaceTempView("tmp")
withClue("max") {
- val max = ds.agg(agg_local_max($"tiles"))
+ val max = ds.agg(rf_agg_local_max($"tiles"))
val expected = Max(byteArrayTile, byteConstantTile)
write(max)
assert(max.as[Tile].first() === expected)
@@ -112,7 +112,7 @@ class TileStatsSpec extends TestEnvironment with TestData {
}
withClue("min") {
- val min = ds.agg(agg_local_min($"tiles"))
+ val min = ds.agg(rf_agg_local_min($"tiles"))
val expected = Min(byteArrayTile, byteConstantTile)
write(min)
assert(min.as[Tile].first() === Min(byteArrayTile, byteConstantTile))
@@ -127,19 +127,19 @@ class TileStatsSpec extends TestEnvironment with TestData {
withClue("mean") {
val ds = Seq.fill[Tile](3)(randomTile(5, 5, FloatConstantNoDataCellType)).toDS()
- val means1 = ds.select(tile_stats($"value")).map(_.mean).collect
- val means2 = ds.select(tile_mean($"value")).collect
+ val means1 = ds.select(rf_tile_stats($"value")).map(_.mean).collect
+ val means2 = ds.select(rf_tile_mean($"value")).collect
// Compute the mean manually, knowing we're not dealing with no-data values.
val means =
- ds.select(tile_to_array_double($"value")).map(a => a.sum / a.length).collect
+ ds.select(rf_tile_to_array_double($"value")).map(a => a.sum / a.length).collect
forAll(means.zip(means1)) { case (l, r) => assert(l === r +- 1e-6) }
forAll(means.zip(means2)) { case (l, r) => assert(l === r +- 1e-6) }
}
withClue("sum") {
- val rf = l8Sample(1).projectedRaster.toRF
+ val rf = l8Sample(1).toDF()
val expected = 309149454 // computed with rasterio
- val result = rf.agg(sum(tile_sum($"tile"))).collect().head.getDouble(0)
+ val result = rf.agg(sum(rf_tile_sum($"tile"))).collect().head.getDouble(0)
logger.info(s"L8 sample band 1 grand total: ${result}")
assert(result === expected)
}
@@ -149,7 +149,7 @@ class TileStatsSpec extends TestEnvironment with TestData {
val ds = Seq.fill[Tile](3)(randomTile(5, 5, FloatCellType)).toDF("tiles")
ds.createOrReplaceTempView("tmp")
- val r1 = ds.select(tile_histogram($"tiles"))
+ val r1 = ds.select(rf_tile_histogram($"tiles"))
assert(r1.first.totalCount === 5 * 5)
write(r1)
val r2 = sql("select hist.* from (select rf_tile_histogram(tiles) as hist from tmp)").as[CellHistogram]
@@ -179,7 +179,7 @@ class TileStatsSpec extends TestEnvironment with TestData {
.fill[Tile](rows)(randomTile(tileSize, tileSize, FloatConstantNoDataCellType))
.toDF("tiles")
ds.createOrReplaceTempView("tmp")
- val agg = ds.select(agg_approx_histogram($"tiles"))
+ val agg = ds.select(rf_agg_approx_histogram($"tiles"))
val histArray = agg.collect()
histArray.length should be (1)
@@ -198,21 +198,21 @@ class TileStatsSpec extends TestEnvironment with TestData {
it("should compute aggregate mean") {
val ds = (Seq.fill[Tile](10)(randomTile(5, 5, FloatCellType)) :+ null).toDF("tiles")
- val agg = ds.select(agg_mean($"tiles"))
- val stats = ds.select(agg_stats($"tiles") as "stats").select($"stats.mean".as[Double])
+ val agg = ds.select(rf_agg_mean($"tiles"))
+ val stats = ds.select(rf_agg_stats($"tiles") as "stats").select($"stats.mean".as[Double])
assert(agg.first() === stats.first())
}
it("should compute aggregate statistics") {
val ds = Seq.fill[Tile](10)(randomTile(5, 5, FloatConstantNoDataCellType)).toDF("tiles")
- val exploded = ds.select(explode_tiles($"tiles"))
+ val exploded = ds.select(rf_explode_tiles($"tiles"))
val (mean, vrnc) = exploded.agg(avg($"tiles"), var_pop($"tiles")).as[(Double, Double)].first
- val stats = ds.select(agg_stats($"tiles") as "stats") ///.as[(Long, Double, Double, Double, Double)]
+ val stats = ds.select(rf_agg_stats($"tiles") as "stats") ///.as[(Long, Double, Double, Double, Double)]
//stats.printSchema()
noException shouldBe thrownBy {
- ds.select(agg_stats($"tiles")).collect()
+ ds.select(rf_agg_stats($"tiles")).collect()
}
val agg = stats.select($"stats.variance".as[Double])
@@ -223,7 +223,7 @@ class TileStatsSpec extends TestEnvironment with TestData {
val agg2 = sql("select stats.* from (select rf_agg_stats(tiles) as stats from tmp)")
assert(agg2.first().getAs[Long]("data_cells") === 250L)
- val agg3 = ds.agg(agg_stats($"tiles") as "stats").select($"stats.mean".as[Double])
+ val agg3 = ds.agg(rf_agg_stats($"tiles") as "stats").select($"stats.mean".as[Double])
assert(mean === agg3.first())
}
@@ -236,7 +236,7 @@ class TileStatsSpec extends TestEnvironment with TestData {
.map(injectND(2)) :+ null).toDF("tiles")
ds.createOrReplaceTempView("tmp")
- val agg = ds.select(agg_local_stats($"tiles") as "stats")
+ val agg = ds.select(rf_agg_local_stats($"tiles") as "stats")
val stats = agg.select("stats.*")
//printStatsRows(stats)
@@ -269,25 +269,25 @@ class TileStatsSpec extends TestEnvironment with TestData {
val dsNd = (Seq.fill(20)(completeTile) :+ incompleteTile :+ null).toDF("tiles")
// counted everything properly
- val countTile = ds.select(agg_local_data_cells($"tiles")).first()
+ val countTile = ds.select(rf_agg_local_data_cells($"tiles")).first()
forAll(countTile.toArray())(i => assert(i === 20))
- val countArray = dsNd.select(agg_local_data_cells($"tiles")).first().toArray()
+ val countArray = dsNd.select(rf_agg_local_data_cells($"tiles")).first().toArray()
val expectedCount =
(completeTile.localDefined().toArray zip incompleteTile.localDefined().toArray()).toSeq.map(
pr => pr._1 * 20 + pr._2)
assert(countArray === expectedCount)
- val countNodataArray = dsNd.select(agg_local_no_data_cells($"tiles")).first().toArray
+ val countNodataArray = dsNd.select(rf_agg_local_no_data_cells($"tiles")).first().toArray
assert(countNodataArray === incompleteTile.localUndefined().toArray)
- val minTile = dsNd.select(agg_local_min($"tiles")).first()
+ val minTile = dsNd.select(rf_agg_local_min($"tiles")).first()
assert(minTile.toArray() === completeTile.toArray())
- val maxTile = dsNd.select(agg_local_max($"tiles")).first()
+ val maxTile = dsNd.select(rf_agg_local_max($"tiles")).first()
assert(maxTile.toArray() === completeTile.toArray())
- val meanTile = dsNd.select(agg_local_mean($"tiles")).first()
+ val meanTile = dsNd.select(rf_agg_local_mean($"tiles")).first()
assert(meanTile.toArray() === completeTile.toArray())
}
}
@@ -300,20 +300,20 @@ class TileStatsSpec extends TestEnvironment with TestData {
.map(injectND(nds)) :+ null).toDF("tiles")
it("should count cells by NoData state") {
- val counts = tiles.select(no_data_cells($"tiles")).collect().dropRight(1)
+ val counts = tiles.select(rf_no_data_cells($"tiles")).collect().dropRight(1)
forEvery(counts)(c => assert(c === nds))
- val counts2 = tiles.select(data_cells($"tiles")).collect().dropRight(1)
+ val counts2 = tiles.select(rf_data_cells($"tiles")).collect().dropRight(1)
forEvery(counts2)(c => assert(c === tsize * tsize - nds))
}
it("should detect all NoData tiles") {
- val ndCount = tiles.select("*").where(is_no_data_tile($"tiles")).count()
+ val ndCount = tiles.select("*").where(rf_is_no_data_tile($"tiles")).count()
ndCount should be(1)
val ndTiles =
(Seq.fill[Tile](count)(ArrayTile.empty(UByteConstantNoDataCellType, tsize, tsize)) :+ null)
.toDF("tiles")
- val ndCount2 = ndTiles.select("*").where(is_no_data_tile($"tiles")).count()
+ val ndCount2 = ndTiles.select("*").where(rf_is_no_data_tile($"tiles")).count()
ndCount2 should be(count + 1)
}
}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/TileUDTSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/TileUDTSpec.scala
similarity index 80%
rename from core/src/test/scala/astraea/spark/rasterframes/TileUDTSpec.scala
rename to core/src/test/scala/org/locationtech/rasterframes/TileUDTSpec.scala
index b83b94486..3081b2f64 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/TileUDTSpec.scala
+++ b/core/src/test/scala/org/locationtech/rasterframes/TileUDTSpec.scala
@@ -1,5 +1,3 @@
-
-
/*
* This software is licensed under the Apache 2 license, quoted below.
*
@@ -17,19 +15,21 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes
-
-import astraea.spark.rasterframes.encoders.CatalystSerializer._
-import astraea.spark.rasterframes.functions.cellTypes
+package org.locationtech.rasterframes
import geotrellis.raster.{CellType, Tile}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.rf._
+import org.apache.spark.sql.types.StringType
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.tiles.ShowableTile
import org.scalatest.Inspectors
/**
- * RasterFrame test rig.
+ * RasterFrameLayer test rig.
*
* @since 7/10/17
*/
@@ -38,12 +38,11 @@ class TileUDTSpec extends TestEnvironment with TestData with Inspectors {
spark.version
val tileEncoder: ExpressionEncoder[Tile] = ExpressionEncoder()
- val TileType = new TileUDT()
implicit val ser = TileUDT.tileSerializer
describe("TileUDT") {
- val tileSizes = Seq(2, 64, 128, 222, 511)
- val ct = cellTypes().filter(_ != "bool")
+ val tileSizes = Seq(2, 7, 64, 128, 511)
+ val ct = functions.cellTypes().filter(_ != "bool")
def forEveryConfig(test: Tile ⇒ Unit): Unit = {
forEvery(tileSizes.combinations(2).toSeq) { case Seq(cols, rows) ⇒
@@ -93,5 +92,13 @@ class TileUDTSpec extends TestEnvironment with TestData with Inspectors {
}
}
}
+
+ it("should provide a pretty-print tile") {
+ import spark.implicits._
+ forEveryConfig { tile =>
+ val stringified = Seq(tile).toDF("tile").select($"tile".cast(StringType)).as[String].first()
+ stringified should be(ShowableTile.show(tile))
+ }
+ }
}
}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/encoders/CatalystSerializerSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/encoders/CatalystSerializerSpec.scala
similarity index 86%
rename from core/src/test/scala/astraea/spark/rasterframes/encoders/CatalystSerializerSpec.scala
rename to core/src/test/scala/org/locationtech/rasterframes/encoders/CatalystSerializerSpec.scala
index c489b8d7b..4e8bfdfcc 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/encoders/CatalystSerializerSpec.scala
+++ b/core/src/test/scala/org/locationtech/rasterframes/encoders/CatalystSerializerSpec.scala
@@ -19,19 +19,20 @@
*
*/
-package astraea.spark.rasterframes.encoders
+package org.locationtech.rasterframes.encoders
+
import java.time.ZonedDateTime
-import astraea.spark.rasterframes.encoders.StandardEncoders._
-import astraea.spark.rasterframes.model.{CellContext, TileContext, TileDataContext, TileDimensions}
-import astraea.spark.rasterframes.ref.{RasterRef, RasterSource}
-import astraea.spark.rasterframes.{TestData, TestEnvironment}
import geotrellis.proj4._
import geotrellis.raster.{CellSize, CellType, TileLayout, UShortUserDefinedNoDataCellType}
import geotrellis.spark.tiling.LayoutDefinition
-import geotrellis.spark.{Bounds, KeyBounds, SpaceTimeKey, SpatialKey, TileLayerMetadata}
+import geotrellis.spark.{KeyBounds, SpaceTimeKey, SpatialKey, TileLayerMetadata}
import geotrellis.vector.{Extent, ProjectedExtent}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
+import org.locationtech.rasterframes.{TestData, TestEnvironment}
+import org.locationtech.rasterframes.encoders.StandardEncoders._
+import org.locationtech.rasterframes.model.{CellContext, TileContext, TileDataContext, TileDimensions}
+import org.locationtech.rasterframes.ref.{RasterRef, RasterSource}
import org.scalatest.Assertion
class CatalystSerializerSpec extends TestEnvironment with TestData {
@@ -102,8 +103,9 @@ class CatalystSerializerSpec extends TestEnvironment with TestData {
}
it("should serialize RasterRef") {
+ // TODO: Decide if RasterRef should be encoded 'flat', non-'flat', or depends
val src = RasterSource(remoteCOGSingleband1)
- val value = RasterRef(src, Some(src.extent.buffer(-3.0)))
+ val value = RasterRef(src, 0, Some(src.extent.buffer(-3.0)))
assertConsistent(value)
assertInvertable(value)
}
@@ -116,17 +118,17 @@ class CatalystSerializerSpec extends TestEnvironment with TestData {
assertContract(ext)
}
- it("should eserialize ProjectedExtent") {
+ it("should serialize ProjectedExtent") {
val pe = ProjectedExtent(ext, ConusAlbers)
assertContract(pe)
}
- it("should eserialize SpatialKey") {
+ it("should serialize SpatialKey") {
val v = SpatialKey(2, 3)
assertContract(v)
}
- it("should eserialize SpaceTimeKey") {
+ it("should serialize SpaceTimeKey") {
val v = SpaceTimeKey(2, 3, ZonedDateTime.now())
assertContract(v)
}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/encoders/EncodingSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/encoders/EncodingSpec.scala
similarity index 90%
rename from core/src/test/scala/astraea/spark/rasterframes/encoders/EncodingSpec.scala
rename to core/src/test/scala/org/locationtech/rasterframes/encoders/EncodingSpec.scala
index a0c0bad0e..b27d5cccd 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/encoders/EncodingSpec.scala
+++ b/core/src/test/scala/org/locationtech/rasterframes/encoders/EncodingSpec.scala
@@ -19,14 +19,13 @@
*
*/
-package astraea.spark.rasterframes.encoders
+package org.locationtech.rasterframes.encoders
import java.io.File
import java.net.URI
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.tiles.ProjectedRasterTile
-import com.vividsolutions.jts.geom.Envelope
+import org.locationtech.rasterframes._
+import org.locationtech.jts.geom.Envelope
import geotrellis.proj4._
import geotrellis.raster.{CellType, Tile, TileFeature}
import geotrellis.spark.{SpaceTimeKey, SpatialKey, TemporalProjectedExtent, TileLayerMetadata}
@@ -34,6 +33,8 @@ import geotrellis.vector.{Extent, ProjectedExtent}
import org.apache.spark.sql.Row
import org.apache.spark.sql.functions._
import org.apache.spark.sql.rf.TileUDT
+import org.locationtech.rasterframes.TestEnvironment
+import org.locationtech.rasterframes.tiles.ProjectedRasterTile
/**
* Test rig for encoding GT types into Catalyst types.
@@ -151,5 +152,13 @@ class EncodingSpec extends TestEnvironment with TestData {
assert(ds.first === env)
}
}
+ describe("Dataframe encoding ops on spatial types") {
+ it("should encode RDD[Point]") {
+ val points = Seq(null, extent.center.jtsGeom, null)
+ val ds = points.toDS
+ write(ds)
+ assert(ds.collect().toSeq === points)
+ }
+ }
}
diff --git a/core/src/test/scala/org/locationtech/rasterframes/expressions/ProjectedLayerMetadataAggregateTest.scala b/core/src/test/scala/org/locationtech/rasterframes/expressions/ProjectedLayerMetadataAggregateTest.scala
new file mode 100644
index 000000000..4d4949357
--- /dev/null
+++ b/core/src/test/scala/org/locationtech/rasterframes/expressions/ProjectedLayerMetadataAggregateTest.scala
@@ -0,0 +1,59 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.expressions
+
+import geotrellis.raster.Tile
+import geotrellis.spark._
+import geotrellis.spark.tiling.FloatingLayoutScheme
+import geotrellis.vector.{Extent, ProjectedExtent}
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.encoders.serialized_literal
+import org.locationtech.rasterframes.expressions.aggregates.ProjectedLayerMetadataAggregate
+import org.locationtech.rasterframes.model.TileDimensions
+
+class ProjectedLayerMetadataAggregateTest extends TestEnvironment {
+
+ import spark.implicits._
+
+ describe("ProjectedLayerMetadataAggregate") {
+ it("should collect metadata from RasterFrame") {
+ val image = TestData.sampleGeoTiff
+ val rf = image.projectedRaster.toLayer(60, 65)
+ val crs = rf.crs
+
+ val df = rf.withExtent()
+ .select($"extent", $"tile").as[(Extent, Tile)]
+
+ val tileDims = rf.tileLayerMetadata.merge.tileLayout.tileDimensions
+
+ val (_, tlm) = df
+ .map { case (ext, tile) => (ProjectedExtent(ext, crs), tile) }
+ .rdd.collectMetadata[SpatialKey](FloatingLayoutScheme(tileDims._1, tileDims._2))
+
+ val md = df.select(ProjectedLayerMetadataAggregate(crs, TileDimensions(tileDims), $"extent",
+ serialized_literal(crs), rf_cell_type($"tile"), rf_dimensions($"tile")))
+ val tlm2 = md.first()
+
+ tlm2 should be(tlm)
+ }
+ }
+}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/ml/NoDataFilterSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/ml/NoDataFilterSpec.scala
similarity index 88%
rename from core/src/test/scala/astraea/spark/rasterframes/ml/NoDataFilterSpec.scala
rename to core/src/test/scala/org/locationtech/rasterframes/ml/NoDataFilterSpec.scala
index 17a0f25d4..1d4dbc4f6 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/ml/NoDataFilterSpec.scala
+++ b/core/src/test/scala/org/locationtech/rasterframes/ml/NoDataFilterSpec.scala
@@ -15,13 +15,16 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.ml
+package org.locationtech.rasterframes.ml
import java.nio.file.Files
-import astraea.spark.rasterframes._
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.TestEnvironment
import org.scalatest.BeforeAndAfter
/**
diff --git a/core/src/test/scala/org/locationtech/rasterframes/ml/TileExploderSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/ml/TileExploderSpec.scala
new file mode 100644
index 000000000..2d9e2d04c
--- /dev/null
+++ b/core/src/test/scala/org/locationtech/rasterframes/ml/TileExploderSpec.scala
@@ -0,0 +1,48 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2017 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.ml
+
+import org.locationtech.rasterframes.TestData
+import geotrellis.raster.Tile
+import org.apache.spark.sql.functions.lit
+import org.locationtech.rasterframes.TestEnvironment
+/**
+ *
+ * @since 2/16/18
+ */
+class TileExploderSpec extends TestEnvironment with TestData {
+ describe("Tile explode transformer") {
+ it("should explode tiles") {
+ import spark.implicits._
+ val df = Seq[(Tile, Tile)]((byteArrayTile, byteArrayTile)).toDF("tile1", "tile2").withColumn("other", lit("stuff"))
+
+ val exploder = new TileExploder()
+ val newSchema = exploder.transformSchema(df.schema)
+
+ val exploded = exploder.transform(df)
+ assert(newSchema === exploded.schema)
+ assert(exploded.columns.length === 5)
+ assert(exploded.count() === 9)
+ write(exploded)
+ }
+ }
+}
diff --git a/core/src/test/scala/org/locationtech/rasterframes/model/LazyCRSSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/model/LazyCRSSpec.scala
new file mode 100644
index 000000000..1762c402e
--- /dev/null
+++ b/core/src/test/scala/org/locationtech/rasterframes/model/LazyCRSSpec.scala
@@ -0,0 +1,43 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.model
+
+import geotrellis.proj4.{CRS, LatLng, Sinusoidal, WebMercator}
+import org.scalatest._
+
+class LazyCRSSpec extends FunSpec with Matchers {
+ val sinPrj = "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"
+ val llPrj = "epsg:4326"
+ describe("LazyCRS") {
+ it("should implement equals") {
+ LazyCRS(WebMercator) should be(LazyCRS(WebMercator))
+ LazyCRS(WebMercator) should be(WebMercator)
+ WebMercator should be(LazyCRS(WebMercator))
+ LazyCRS(sinPrj) should be (Sinusoidal)
+ CRS.fromString(sinPrj) should be (LazyCRS(Sinusoidal))
+ LazyCRS(llPrj) should be(LatLng)
+ LazyCRS(LatLng) should be(LatLng)
+ LatLng should be(LazyCRS(llPrj))
+ LatLng should be(LazyCRS(LatLng))
+ }
+ }
+}
diff --git a/core/src/test/scala/astraea/spark/rasterframes/ref/RasterRefSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/ref/RasterRefSpec.scala
similarity index 69%
rename from core/src/test/scala/astraea/spark/rasterframes/ref/RasterRefSpec.scala
rename to core/src/test/scala/org/locationtech/rasterframes/ref/RasterRefSpec.scala
index 4efe2b474..f34c89859 100644
--- a/core/src/test/scala/astraea/spark/rasterframes/ref/RasterRefSpec.scala
+++ b/core/src/test/scala/org/locationtech/rasterframes/ref/RasterRefSpec.scala
@@ -19,15 +19,16 @@
*
*/
-package astraea.spark.rasterframes.ref
+package org.locationtech.rasterframes.ref
-import astraea.spark.rasterframes.TestEnvironment.ReadMonitor
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.expressions.transformers._
-import astraea.spark.rasterframes.expressions.accessors._
-import astraea.spark.rasterframes.ref.RasterRef.RasterRefTile
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.expressions.accessors._
+import org.locationtech.rasterframes.expressions.generators._
+import RasterRef.RasterRefTile
import geotrellis.raster.Tile
import geotrellis.vector.Extent
+import org.apache.spark.sql.Encoders
+import org.locationtech.rasterframes.TestEnvironment
/**
*
@@ -36,6 +37,7 @@ import geotrellis.vector.Extent
*/
//noinspection TypeAnnotation
class RasterRefSpec extends TestEnvironment with TestData {
+
def sub(e: Extent) = {
val c = e.center
val w = e.width
@@ -44,15 +46,15 @@ class RasterRefSpec extends TestEnvironment with TestData {
}
trait Fixture {
- val counter = new ReadMonitor
- val src = RasterSource(remoteCOGSingleband1, Some(counter))
- val fullRaster = RasterRef(src)
+ val src = RasterSource(remoteCOGSingleband1)
+ val fullRaster = RasterRef(src, 0, None)
val subExtent = sub(src.extent)
- val subRaster = RasterRef(src, Option(subExtent))
+ val subRaster = RasterRef(src, 0, Some(subExtent))
}
import spark.implicits._
+ implicit val enc = Encoders.tuple(Encoders.scalaInt, RasterRef.rrEncoder)
describe("GetCRS Expression") {
it("should read from RasterRef") {
new Fixture {
@@ -94,7 +96,6 @@ class RasterRefSpec extends TestEnvironment with TestData {
new Fixture {
val ds = Seq((1, RasterRefTile(fullRaster): Tile)).toDF("index", "ref")
val dims = ds.select(GetDimensions($"ref"))
- println(counter)
assert(dims.count() === 1)
assert(dims.first() !== null)
}
@@ -103,19 +104,18 @@ class RasterRefSpec extends TestEnvironment with TestData {
new Fixture {
val ds = Seq((1, RasterRefTile(subRaster): Tile)).toDF("index", "ref")
val dims = ds.select(GetDimensions($"ref"))
- println(counter)
assert(dims.count() === 1)
assert(dims.first() !== null)
}
}
}
- describe("GetExtent Expression") {
+ describe("GetExtent") {
it("should read from RasterRef") {
import spark.implicits._
new Fixture {
val ds = Seq((1, fullRaster)).toDF("index", "ref")
- val extent = ds.select(GetExtent($"ref"))
+ val extent = ds.select(rf_extent($"ref"))
assert(extent.count() === 1)
assert(extent.first() !== null)
}
@@ -124,7 +124,7 @@ class RasterRefSpec extends TestEnvironment with TestData {
import spark.implicits._
new Fixture {
val ds = Seq((1, subRaster)).toDF("index", "ref")
- val extent = ds.select(GetExtent($"ref"))
+ val extent = ds.select(rf_extent($"ref"))
assert(extent.count() === 1)
assert(extent.first() !== null)
}
@@ -135,23 +135,18 @@ class RasterRefSpec extends TestEnvironment with TestData {
it("should delay reading") {
new Fixture {
assert(subRaster.cellType === src.cellType)
- assert(counter.reads === 0)
}
}
it("should support subextents") {
new Fixture {
assert(subRaster.cols.toDouble === src.cols * 0.01 +- 2.0)
assert(subRaster.rows.toDouble === src.rows * 0.01 +- 2.0)
- assert(counter.reads === 0)
//subRaster.tile.rescale(0, 255).renderPng().write("target/foo1.png")
}
}
it("should be realizable") {
new Fixture {
- assert(counter.reads === 0)
assert(subRaster.tile.statistics.map(_.dataCells) === Some(subRaster.cols * subRaster.rows))
- assert(counter.reads > 0)
- println(counter)
}
}
@@ -166,29 +161,52 @@ class RasterRefSpec extends TestEnvironment with TestData {
val data = buf.toByteArray
val in = new ObjectInputStream(new ByteArrayInputStream(data))
val recovered = in.readObject()
- assert(subRaster === recovered)
+ subRaster should be (recovered)
}
}
}
- describe("CreateRasterRefs") {
- it("should convert and expand RasterSource") {
- new Fixture {
- import spark.implicits._
- val df = Seq(src).toDF("src")
- val refs = df.select(RasterSourceToRasterRefs($"src"))
- assert(refs.count() > 1)
+ describe("RasterRef creation") {
+ it("should realize subtiles of proper size") {
+ val src = RasterSource(remoteMODIS)
+ val dims = src
+ .layoutExtents(NOMINAL_TILE_DIMS)
+ .map(e => RasterRef(src, 0, Some(e)))
+ .map(_.dimensions)
+ .distinct
+
+ forEvery(dims) { d =>
+ d._1 should be <= NOMINAL_TILE_SIZE
+ d._2 should be <= NOMINAL_TILE_SIZE
}
}
+ }
- it("should work with tile realization") {
- new Fixture {
- import spark.implicits._
- val df = Seq(src).toDF("src")
- val refs = df.select(RasterSourceToRasterRefs(true, $"src"))
- assert(refs.count() > 1)
- }
+ describe("RasterSourceToRasterRefs") {
+ it("should convert and expand RasterSource") {
+ val src = RasterSource(remoteMODIS)
+ import spark.implicits._
+ val df = Seq(src).toDF("src")
+ val refs = df.select(RasterSourceToRasterRefs(None, Seq(0), $"src"))
+ refs.count() should be (1)
}
+ it("should properly realize subtiles") {
+ val src = RasterSource(remoteMODIS)
+ import spark.implicits._
+ val df = Seq(src).toDF("src")
+ val refs = df.select(RasterSourceToRasterRefs(Some(NOMINAL_TILE_DIMS), Seq(0), $"src") as "proj_raster")
+
+ refs.count() shouldBe > (1L)
+
+
+ val dims = refs.select(rf_dimensions($"proj_raster")).distinct().collect()
+ forEvery(dims) { r =>
+ r.cols should be <= NOMINAL_TILE_SIZE
+ r.rows should be <= NOMINAL_TILE_SIZE
+ }
+
+ dims.foreach(println)
+ }
}
}
diff --git a/core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala
new file mode 100644
index 000000000..1e62b95f5
--- /dev/null
+++ b/core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala
@@ -0,0 +1,182 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2018 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.ref
+
+import java.net.URI
+
+import org.locationtech.rasterframes._
+import geotrellis.vector.Extent
+import org.apache.spark.sql.rf.RasterSourceUDT
+import org.locationtech.rasterframes.model.TileDimensions
+
+
+class RasterSourceSpec extends TestEnvironment with TestData {
+ def sub(e: Extent) = {
+ val c = e.center
+ val w = e.width
+ val h = e.height
+ Extent(c.x, c.y, c.x + w * 0.1, c.y + h * 0.1)
+ }
+
+ describe("General RasterSource") {
+ it("should identify as UDT") {
+ assert(new RasterSourceUDT() === new RasterSourceUDT())
+ }
+ val rs = RasterSource(getClass.getResource("/L8-B8-Robinson-IL.tiff").toURI)
+ it("should compute nominal tile layout bounds") {
+ val bounds = rs.layoutBounds(TileDimensions(65, 60))
+ val agg = bounds.reduce(_ combine _)
+ agg should be (rs.gridBounds)
+ }
+ it("should compute nominal tile layout extents") {
+ val extents = rs.layoutExtents(TileDimensions(63, 63))
+ val agg = extents.reduce(_ combine _)
+ agg should be (rs.extent)
+ }
+ it("should reassemble correct grid from extents") {
+ val dims = TileDimensions(63, 63)
+ val ext = rs.layoutExtents(dims).head
+ val bounds = rs.layoutBounds(dims).head
+ rs.rasterExtent.gridBoundsFor(ext) should be (bounds)
+ }
+ it("should compute layout extents from scene with fractional gsd") {
+
+ val rs = RasterSource(remoteMODIS)
+
+ val dims = rs.layoutExtents(NOMINAL_TILE_DIMS)
+ .map(e => rs.rasterExtent.gridBoundsFor(e, false))
+ .map(b => (b.width, b.height))
+ .distinct
+ forEvery(dims) { d =>
+ d._1 should be <= NOMINAL_TILE_SIZE
+ d._2 should be <= NOMINAL_TILE_SIZE
+ }
+ }
+ }
+
+ describe("HTTP RasterSource") {
+ it("should support metadata querying over HTTP") {
+ withClue("remoteCOGSingleband") {
+ val src = RasterSource(remoteCOGSingleband1)
+ assert(!src.extent.isEmpty)
+ }
+ withClue("remoteCOGMultiband") {
+ val src = RasterSource(remoteCOGMultiband)
+ assert(!src.extent.isEmpty)
+ }
+ }
+ it("should read sub-tile") {
+ withClue("remoteCOGSingleband") {
+ val src = RasterSource(remoteCOGSingleband1)
+ val raster = src.read(sub(src.extent))
+ assert(raster.size > 0 && raster.size < src.size)
+ }
+ withClue("remoteCOGMultiband") {
+ val src = RasterSource(remoteCOGMultiband)
+ val raster = src.read(sub(src.extent))
+ assert(raster.size > 0 && raster.size < src.size)
+ }
+ }
+ it("should Java serialize") {
+ import java.io._
+ val src = RasterSource(remoteCOGSingleband1)
+ val buf = new java.io.ByteArrayOutputStream()
+ val out = new ObjectOutputStream(buf)
+ out.writeObject(src)
+ out.close()
+
+ val data = buf.toByteArray
+ val in = new ObjectInputStream(new ByteArrayInputStream(data))
+ val recovered = in.readObject().asInstanceOf[RasterSource]
+ assert(src.toString === recovered.toString)
+ }
+ }
+ describe("File RasterSource") {
+ it("should support metadata querying of file") {
+ val localSrc = geotiffDir.resolve("LC08_B7_Memphis_COG.tiff").toUri
+ val src = RasterSource(localSrc)
+ assert(!src.extent.isEmpty)
+ }
+ it("should interpret no scheme as file://"){
+ val localSrc = geotiffDir.resolve("LC08_B7_Memphis_COG.tiff").toString
+ val schemelessUri = new URI(localSrc)
+ schemelessUri.getScheme should be (null)
+ val src = RasterSource(schemelessUri)
+ assert(!src.extent.isEmpty)
+ }
+ }
+
+ if(GDALRasterSource.hasGDAL) {
+ describe("GDAL Rastersource") {
+ val gdal = GDALRasterSource(cogPath)
+ val jvm = JVMGeoTiffRasterSource(cogPath)
+ it("should compute the same metadata as JVM RasterSource") {
+ gdal.cellType should be(jvm.cellType)
+ }
+ it("should compute the same dimensions as JVM RasterSource") {
+ val dims = TileDimensions(128, 128)
+ gdal.extent should be(jvm.extent)
+ gdal.rasterExtent should be(jvm.rasterExtent)
+ gdal.cellSize should be(jvm.cellSize)
+ gdal.layoutBounds(dims) should contain allElementsOf jvm.layoutBounds(dims)
+ gdal.layoutExtents(dims) should contain allElementsOf jvm.layoutExtents(dims)
+ }
+
+
+ it("should support vsi file paths") {
+ val archivePath = geotiffDir.resolve("L8-archive.zip")
+ val archiveURI = URI.create("gdal://vsizip/" + archivePath.toString + "/L8-RGB-VA.tiff")
+ val gdal = GDALRasterSource(archiveURI)
+
+ gdal.bandCount should be (3)
+ }
+
+ it("should interpret no scheme as file://") {
+ val localSrc = geotiffDir.resolve("LC08_B7_Memphis_COG.tiff").toString
+ val schemelessUri = new URI(localSrc)
+ val gdal = GDALRasterSource(schemelessUri)
+ val jvm = JVMGeoTiffRasterSource(schemelessUri)
+ gdal.extent should be (jvm.extent)
+ gdal.cellSize should be(jvm.cellSize)
+ }
+ }
+ }
+
+ describe("RasterSource tile construction") {
+ it("should read all tiles") {
+ val src = RasterSource(remoteMODIS)
+
+ val subrasters = src.readAll()
+
+ val collected = subrasters.map(_.extent).reduceLeft(_.combine(_))
+
+ assert(src.extent.xmin === collected.xmin +- 0.01)
+ assert(src.extent.ymin === collected.ymin +- 0.01)
+ assert(src.extent.xmax === collected.xmax +- 0.01)
+ assert(src.extent.ymax === collected.ymax +- 0.01)
+
+ val totalCells = subrasters.map(_.size).sum
+
+ assert(totalCells === src.size)
+ }
+ }
+}
diff --git a/datasource/build.sbt b/datasource/build.sbt
deleted file mode 100644
index b42fe3d3f..000000000
--- a/datasource/build.sbt
+++ /dev/null
@@ -1,35 +0,0 @@
-moduleName := "rasterframes-datasource"
-
-libraryDependencies ++= Seq(
- geotrellis("s3").value,
- spark("core").value % Provided,
- spark("mllib").value % Provided,
- spark("sql").value % Provided
-)
-
-initialCommands in console := """
- |import astraea.spark.rasterframes._
- |import geotrellis.raster._
- |import geotrellis.spark.io.kryo.KryoRegistrator
- |import org.apache.spark.serializer.KryoSerializer
- |import org.apache.spark.sql._
- |import org.apache.spark.sql.functions._
- |import astraea.spark.rasterframes.datasource.geotrellis._
- |import astraea.spark.rasterframes.datasource.geotiff._
- |implicit val spark = SparkSession.builder()
- | .master("local[*]")
- | .appName(getClass.getName)
- | .config("spark.serializer", classOf[KryoSerializer].getName)
- | .config("spark.kryoserializer.buffer.max", "500m")
- | .config("spark.kryo.registrationRequired", "false")
- | .config("spark.kryo.registrator", classOf[KryoRegistrator].getName)
- | .getOrCreate()
- | .withRasterFrames
- |spark.sparkContext.setLogLevel("ERROR")
- |import spark.implicits._
- |
- |""".stripMargin
-
-cleanupCommands in console := """
- |spark.stop()
- |""".stripMargin
\ No newline at end of file
diff --git a/datasource/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/datasource/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
index 26a271f13..a44f6fccd 100644
--- a/datasource/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
+++ b/datasource/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
@@ -1,3 +1,5 @@
-astraea.spark.rasterframes.datasource.geotiff.DefaultSource
-astraea.spark.rasterframes.datasource.geotrellis.DefaultSource
-astraea.spark.rasterframes.datasource.geotrellis.GeoTrellisCatalog
+org.locationtech.rasterframes.datasource.geotiff.GeoTiffDataSource
+org.locationtech.rasterframes.datasource.geotrellis.GeoTrellisLayerDataSource
+org.locationtech.rasterframes.datasource.geotrellis.GeoTrellisCatalog
+org.locationtech.rasterframes.datasource.raster.RasterSourceDataSource
+org.locationtech.rasterframes.datasource.geojson.GeoJsonDataSource
diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/DefaultSource.scala b/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/DefaultSource.scala
deleted file mode 100644
index 74acbbc98..000000000
--- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/DefaultSource.scala
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2018 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- */
-
-package astraea.spark.rasterframes.datasource.geotiff
-
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.util._
-import astraea.spark.rasterframes.datasource._
-import com.typesafe.scalalogging.LazyLogging
-import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, DataSourceRegister, RelationProvider}
-import org.apache.spark.sql.types.LongType
-import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode, functions ⇒ F}
-import _root_.geotrellis.raster.io.geotiff.{GeoTiffOptions, MultibandGeoTiff, Tags, Tiled}
-import _root_.geotrellis.raster.io.geotiff.compression._
-import _root_.geotrellis.raster.io.geotiff.tags.codes.ColorSpace
-
-/**
- * Spark SQL data source over GeoTIFF files.
- * @since 1/14/18
- */
-class DefaultSource extends DataSourceRegister
- with RelationProvider with CreatableRelationProvider
- with DataSourceOptions with LazyLogging {
- def shortName() = DefaultSource.SHORT_NAME
-
- def path(parameters: Map[String, String]) =
- uriParam(PATH_PARAM, parameters)
-
- def createRelation(sqlContext: SQLContext, parameters: Map[String, String]) = {
- val pathO = path(parameters)
- require(pathO.isDefined, "Valid URI 'path' parameter required.")
- sqlContext.withRasterFrames
-
- val p = pathO.get
-
- if(p.getPath.contains("*")) {
- val bandCount = parameters.get(DefaultSource.BAND_COUNT_PARAM).map(_.toInt).getOrElse(1)
- GeoTiffCollectionRelation(sqlContext, p, bandCount)
- }
- else GeoTiffRelation(sqlContext, p)
- }
-
- override def createRelation(sqlContext: SQLContext, mode: SaveMode, parameters: Map[String, String], data: DataFrame): BaseRelation = {
- val pathO = path(parameters)
- require(pathO.isDefined, "Valid URI 'path' parameter required.")
- require(pathO.get.getScheme == "file" || pathO.get.getScheme == null, "Currently only 'file://' destinations are supported")
- sqlContext.withRasterFrames
-
- require(data.isRF, "GeoTIFF can only be constructed from a RasterFrame")
- val rf = data.certify
-
- // If no desired image size is given, write at full size.
- lazy val (fullResCols, fullResRows) = {
- // get the layout size given that the tiles may be heterogenously sized
- // first get any valid row and column in the spatial key structure
- val sk = rf.select(SPATIAL_KEY_COLUMN).first()
-
- val tc = rf.tileColumns.head
-
- val c = rf
- .where(SPATIAL_KEY_COLUMN("row") === sk.row)
- .agg(
- F.sum(tile_dimensions(tc)("cols") cast(LongType))
- ).first()
- .getLong(0)
-
- val r = rf
- .where(SPATIAL_KEY_COLUMN("col") === sk.col)
- .agg(
- F.sum(tile_dimensions(tc)("rows") cast(LongType))
- ).first()
- .getLong(0)
-
- (c, r)
- }
-
- val cols = numParam(DefaultSource.IMAGE_WIDTH_PARAM, parameters).getOrElse(fullResCols)
- val rows = numParam(DefaultSource.IMAGE_HEIGHT_PARAM, parameters).getOrElse(fullResRows)
-
- require(cols <= Int.MaxValue && rows <= Int.MaxValue, s"Can't construct a GeoTIFF of size $cols x $rows. (Too big!)")
-
- // Should we really play traffic cop here?
- if(cols.toDouble * rows * 64.0 > Runtime.getRuntime.totalMemory() * 0.5)
- logger.warn(s"You've asked for the construction of a very large image ($cols x $rows), destined for ${pathO.get}. Out of memory error likely.")
-
- val tcols = rf.tileColumns
- val raster = rf.toMultibandRaster(tcols, cols.toInt, rows.toInt)
-
- // We make some assumptions here.... eventually have column metadata encode this.
- val colorSpace = tcols.size match {
- case 3 | 4 ⇒ ColorSpace.RGB
- case _ ⇒ ColorSpace.BlackIsZero
- }
-
- val compress = parameters.get(DefaultSource.COMPRESS_PARAM).map(_.toBoolean).getOrElse(false)
- val options = GeoTiffOptions(Tiled, if (compress) DeflateCompression else NoCompression, colorSpace)
- val tags = Tags(
- RFBuildInfo.toMap.filter(_._1.startsWith("rf")).mapValues(_.toString),
- tcols.map(c ⇒ Map("RF_COL" -> c.columnName)).toList
- )
- val geotiff = new MultibandGeoTiff(raster.tile, raster.extent, raster.crs, tags, options)
-
- logger.debug(s"Writing DataFrame to GeoTIFF ($cols x $rows) at ${pathO.get}")
- geotiff.write(pathO.get.getPath)
- GeoTiffRelation(sqlContext, pathO.get)
- }
-}
-
-object DefaultSource {
- final val SHORT_NAME = "geotiff"
- final val PATH_PARAM = "path"
- final val IMAGE_WIDTH_PARAM = "imageWidth"
- final val IMAGE_HEIGHT_PARAM = "imageWidth"
- final val COMPRESS_PARAM = "compress"
- final val BAND_COUNT_PARAM = "bandCount"
-}
diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/package.scala b/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/package.scala
deleted file mode 100644
index 6e607c3b4..000000000
--- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/package.scala
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2018 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- */
-
-package astraea.spark.rasterframes.datasource
-
-import java.net.URI
-
-import astraea.spark.rasterframes._
-import org.apache.spark.sql.{DataFrameReader, DataFrameWriter}
-import shapeless.tag
-import shapeless.tag.@@
-
-/**
- * Extension methods enabled by this module.
- *
- * @since 1/16/18
- */
-package object geotiff {
- /** Tagged type construction for enabling type-safe extension methods for loading
- * a RasterFrame in expected form. */
- type GeoTiffRasterFrameReader = DataFrameReader @@ GeoTiffRasterFrameReaderTag
- trait GeoTiffRasterFrameReaderTag
-
- /** Adds `geotiff` format specifier to `DataFrameReader`. */
- implicit class DataFrameReaderHasGeoTiffFormat(val reader: DataFrameReader) {
- def geotiff: GeoTiffRasterFrameReader =
- tag[GeoTiffRasterFrameReaderTag][DataFrameReader](reader.format(DefaultSource.SHORT_NAME))
- }
-
- implicit class DataFrameWriterHasGeoTiffFormat[T](val writer: DataFrameWriter[T]) {
- def geotiff: DataFrameWriter[T] = writer.format(DefaultSource.SHORT_NAME)
- }
-
- /** Adds `loadRF` to appropriately tagged `DataFrameReader` */
- implicit class GeoTiffReaderWithRF(val reader: GeoTiffRasterFrameReader) {
- def loadRF(path: URI): RasterFrame = reader.load(path.toASCIIString).asRF
- }
-}
diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/DataSourceOptions.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/DataSourceOptions.scala
similarity index 90%
rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/DataSourceOptions.scala
rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/DataSourceOptions.scala
index 5baa8d67f..d620dd4fd 100644
--- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/DataSourceOptions.scala
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/DataSourceOptions.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea. Inc.
+ * Copyright 2018 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,10 +15,11 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes.datasource
+package org.locationtech.rasterframes.datasource
/**
* Key constants associated with DataFrameReader options for certain DataSource implementations.
diff --git a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/geojson/DOM.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geojson/DOM.scala
similarity index 92%
rename from experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/geojson/DOM.scala
rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geojson/DOM.scala
index 2dbcb7f0c..dfbbb92f3 100644
--- a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/geojson/DOM.scala
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geojson/DOM.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea. Inc.
+ * Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,16 +15,17 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes.experimental.datasource.geojson
+package org.locationtech.rasterframes.datasource.geojson
-import com.vividsolutions.jts.geom.{Envelope, Geometry}
-import com.vividsolutions.jts.io.geojson.{GeoJsonReader, GeoJsonWriter}
import geotrellis.vector.Extent
-import spray.json._
+import org.locationtech.jts.geom.{Envelope, Geometry}
+import org.locationtech.jts.io.geojson.{GeoJsonReader, GeoJsonWriter}
import spray.json.DefaultJsonProtocol._
+import spray.json._
/**
* Lightweight DOM for parsing GeoJSON feature sets.
diff --git a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/geojson/GeoJsonDataSource.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geojson/GeoJsonDataSource.scala
similarity index 94%
rename from experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/geojson/GeoJsonDataSource.scala
rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geojson/GeoJsonDataSource.scala
index f042fbd1c..1bda41cd7 100644
--- a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/geojson/GeoJsonDataSource.scala
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geojson/GeoJsonDataSource.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea. Inc.
+ * Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,13 +15,12 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes.experimental.datasource.geojson
+package org.locationtech.rasterframes.datasource.geojson
-import astraea.spark.rasterframes.experimental.datasource.geojson.DOM._
-import com.vividsolutions.jts.geom.Geometry
import org.apache.spark.annotation.Experimental
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.jts.JTSTypes
@@ -29,6 +28,8 @@ import org.apache.spark.sql.sources.{BaseRelation, DataSourceRegister, RelationP
import org.apache.spark.sql.types.{DataTypes, StringType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SQLContext}
import org.locationtech.geomesa.spark.jts._
+import org.locationtech.jts.geom.Geometry
+import org.locationtech.rasterframes.datasource.geojson.DOM._
import spray.json.DefaultJsonProtocol._
import spray.json._
diff --git a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/geojson/package.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geojson/package.scala
similarity index 94%
rename from experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/geojson/package.scala
rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geojson/package.scala
index 262c255d1..6c49d75bc 100644
--- a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/geojson/package.scala
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geojson/package.scala
@@ -19,7 +19,8 @@
*
*/
-package astraea.spark.rasterframes.experimental.datasource
+package org.locationtech.rasterframes.datasource
+
import org.apache.spark.sql.DataFrameReader
/**
diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffCollectionRelation.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffCollectionRelation.scala
similarity index 73%
rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffCollectionRelation.scala
rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffCollectionRelation.scala
index 2f69d4425..3148a67d0 100644
--- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffCollectionRelation.scala
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffCollectionRelation.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea. Inc.
+ * Copyright 2018 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,42 +15,35 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes.datasource.geotiff
+package org.locationtech.rasterframes.datasource.geotiff
import java.net.URI
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.datasource.geotiff.GeoTiffCollectionRelation.Cols
-import astraea.spark.rasterframes.encoders.CatalystSerializer
-import astraea.spark.rasterframes.util._
import geotrellis.proj4.CRS
import geotrellis.spark.io.hadoop.HadoopGeoTiffRDD
import geotrellis.vector.{Extent, ProjectedExtent}
import org.apache.hadoop.fs.Path
import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.jts.JTSTypes
import org.apache.spark.sql.rf.TileUDT
import org.apache.spark.sql.sources.{BaseRelation, PrunedScan}
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SQLContext}
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.datasource.geotiff.GeoTiffCollectionRelation.Cols
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.util._
-/**
- *
- *
- * @since 7/31/18
- */
+private[geotiff]
case class GeoTiffCollectionRelation(sqlContext: SQLContext, uri: URI, bandCount: Int) extends BaseRelation with PrunedScan {
override def schema: StructType = StructType(Seq(
StructField(Cols.PATH, StringType, false),
- StructField(EXTENT_COLUMN.columnName, CatalystSerializer[Extent].schema, nullable = true),
- StructField(CRS_COLUMN.columnName, CatalystSerializer[CRS].schema, false)
-// StructField(METADATA_COLUMN.columnName,
-// DataTypes.createMapType(StringType, StringType, false)
-// )
+ StructField(EXTENT_COLUMN.columnName, schemaOf[Extent], nullable = true),
+ StructField(CRS_COLUMN.columnName, schemaOf[CRS], false)
) ++ (
if(bandCount == 1) Seq(StructField(Cols.TL, new TileUDT, false))
else for(b ← 1 to bandCount) yield StructField(Cols.TL + "_" + b, new TileUDT, nullable = true)
@@ -63,14 +56,12 @@ case class GeoTiffCollectionRelation(sqlContext: SQLContext, uri: URI, bandCount
val columnIndexes = requiredColumns.map(schema.fieldIndex)
-
-
HadoopGeoTiffRDD.multiband(new Path(uri.toASCIIString), keyer, HadoopGeoTiffRDD.Options.DEFAULT)
.map { case ((path, pe), mbt) ⇒
val entries = columnIndexes.map {
case 0 ⇒ path
- case 1 ⇒ CatalystSerializer[Extent].toRow(pe.extent)
- case 2 ⇒ CatalystSerializer[CRS].toRow(pe.crs)
+ case 1 ⇒ pe.extent.toRow
+ case 2 ⇒ pe.crs.toRow
case i if i > 2 ⇒ {
if(bandCount == 1 && mbt.bandCount > 2) mbt.color()
else mbt.band(i - 3)
@@ -78,7 +69,6 @@ case class GeoTiffCollectionRelation(sqlContext: SQLContext, uri: URI, bandCount
}
Row(entries: _*)
}
-
}
}
@@ -86,7 +76,7 @@ object GeoTiffCollectionRelation {
object Cols {
lazy val PATH = "path"
lazy val CRS = "crs"
- lazy val EX = BOUNDS_COLUMN.columnName
+ lazy val EX = GEOMETRY_COLUMN.columnName
lazy val TL = TILE_COLUMN.columnName
}
}
diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffDataSource.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffDataSource.scala
new file mode 100644
index 000000000..77781a781
--- /dev/null
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffDataSource.scala
@@ -0,0 +1,191 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2018 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.datasource.geotiff
+
+import java.net.URI
+
+import _root_.geotrellis.proj4.CRS
+import _root_.geotrellis.raster._
+import _root_.geotrellis.raster.io.geotiff.compression._
+import _root_.geotrellis.raster.io.geotiff.tags.codes.ColorSpace
+import _root_.geotrellis.raster.io.geotiff.{GeoTiffOptions, MultibandGeoTiff, Tags, Tiled}
+import _root_.geotrellis.spark._
+import com.typesafe.scalalogging.LazyLogging
+import org.apache.spark.sql._
+import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, DataSourceRegister, RelationProvider}
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.datasource._
+import org.locationtech.rasterframes.expressions.aggregates.TileRasterizerAggregate.ProjectedRasterDefinition
+import org.locationtech.rasterframes.expressions.aggregates.{ProjectedLayerMetadataAggregate, TileRasterizerAggregate}
+import org.locationtech.rasterframes.model.{LazyCRS, TileDimensions}
+import org.locationtech.rasterframes.util._
+
+/**
+ * Spark SQL data source over GeoTIFF files.
+ */
+class GeoTiffDataSource
+ extends DataSourceRegister with RelationProvider with CreatableRelationProvider with DataSourceOptions with LazyLogging {
+ import GeoTiffDataSource._
+
+ def shortName() = GeoTiffDataSource.SHORT_NAME
+
+ def createRelation(sqlContext: SQLContext, parameters: Map[String, String]) = {
+ require(parameters.path.isDefined, "Valid URI 'path' parameter required.")
+ sqlContext.withRasterFrames
+
+ val p = parameters.path.get
+
+ if (p.getPath.contains("*")) {
+ val bandCount = parameters.get(GeoTiffDataSource.BAND_COUNT_PARAM).map(_.toInt).getOrElse(1)
+ GeoTiffCollectionRelation(sqlContext, p, bandCount)
+ } else GeoTiffRelation(sqlContext, p)
+ }
+
+ override def createRelation(sqlContext: SQLContext, mode: SaveMode, parameters: Map[String, String], df: DataFrame): BaseRelation = {
+ require(parameters.path.isDefined, "Valid URI 'path' parameter required.")
+ val path = parameters.path.get
+ require(path.getScheme == "file" || path.getScheme == null, "Currently only 'file://' destinations are supported")
+ sqlContext.withRasterFrames
+
+ val tileCols = df.tileColumns
+
+ require(tileCols.nonEmpty, "Could not find any tile columns.")
+
+ val raster = if (df.isAlreadyLayer) {
+ val layer = df.certify
+ val tlm = layer.tileLayerMetadata.merge
+
+ // If no desired image size is given, write at full size.
+ val TileDimensions(cols, rows) = parameters.rasterDimensions
+ .getOrElse {
+ val actualSize = tlm.layout.toRasterExtent().gridBoundsFor(tlm.extent)
+ TileDimensions(actualSize.width, actualSize.height)
+ }
+
+ // Should we really play traffic cop here?
+ if (cols.toDouble * rows * 64.0 > Runtime.getRuntime.totalMemory() * 0.5)
+ logger.warn(
+ s"You've asked for the construction of a very large image ($cols x $rows), destined for ${path}. Out of memory error likely.")
+
+ layer.toMultibandRaster(tileCols, cols.toInt, rows.toInt)
+ } else {
+ require(parameters.crs.nonEmpty, "A destination CRS must be provided")
+ require(tileCols.nonEmpty, "need at least one tile column")
+
+ // Grab CRS to project into
+ val destCRS = parameters.crs.get
+
+ // Select the anchoring Tile, Extent and CRS columns
+ val (extCol, crsCol, tileCol) = {
+ // Favor "ProjectedRaster" columns
+ val prCols = df.projRasterColumns
+ if (prCols.nonEmpty) {
+ (rf_extent(prCols.head), rf_crs(prCols.head), rf_tile(prCols.head))
+ } else {
+ // If no "ProjectedRaster" column, look for single Extent and CRS columns.
+ val crsCols = df.crsColumns
+ require(crsCols.size == 1, "Exactly one CRS column must be in DataFrame")
+ val extentCols = df.extentColumns
+ require(extentCols.size == 1, "Exactly one Extent column must be in DataFrame")
+ (extentCols.head, crsCols.head, tileCols.head)
+ }
+ }
+
+ // Scan table and constuct what the TileLayerMetadata would be in the specified destination CRS.
+ val tlm: TileLayerMetadata[SpatialKey] = df
+ .select(
+ ProjectedLayerMetadataAggregate(
+ destCRS,
+ extCol,
+ crsCol,
+ rf_cell_type(tileCol),
+ rf_dimensions(tileCol)
+ ))
+ .first()
+ logger.debug(s"Contructed TileLayerMetadata: ${tlm.toString}")
+
+ val c = ProjectedRasterDefinition(tlm)
+
+ val config = parameters.rasterDimensions
+ .map { dims =>
+ c.copy(totalCols = dims.cols, totalRows = dims.rows)
+ }
+ .getOrElse(c)
+
+ val aggs = tileCols
+ .map(t => TileRasterizerAggregate(config, crsCol, extCol, rf_tile(t))("tile").as(t.columnName))
+
+ val agg = df.select(aggs: _*)
+
+ val row = agg.first()
+
+ val bands = for (i <- 0 until row.size) yield row.getAs[Tile](i)
+
+ ProjectedRaster(MultibandTile(bands), tlm.extent, tlm.crs)
+ }
+
+ val tags = Tags(
+ RFBuildInfo.toMap.filter(_._1.toLowerCase().contains("version")).mapValues(_.toString),
+ tileCols.map(c => Map("RF_COL" -> c.columnName)).toList
+ )
+
+ // We make some assumptions here.... eventually have column metadata encode this.
+ val colorSpace = tileCols.size match {
+ case 3 | 4 => ColorSpace.RGB
+ case _ => ColorSpace.BlackIsZero
+ }
+
+ val tiffOptions = GeoTiffOptions(Tiled, if (parameters.compress) DeflateCompression else NoCompression, colorSpace)
+
+ val geotiff = new MultibandGeoTiff(raster.tile, raster.extent, raster.crs, tags, tiffOptions)
+
+ logger.debug(s"Writing DataFrame to GeoTIFF (${geotiff.cols} x ${geotiff.rows}) at ${path}")
+ geotiff.write(path.getPath)
+ GeoTiffRelation(sqlContext, path)
+ }
+}
+
+object GeoTiffDataSource {
+ final val SHORT_NAME = "geotiff"
+ final val PATH_PARAM = "path"
+ final val IMAGE_WIDTH_PARAM = "imageWidth"
+ final val IMAGE_HEIGHT_PARAM = "imageHeight"
+ final val COMPRESS_PARAM = "compress"
+ final val CRS_PARAM = "crs"
+ final val BAND_COUNT_PARAM = "bandCount"
+
+ private[geotiff] implicit class ParamsDictAccessors(val parameters: Map[String, String]) extends AnyVal {
+ def path: Option[URI] = uriParam(PATH_PARAM, parameters)
+ def compress: Boolean = parameters.get(COMPRESS_PARAM).exists(_.toBoolean)
+ def crs: Option[CRS] = parameters.get(CRS_PARAM).map(s => LazyCRS(s))
+ def rasterDimensions: Option[TileDimensions] = {
+ numParam(IMAGE_WIDTH_PARAM, parameters)
+ .zip(numParam(IMAGE_HEIGHT_PARAM, parameters))
+ .map {
+ case (cols, rows) =>
+ require(cols <= Int.MaxValue && rows <= Int.MaxValue, s"Can't construct a GeoTIFF of size $cols x $rows. (Too big!)")
+ TileDimensions(cols.toInt, rows.toInt)
+ }
+ .headOption
+ }
+ }
+}
diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffRelation.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffRelation.scala
similarity index 85%
rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffRelation.scala
rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffRelation.scala
index 8503171c8..b08ebc830 100644
--- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffRelation.scala
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffRelation.scala
@@ -15,15 +15,18 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.datasource.geotiff
+package org.locationtech.rasterframes.datasource.geotiff
import java.net.URI
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.encoders.CatalystSerializer
-import astraea.spark.rasterframes.util._
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.util._
+import com.typesafe.scalalogging.LazyLogging
import geotrellis.proj4.CRS
import geotrellis.spark._
import geotrellis.spark.io._
@@ -66,8 +69,8 @@ case class GeoTiffRelation(sqlContext: SQLContext, uri: URI) extends BaseRelatio
StructType(Seq(
StructField(SPATIAL_KEY_COLUMN.columnName, skSchema, nullable = false, skMetadata),
- StructField(EXTENT_COLUMN.columnName, CatalystSerializer[Extent].schema, nullable = true),
- StructField(CRS_COLUMN.columnName, CatalystSerializer[CRS].schema, nullable = true),
+ StructField(EXTENT_COLUMN.columnName, schemaOf[Extent], nullable = true),
+ StructField(CRS_COLUMN.columnName, schemaOf[CRS], nullable = true),
StructField(METADATA_COLUMN.columnName,
DataTypes.createMapType(StringType, StringType, false)
)
@@ -85,8 +88,7 @@ case class GeoTiffRelation(sqlContext: SQLContext, uri: URI) extends BaseRelatio
val trans = tlm.mapTransform
val metadata = info.tags.headTags
- val extSer = CatalystSerializer[Extent]
- val encodedCRS = CatalystSerializer[CRS].toRow(tlm.crs)
+ val encodedCRS = tlm.crs.toRow
if(info.segmentLayout.isTiled) {
// TODO: Figure out how to do tile filtering via the range reader.
@@ -98,7 +100,7 @@ case class GeoTiffRelation(sqlContext: SQLContext, uri: URI) extends BaseRelatio
val gb = trans.extentToBounds(pe.extent)
val entries = columnIndexes.map {
case 0 => SpatialKey(gb.colMin, gb.rowMin)
- case 1 => extSer.toRow(pe.extent)
+ case 1 => pe.extent.toRow
case 2 => encodedCRS
case 3 => metadata
case n => tiles.band(n - 4)
@@ -107,16 +109,15 @@ case class GeoTiffRelation(sqlContext: SQLContext, uri: URI) extends BaseRelatio
}
}
else {
- logger.warn("GeoTIFF is not already tiled. In-memory read required: " + uri)
+ //logger.warn("GeoTIFF is not already tiled. In-memory read required: " + uri)
val geotiff = HadoopGeoTiffReader.readMultiband(new Path(uri))
val rdd = sqlContext.sparkContext.makeRDD(Seq((geotiff.projectedExtent, Shims.toArrayTile(geotiff.tile))))
-
rdd.tileToLayout(tlm)
.map { case (sk, tiles) ⇒
val entries = columnIndexes.map {
case 0 => sk
- case 1 => extSer.toRow(trans.keyToExtent(sk))
+ case 1 => trans.keyToExtent(sk).toRow
case 2 => encodedCRS
case 3 => metadata
case n => tiles.band(n - 4)
diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/package.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/package.scala
new file mode 100644
index 000000000..75bdc7e76
--- /dev/null
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/package.scala
@@ -0,0 +1,81 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2018 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.datasource
+import java.net.URI
+
+import org.apache.spark.sql.{DataFrameReader, DataFrameWriter}
+import org.locationtech.rasterframes._
+import _root_.geotrellis.proj4.CRS
+import shapeless.tag.@@
+import shapeless.tag
+
+package object geotiff {
+ /** Tagged type construction for enabling type-safe extension methods for loading
+ * a RasterFrameLayer from a single GeoTiff. */
+ type GeoTiffRasterFrameReader = DataFrameReader @@ GeoTiffRasterFrameReaderTag
+ trait GeoTiffRasterFrameReaderTag
+
+ /** Tagged type construction for enabling type-safe extension methods for writing
+ * a RasterFrame to a geotiff. */
+ type GeoTiffRasterFrameWriter[T] = DataFrameWriter[T] @@ GeoTiffRasterFrameWriterTag
+ trait GeoTiffRasterFrameWriterTag
+
+ /** Adds `geotiff` format specifier to `DataFrameReader`. */
+ implicit class DataFrameReaderHasGeoTiffFormat(val reader: DataFrameReader) {
+ @deprecated("Use `raster` instead.", "7/1/2019")
+ def geotiff: GeoTiffRasterFrameReader =
+ tag[GeoTiffRasterFrameReaderTag][DataFrameReader](
+ reader.format(GeoTiffDataSource.SHORT_NAME)
+ )
+ }
+
+ implicit class DataFrameWriterHasGeoTiffFormat[T](val writer: DataFrameWriter[T]) {
+ def geotiff: GeoTiffRasterFrameWriter[T] =
+ tag[GeoTiffRasterFrameWriterTag][DataFrameWriter[T]](
+ writer.format(GeoTiffDataSource.SHORT_NAME)
+ )
+
+ def withDimensions(cols: Int, rows: Int): GeoTiffRasterFrameWriter[T] =
+ tag[GeoTiffRasterFrameWriterTag][DataFrameWriter[T]](
+ writer
+ .option(GeoTiffDataSource.IMAGE_WIDTH_PARAM, cols)
+ .option(GeoTiffDataSource.IMAGE_HEIGHT_PARAM, rows)
+ )
+
+ def withCompression: GeoTiffRasterFrameWriter[T] =
+ tag[GeoTiffRasterFrameWriterTag][DataFrameWriter[T]](
+ writer
+ .option(GeoTiffDataSource.COMPRESS_PARAM, true)
+ )
+ def withCRS(crs: CRS): GeoTiffRasterFrameWriter[T] =
+ tag[GeoTiffRasterFrameWriterTag][DataFrameWriter[T]](
+ writer
+ .option(GeoTiffDataSource.CRS_PARAM, crs.toProj4String)
+ )
+ }
+
+ /** Adds `loadLayer` to appropriately tagged `DataFrameReader` */
+ implicit class GeoTiffReaderWithRF(val reader: GeoTiffRasterFrameReader) {
+ @deprecated("Use `raster` instead.", "7/1/2019")
+ def loadLayer(path: URI): RasterFrameLayer = reader.load(path.toASCIIString).asLayer
+ }
+}
diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisCatalog.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisCatalog.scala
similarity index 93%
rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisCatalog.scala
rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisCatalog.scala
index 5af7f1e3f..11edc1d5f 100644
--- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisCatalog.scala
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisCatalog.scala
@@ -15,23 +15,23 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.datasource.geotrellis
+package org.locationtech.rasterframes.datasource.geotrellis
import java.net.URI
-import astraea.spark.rasterframes
-import astraea.spark.rasterframes.datasource.geotrellis.GeoTrellisCatalog.GeoTrellisCatalogRelation
-import astraea.spark.rasterframes.util.time
import geotrellis.spark.io.AttributeStore
import org.apache.spark.annotation.Experimental
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
+import org.apache.spark.sql.functions._
import org.apache.spark.sql.rf.VersionShims
import org.apache.spark.sql.sources._
-import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.StructType
+import org.locationtech.rasterframes.datasource.geotrellis.GeoTrellisCatalog.GeoTrellisCatalogRelation
import spray.json.DefaultJsonProtocol._
import spray.json._
diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/DefaultSource.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisLayerDataSource.scala
similarity index 90%
rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/DefaultSource.scala
rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisLayerDataSource.scala
index 63e88e25a..d12ea1e17 100644
--- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/DefaultSource.scala
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisLayerDataSource.scala
@@ -15,14 +15,16 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.datasource.geotrellis
+package org.locationtech.rasterframes.datasource.geotrellis
import java.net.URI
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.datasource.DataSourceOptions
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.datasource.DataSourceOptions
import geotrellis.spark._
import geotrellis.spark.io._
import geotrellis.spark.io.index.ZCurveKeyIndexMethod
@@ -37,9 +39,9 @@ import scala.util.Try
* DataSource over a GeoTrellis layer store.
*/
@Experimental
-class DefaultSource extends DataSourceRegister
+class GeoTrellisLayerDataSource extends DataSourceRegister
with RelationProvider with CreatableRelationProvider with DataSourceOptions {
- def shortName(): String = DefaultSource.SHORT_NAME
+ def shortName(): String = GeoTrellisLayerDataSource.SHORT_NAME
/**
* Create a GeoTrellis data source.
@@ -79,8 +81,8 @@ class DefaultSource extends DataSourceRegister
require(layerName.isDefined, s"'$LAYER_PARAM' parameter for raster layer name required.")
require(zoom.isDefined, s"Integer '$ZOOM_PARAM' parameter for raster layer zoom level required.")
- val rf = data.asRFSafely
- .getOrElse(throw new IllegalArgumentException("Only a valid RasterFrame can be saved as a GeoTrellis layer"))
+ val rf = data.asLayerSafely
+ .getOrElse(throw new IllegalArgumentException("Only a valid RasterFrameLayer can be saved as a GeoTrellis layer"))
val tileColumn = parameters.get(TILE_COLUMN_PARAM).map(c ⇒ rf(c))
@@ -110,6 +112,6 @@ class DefaultSource extends DataSourceRegister
}
}
-object DefaultSource {
+object GeoTrellisLayerDataSource {
final val SHORT_NAME = "geotrellis"
}
diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisRelation.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisRelation.scala
similarity index 93%
rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisRelation.scala
rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisRelation.scala
index 918f43015..343f4683d 100644
--- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisRelation.scala
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisRelation.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2017-2018 Azavea & Astraea, Inc.
+ * Copyright 2017-2019 Astraea, Inc. & Azavea
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,31 +15,33 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.datasource.geotrellis
+package org.locationtech.rasterframes.datasource.geotrellis
import java.io.UnsupportedEncodingException
import java.net.URI
import java.sql.{Date, Timestamp}
import java.time.{ZoneOffset, ZonedDateTime}
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.datasource.geotrellis.GeoTrellisRelation.{C, TileFeatureData}
-import astraea.spark.rasterframes.datasource.geotrellis.TileFeatureSupport._
-import astraea.spark.rasterframes.rules.splitFilters
-import astraea.spark.rasterframes.rules.SpatialFilters.{Contains ⇒ sfContains, Intersects ⇒ sfIntersects}
-import astraea.spark.rasterframes.rules.SpatialRelationReceiver
-import astraea.spark.rasterframes.rules.TemporalFilters.{BetweenDates, BetweenTimes}
-import astraea.spark.rasterframes.util.SubdivideSupport._
-import astraea.spark.rasterframes.util._
-import com.vividsolutions.jts.geom
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.datasource.geotrellis.TileFeatureSupport._
+import org.locationtech.rasterframes.rules.splitFilters
+import org.locationtech.rasterframes.rules.SpatialFilters.{Contains => sfContains, Intersects => sfIntersects}
+import org.locationtech.rasterframes.rules.SpatialRelationReceiver
+import org.locationtech.rasterframes.rules.TemporalFilters.{BetweenDates, BetweenTimes}
+import org.locationtech.rasterframes.util.SubdivideSupport._
+import org.locationtech.rasterframes.util._
+import com.typesafe.scalalogging.LazyLogging
+import org.locationtech.jts.geom
import geotrellis.raster.{CellGrid, MultibandTile, Tile, TileFeature}
import geotrellis.spark.io._
import geotrellis.spark.io.avro.AvroRecordCodec
import geotrellis.spark.util.KryoWrapper
import geotrellis.spark.{LayerId, Metadata, SpatialKey, TileLayerMetadata, _}
-import geotrellis.util.{LazyLogging, _}
+import geotrellis.util._
import geotrellis.vector._
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
@@ -49,6 +51,7 @@ import org.apache.spark.sql.rf.TileUDT
import org.apache.spark.sql.sources._
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Row, SQLContext, sources}
+import org.locationtech.rasterframes.datasource.geotrellis.GeoTrellisRelation.{C, TileFeatureData}
import scala.reflect.ClassTag
import scala.reflect.runtime.universe._
@@ -348,6 +351,6 @@ object GeoTrellisRelation {
lazy val TS = TIMESTAMP_COLUMN.columnName
lazy val TL = TILE_COLUMN.columnName
lazy val TF = TILE_FEATURE_DATA_COLUMN.columnName
- lazy val EX = BOUNDS_COLUMN.columnName
+ lazy val EX = GEOMETRY_COLUMN.columnName
}
}
diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/Layer.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/Layer.scala
similarity index 85%
rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/Layer.scala
rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/Layer.scala
index a659de2ff..9f90c96fd 100644
--- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/Layer.scala
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/Layer.scala
@@ -15,16 +15,18 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.datasource.geotrellis
+package org.locationtech.rasterframes.datasource.geotrellis
import java.net.URI
-import astraea.spark.rasterframes
-import astraea.spark.rasterframes.encoders.DelegatingSubfieldEncoder
+import org.locationtech.rasterframes.encoders.DelegatingSubfieldEncoder
import geotrellis.spark.LayerId
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
+import org.locationtech.rasterframes
/**
* /** Connector between a GT `LayerId` and the path in which it lives. */
diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/MergeableData.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/MergeableData.scala
similarity index 95%
rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/MergeableData.scala
rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/MergeableData.scala
index 73d537866..34bd6536b 100644
--- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/MergeableData.scala
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/MergeableData.scala
@@ -15,9 +15,11 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.datasource.geotrellis
+package org.locationtech.rasterframes.datasource.geotrellis
trait MergeableData[D] {
def merge(l:D, r:D): D
diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/TileFeatureSupport.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/TileFeatureSupport.scala
similarity index 96%
rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/TileFeatureSupport.scala
rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/TileFeatureSupport.scala
index 6691d6d51..67ea65510 100644
--- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/TileFeatureSupport.scala
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/TileFeatureSupport.scala
@@ -15,11 +15,13 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.datasource.geotrellis
+package org.locationtech.rasterframes.datasource.geotrellis
-import astraea.spark.rasterframes.util._
+import org.locationtech.rasterframes.util._
import geotrellis.raster.crop.{Crop, TileCropMethods}
import geotrellis.raster.mask.TileMaskMethods
import geotrellis.raster.merge.TileMergeMethods
diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/package.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/package.scala
similarity index 79%
rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/package.scala
rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/package.scala
index 545c1f236..c4a7dc425 100644
--- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/package.scala
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/package.scala
@@ -15,24 +15,20 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.datasource
+package org.locationtech.rasterframes.datasource
import java.net.URI
-import _root_.geotrellis.spark.LayerId
-import astraea.spark.rasterframes.datasource.geotrellis.DefaultSource._
-import astraea.spark.rasterframes.{RasterFrame, _}
import org.apache.spark.sql._
-import org.apache.spark.sql.functions.col
-import shapeless.tag
+import org.apache.spark.sql.functions._
+import _root_.geotrellis.spark.LayerId
+import org.locationtech.rasterframes._
import shapeless.tag.@@
+import shapeless.tag
-/**
- * Extension methods for literate and type-safe loading of geotrellis layers.
- *
- * @since 1/12/18
- */
package object geotrellis extends DataSourceOptions {
implicit val layerEncoder = Layer.layerEncoder
@@ -40,11 +36,11 @@ package object geotrellis extends DataSourceOptions {
def geotrellis_layer = col("layer").as[Layer]
/** Tagged type construction for enabling type-safe extension methods for loading
- * a RasterFrame from a GeoTrellis layer. */
+ * a RasterFrameLayer from a GeoTrellis layer. */
type GeoTrellisRasterFrameReader = DataFrameReader @@ GeoTrellisRasterFrameReaderTag
trait GeoTrellisRasterFrameReaderTag
/** Tagged type construction for enabling type-safe extension methods for writing
- * a RasterFrame to a GeoTrellis layer. */
+ * a RasterFrameLayer to a GeoTrellis layer. */
type GeoTrellisRasterFrameWriter[T] = DataFrameWriter[T] @@ GeoTrellisRasterFrameWriterTag
trait GeoTrellisRasterFrameWriterTag
@@ -56,12 +52,12 @@ package object geotrellis extends DataSourceOptions {
reader.format("geotrellis-catalog").load(base.toASCIIString)
def geotrellis: GeoTrellisRasterFrameReader =
- tag[GeoTrellisRasterFrameReaderTag][DataFrameReader](reader.format(SHORT_NAME))
+ tag[GeoTrellisRasterFrameReaderTag][DataFrameReader](reader.format(GeoTrellisLayerDataSource.SHORT_NAME))
}
implicit class DataFrameWriterHasGeotrellisFormat[T](val writer: DataFrameWriter[T]) {
def geotrellis: GeoTrellisRasterFrameWriter[T] =
- tag[GeoTrellisRasterFrameWriterTag][DataFrameWriter[T]](writer.format(SHORT_NAME))
+ tag[GeoTrellisRasterFrameWriterTag][DataFrameWriter[T]](writer.format(GeoTrellisLayerDataSource.SHORT_NAME))
}
implicit class GeoTrellisWriterAddLayer[T](val writer: GeoTrellisRasterFrameWriter[T]) {
@@ -75,25 +71,25 @@ package object geotrellis extends DataSourceOptions {
.option("path", layer.base.toASCIIString)
}
- /** Extension methods for loading a RasterFrame from a tagged `DataFrameReader`. */
+ /** Extension methods for loading a RasterFrameLayer from a tagged `DataFrameReader`. */
implicit class GeoTrellisReaderWithRF(val reader: GeoTrellisRasterFrameReader) {
def withTileSubdivisions(divs: Int): GeoTrellisRasterFrameReader =
tag[GeoTrellisRasterFrameReaderTag][DataFrameReader](
- reader.option(TILE_SUBDIVISIONS_PARAM, divs)
+ reader.option(TILE_SUBDIVISIONS_PARAM, divs.toLong)
)
def withNumPartitions(partitions: Int): GeoTrellisRasterFrameReader =
tag[GeoTrellisRasterFrameReaderTag][DataFrameReader](
- reader.option(NUM_PARTITIONS_PARAM, partitions)
+ reader.option(NUM_PARTITIONS_PARAM, partitions.toLong)
)
- def loadRF(uri: URI, id: LayerId): RasterFrame =
+ def loadLayer(uri: URI, id: LayerId): RasterFrameLayer =
reader
.option(LAYER_PARAM, id.name)
.option(ZOOM_PARAM, id.zoom.toString)
.load(uri.toASCIIString)
- .asRF
+ .asLayer
- def loadRF(layer: Layer): RasterFrame = loadRF(layer.base, layer.id)
+ def loadLayer(layer: Layer): RasterFrameLayer = loadLayer(layer.base, layer.id)
}
}
diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/package.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/package.scala
similarity index 92%
rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/package.scala
rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/package.scala
index 6962e9a11..9a649bb94 100644
--- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/package.scala
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/package.scala
@@ -15,14 +15,14 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes
+package org.locationtech.rasterframes
import java.net.URI
-import org.apache.spark.sql.sources.{And, Filter}
-
import scala.util.Try
/**
diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceDataSource.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceDataSource.scala
new file mode 100644
index 000000000..6cea717ec
--- /dev/null
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceDataSource.scala
@@ -0,0 +1,154 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.datasource.raster
+
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.util._
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.sql.sources.{BaseRelation, DataSourceRegister, RelationProvider}
+import org.locationtech.rasterframes.model.TileDimensions
+
+class RasterSourceDataSource extends DataSourceRegister with RelationProvider {
+ import RasterSourceDataSource._
+ override def shortName(): String = SHORT_NAME
+ override def createRelation(sqlContext: SQLContext, parameters: Map[String, String]): BaseRelation = {
+ val bands = parameters.bandIndexes
+ val tiling = parameters.tileDims
+ val lazyTiles = parameters.lazyTiles
+ val spec = parameters.pathSpec
+ val catRef = spec.fold(_.registerAsTable(sqlContext), identity)
+ RasterSourceRelation(sqlContext, catRef, bands, tiling, lazyTiles)
+ }
+}
+
+object RasterSourceDataSource {
+ final val SHORT_NAME = "raster"
+ final val PATH_PARAM = "path"
+ final val PATHS_PARAM = "paths"
+ final val BAND_INDEXES_PARAM = "bandIndexes"
+ final val TILE_DIMS_PARAM = "tileDimensions"
+ final val CATALOG_TABLE_PARAM = "catalogTable"
+ final val CATALOG_TABLE_COLS_PARAM = "catalogColumns"
+ final val CATALOG_CSV_PARAM = "catalogCSV"
+ final val LAZY_TILES_PARAM = "lazyTiles"
+
+ final val DEFAULT_COLUMN_NAME = PROJECTED_RASTER_COLUMN.columnName
+
+ trait WithBandColumns {
+ def bandColumnNames: Seq[String]
+ }
+ /** Container for specifying raster paths. */
+ case class RasterSourceCatalog(csv: String, bandColumnNames: String*) extends WithBandColumns {
+ def registerAsTable(sqlContext: SQLContext): RasterSourceCatalogRef = {
+ import sqlContext.implicits._
+ val lines = csv
+ .split(Array('\n','\r'))
+ .map(_.trim)
+ .filter(_.nonEmpty)
+
+ val dsLines = sqlContext.createDataset(lines)
+ val catalog = sqlContext.read
+ .option("header", "true")
+ .option("ignoreTrailingWhiteSpace", true)
+ .option("ignoreLeadingWhiteSpace", true)
+ .csv(dsLines)
+
+ val tmpName = tmpTableName()
+ catalog.createOrReplaceTempView(tmpName)
+
+ val cols = if (bandColumnNames.isEmpty) catalog.columns.toSeq
+ else bandColumnNames
+
+ RasterSourceCatalogRef(tmpName, cols: _*)
+ }
+ }
+
+ object RasterSourceCatalog {
+ def apply(singlebandPaths: Seq[String]): Option[RasterSourceCatalog] =
+ if (singlebandPaths.isEmpty) None
+ else {
+ val header = DEFAULT_COLUMN_NAME
+ val csv = header + "\n" + singlebandPaths.mkString("\n")
+ Some(new RasterSourceCatalog(csv, header))
+ }
+ }
+
+ /** Container for specifying where to select raster paths from. */
+ case class RasterSourceCatalogRef(tableName: String, bandColumnNames: String*) extends WithBandColumns
+
+ private[raster]
+ implicit class ParamsDictAccessors(val parameters: Map[String, String]) extends AnyVal {
+ def tokenize(csv: String): Seq[String] = csv.split(',').map(_.trim)
+
+ def tileDims: Option[TileDimensions] =
+ parameters.get(TILE_DIMS_PARAM)
+ .map(tokenize(_).map(_.toInt))
+ .map { case Seq(cols, rows) => TileDimensions(cols, rows)}
+
+ def bandIndexes: Seq[Int] = parameters
+ .get(BAND_INDEXES_PARAM)
+ .map(tokenize(_).map(_.toInt))
+ .getOrElse(Seq(0))
+
+
+ def lazyTiles: Boolean = parameters
+ .get(LAZY_TILES_PARAM).forall(_.toBoolean)
+
+ def catalog: Option[RasterSourceCatalog] = {
+ val paths = (
+ parameters
+ .get(PATHS_PARAM)
+ .toSeq
+ .flatMap(_.split(Array('\n','\r'))) ++
+ parameters
+ .get(RasterSourceDataSource.PATH_PARAM)
+ .toSeq
+ ).filter(_.nonEmpty)
+
+ RasterSourceCatalog(paths)
+ .orElse(parameters
+ .get(CATALOG_CSV_PARAM)
+ .map(RasterSourceCatalog(_, catalogTableCols: _*))
+ )
+ }
+
+ def catalogTableCols: Seq[String] = parameters
+ .get(CATALOG_TABLE_COLS_PARAM)
+ .map(tokenize(_).filter(_.nonEmpty).toSeq)
+ .getOrElse(Seq.empty)
+
+ def catalogTable: Option[RasterSourceCatalogRef] = parameters
+ .get(CATALOG_TABLE_PARAM)
+ .map(p => RasterSourceCatalogRef(p, catalogTableCols: _*))
+
+ def pathSpec: Either[RasterSourceCatalog, RasterSourceCatalogRef] = {
+ (catalog, catalogTable) match {
+ case (Some(f), None) => Left(f)
+ case (None, Some(p)) => Right(p)
+ case (None, None) => throw new IllegalArgumentException(
+ s"Unable to interpret paths from: ${parameters.mkString("\n", "\n", "\n")}")
+ case _ => throw new IllegalArgumentException(
+ "Only one of a set of file paths OR a paths table column may be provided.")
+ }
+ }
+ }
+}
diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceRelation.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceRelation.scala
new file mode 100644
index 000000000..6af519f56
--- /dev/null
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceRelation.scala
@@ -0,0 +1,136 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.datasource.raster
+
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.sources.{BaseRelation, TableScan}
+import org.apache.spark.sql.types.{StringType, StructField, StructType}
+import org.apache.spark.sql.{DataFrame, Row, SQLContext}
+import org.locationtech.rasterframes.datasource.raster.RasterSourceDataSource.RasterSourceCatalogRef
+import org.locationtech.rasterframes.encoders.CatalystSerializer._
+import org.locationtech.rasterframes.expressions.generators.{RasterSourceToRasterRefs, RasterSourceToTiles}
+import org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs.bandNames
+import org.locationtech.rasterframes.expressions.transformers.{RasterRefToTile, URIToRasterSource}
+import org.locationtech.rasterframes.model.TileDimensions
+import org.locationtech.rasterframes.tiles.ProjectedRasterTile
+
+/**
+ * Constructs a Spark Relation over one or more RasterSource paths.
+ * @param sqlContext Query context
+ * @param catalogTable Specification of raster path sources
+ * @param bandIndexes band indexes to fetch
+ * @param subtileDims how big to tile/subdivide rasters info
+ */
+case class RasterSourceRelation(
+ sqlContext: SQLContext,
+ catalogTable: RasterSourceCatalogRef,
+ bandIndexes: Seq[Int],
+ subtileDims: Option[TileDimensions],
+ lazyTiles: Boolean
+) extends BaseRelation with TableScan {
+
+ lazy val inputColNames = catalogTable.bandColumnNames
+
+ def pathColNames = inputColNames
+ .map(_ + "_path")
+
+ def srcColNames = inputColNames
+ .map(_ + "_src")
+
+ def refColNames = srcColNames
+ .flatMap(bandNames(_, bandIndexes))
+ .map(_ + "_ref")
+
+ def tileColNames = inputColNames
+ .flatMap(bandNames(_, bandIndexes))
+
+ lazy val extraCols: Seq[StructField] = {
+ val catalog = sqlContext.table(catalogTable.tableName)
+ catalog.schema.fields.filter(f => !catalogTable.bandColumnNames.contains(f.name))
+ }
+
+ override def schema: StructType = {
+ val tileSchema = schemaOf[ProjectedRasterTile]
+ val paths = for {
+ pathCol <- pathColNames
+ } yield StructField(pathCol, StringType, false)
+ val tiles = for {
+ tileColName <- tileColNames
+ } yield StructField(tileColName, tileSchema, true)
+
+ StructType(paths ++ tiles ++ extraCols)
+ }
+
+ override def buildScan(): RDD[Row] = {
+ import sqlContext.implicits._
+
+ // The general transformaion is:
+ // input -> path -> src -> ref -> tile
+ // Each step is broken down for readability
+ val inputs: DataFrame = sqlContext.table(catalogTable.tableName)
+
+ // Basically renames the input columns to have the '_path' suffix
+ val pathsAliasing = for {
+ (input, path) <- inputColNames.zip(pathColNames)
+ } yield col(input).as(path)
+
+ // Wraps paths in a RasterSource
+ val srcs = for {
+ (pathColName, srcColName) <- pathColNames.zip(srcColNames)
+ } yield URIToRasterSource(col(pathColName)) as srcColName
+
+ // Add path columns
+ val withPaths = inputs
+ .select($"*" +: pathsAliasing: _*)
+
+ // Path columns have to be manually pulled along through each step. Resolve columns once
+ // and reused with each select.
+ val paths = pathColNames.map(withPaths.apply)
+
+ // Input columns along for the ride.
+ val extras = extraCols.map(f => inputs(f.name))
+
+ val df = if (lazyTiles) {
+ // Expand RasterSource into multiple columns per band, and multiple rows per tile
+ // There's some unintentional fragililty here in that the structure of the expression
+ // is expected to line up with our column structure here.
+ val refs = RasterSourceToRasterRefs(subtileDims, bandIndexes, srcs: _*) as refColNames
+
+ // RasterSourceToRasterRef is a generator, which means you have to do the Tile conversion
+ // in a separate select statement (Query planner doesn't know how many columns ahead of time).
+ val refsToTiles = for {
+ (refColName, tileColName) <- refColNames.zip(tileColNames)
+ } yield RasterRefToTile(col(refColName)) as tileColName
+
+ withPaths
+ .select(extras ++ paths :+ refs: _*)
+ .select(paths ++ refsToTiles ++ extras: _*)
+ }
+ else {
+ val tiles = RasterSourceToTiles(subtileDims, bandIndexes, srcs: _*) as tileColNames
+ withPaths
+ .select((paths :+ tiles) ++ extras: _*)
+ }
+ df.rdd
+ }
+}
diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/package.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/package.scala
new file mode 100644
index 000000000..d85f435d2
--- /dev/null
+++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/package.scala
@@ -0,0 +1,93 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.datasource
+
+import java.net.URI
+import java.util.UUID
+
+import org.apache.spark.sql.{DataFrame, DataFrameReader}
+import shapeless.tag
+import shapeless.tag.@@
+package object raster {
+
+ private[raster] def tmpTableName() = UUID.randomUUID().toString.replace("-", "")
+
+ trait RasterSourceDataFrameReaderTag
+ type RasterSourceDataFrameReader = DataFrameReader @@ RasterSourceDataFrameReaderTag
+
+ /** Adds `raster` format specifier to `DataFrameReader`. */
+ implicit class DataFrameReaderHasRasterSourceFormat(val reader: DataFrameReader) {
+ def raster: RasterSourceDataFrameReader =
+ tag[RasterSourceDataFrameReaderTag][DataFrameReader](
+ reader.format(RasterSourceDataSource.SHORT_NAME))
+ }
+
+ /** Adds option methods relevant to RasterSourceDataSource. */
+ implicit class RasterSourceDataFrameReaderHasOptions(val reader: RasterSourceDataFrameReader) {
+ /** Set the zero-based band indexes to read. Defaults to Seq(0). */
+ def withBandIndexes(bandIndexes: Int*): RasterSourceDataFrameReader =
+ tag[RasterSourceDataFrameReaderTag][DataFrameReader](
+ reader.option(RasterSourceDataSource.BAND_INDEXES_PARAM, bandIndexes.mkString(",")))
+
+ def withTileDimensions(cols: Int, rows: Int): RasterSourceDataFrameReader =
+ tag[RasterSourceDataFrameReaderTag][DataFrameReader](
+ reader.option(RasterSourceDataSource.TILE_DIMS_PARAM, s"$cols,$rows")
+ )
+
+ /** Indicate if tile reading should be delayed until cells are fetched. Defaults to `true`. */
+ def withLazyTiles(state: Boolean): RasterSourceDataFrameReader =
+ tag[RasterSourceDataFrameReaderTag][DataFrameReader](
+ reader.option(RasterSourceDataSource.LAZY_TILES_PARAM, state))
+
+ def fromCatalog(catalog: DataFrame, bandColumnNames: String*): RasterSourceDataFrameReader =
+ tag[RasterSourceDataFrameReaderTag][DataFrameReader] {
+ val tmpName = tmpTableName()
+ catalog.createOrReplaceTempView(tmpName)
+ reader
+ .option(RasterSourceDataSource.CATALOG_TABLE_PARAM, tmpName)
+ .option(RasterSourceDataSource.CATALOG_TABLE_COLS_PARAM, bandColumnNames.mkString(",")): DataFrameReader
+ }
+
+ def fromCatalog(tableName: String, bandColumnNames: String*): RasterSourceDataFrameReader =
+ tag[RasterSourceDataFrameReaderTag][DataFrameReader](
+ reader.option(RasterSourceDataSource.CATALOG_TABLE_PARAM, tableName)
+ .option(RasterSourceDataSource.CATALOG_TABLE_COLS_PARAM, bandColumnNames.mkString(","))
+ )
+
+ def fromCSV(catalogCSV: String, bandColumnNames: String*): RasterSourceDataFrameReader =
+ tag[RasterSourceDataFrameReaderTag][DataFrameReader](
+ reader.option(RasterSourceDataSource.CATALOG_CSV_PARAM, catalogCSV)
+ .option(RasterSourceDataSource.CATALOG_TABLE_COLS_PARAM, bandColumnNames.mkString(","))
+ )
+
+ def from(newlineDelimPaths: String): RasterSourceDataFrameReader =
+ tag[RasterSourceDataFrameReaderTag][DataFrameReader](
+ reader.option(RasterSourceDataSource.PATHS_PARAM, newlineDelimPaths)
+ )
+
+ def from(paths: Seq[String]): RasterSourceDataFrameReader =
+ from(paths.mkString("\n"))
+
+ def from(uris: Seq[URI])(implicit d: DummyImplicit): RasterSourceDataFrameReader =
+ from(uris.map(_.toASCIIString))
+ }
+}
diff --git a/experimental/src/test/resources/buildings.geojson b/datasource/src/test/resources/buildings.geojson
similarity index 100%
rename from experimental/src/test/resources/buildings.geojson
rename to datasource/src/test/resources/buildings.geojson
diff --git a/experimental/src/test/resources/example.geojson b/datasource/src/test/resources/example.geojson
similarity index 100%
rename from experimental/src/test/resources/example.geojson
rename to datasource/src/test/resources/example.geojson
diff --git a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffDataSourceSpec.scala b/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffDataSourceSpec.scala
deleted file mode 100644
index 3bdeecd81..000000000
--- a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffDataSourceSpec.scala
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2018 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- */
-package astraea.spark.rasterframes.datasource.geotiff
-
-import java.nio.file.Paths
-
-import astraea.spark.rasterframes._
-import org.apache.spark.sql.functions._
-
-/**
- * @since 1/14/18
- */
-class GeoTiffDataSourceSpec
- extends TestEnvironment with TestData {
-
- val cogPath = getClass.getResource("/LC08_RGB_Norfolk_COG.tiff").toURI
- val nonCogPath = getClass.getResource("/L8-B8-Robinson-IL.tiff").toURI
- val l8samplePath = getClass.getResource("/L8-B1-Elkton-VA.tiff").toURI
-
- describe("GeoTiff reading") {
-
- it("should read sample GeoTiff") {
- val rf = spark.read
- .geotiff
- .loadRF(cogPath)
-
- assert(rf.count() > 10)
- }
-
- it("should lay out tiles correctly"){
-
- val rf = spark.read
- .geotiff
- .loadRF(cogPath)
-
- val tlm = rf.tileLayerMetadata.left.get
- val gb = tlm.gridBounds
- assert(gb.colMax > gb.colMin)
- assert(gb.rowMax > gb.rowMin)
- }
-
- it("should lay out tiles correctly for non-tiled tif") {
- val rf = spark.read
- .geotiff
- .loadRF(nonCogPath)
-
- println(rf.count())
- rf.show(false)
-
- assert(rf.count() > 1)
-
- import org.apache.spark.sql.functions._
- logger.info(
- rf.agg(
- min(col("spatial_key.row")) as "rowmin",
- max(col("spatial_key.row")) as "rowmax",
- min(col("spatial_key.col")) as "colmin",
- max(col("spatial_key.col")) as "colmax"
-
- ).first.toSeq.toString()
- )
- val tlm = rf.tileLayerMetadata.left.get
- val gb = tlm.gridBounds
- assert(gb.rowMax > gb.rowMin)
- assert(gb.colMax > gb.colMin)
-
- }
-
- it("should read in correctly check-summed contents") {
- // c.f. TileStatsSpec -> computing statistics over tiles -> should compute tile statistics -> sum
- val rf = spark.read.geotiff.loadRF(l8samplePath)
- val expected = 309149454 // computed with rasterio
- val result = rf.agg(
- sum(tile_sum(rf("tile")))
- ).collect().head.getDouble(0)
-
- assert(result === expected)
- }
-
- it("should write GeoTIFF RF to parquet") {
- val rf = spark.read
- .geotiff
- .loadRF(cogPath)
- assert(write(rf))
- }
-
- it("should write GeoTIFF") {
- val rf = spark.read
- .geotiff
- .loadRF(cogPath)
-
- logger.info(s"Read extent: ${rf.tileLayerMetadata.merge.extent}")
-
- val out = Paths.get("target", "example-geotiff.tiff")
- logger.info(s"Writing to $out")
- noException shouldBe thrownBy {
- rf.write.geotiff.save(out.toString)
- }
- }
- }
-}
diff --git a/datasource/src/test/scala/examples/Creating.scala b/datasource/src/test/scala/examples/Creating.scala
deleted file mode 100644
index 174bbaa3f..000000000
--- a/datasource/src/test/scala/examples/Creating.scala
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- * This software is licensed under the Apache 2 license, quoted below.
- *
- * Copyright 2018 Astraea, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * [http://www.apache.org/licenses/LICENSE-2.0]
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- *
- */
-
-package examples
-
-import java.io.File
-import java.nio.file.Files
-
-import geotrellis.raster.{Raster, Tile}
-
-/**
- * Examples of creating RasterFrames
- *
- * @since 1/16/18
- */
-object Creating extends App {
-/*
-# Creating RasterFrames
-
-## Initialization
-
-There are a couple of setup steps necessary anytime you want to work with RasterFrames. the first is to import the API symbols into scope:
-
-*/
-// tut:silent
-import astraea.spark.rasterframes._
-import org.apache.spark.sql._
-
-
-/*
-Next, initialize the `SparkSession`, and call the `withRasterFrames` method on it:
-*/
-
-// tut:silent
-implicit val spark = SparkSession.builder().
- master("local").appName("RasterFrames").
- getOrCreate().
- withRasterFrames
-
-/*
-And, ss is standard Spark SQL practice, we import additional DataFrame support:
-*/
-
-// tut:silent
-import spark.implicits._
-// tut:invisible
-spark.sparkContext.setLogLevel("ERROR")
-
-
-/*
-Now we are ready to create a RasterFrame.
-
-## Reading a GeoTIFF
-
-The most straightforward way to create a `RasterFrame` is to read a [GeoTIFF](https://en.wikipedia.org/wiki/GeoTIFF)
-file using a RasterFrame [`DataSource`](https://spark.apache.org/docs/latest/sql-programming-guide.html#data-sources)
-designed for this purpose.
-
-First add the following import:
-*/
-
-import astraea.spark.rasterframes.datasource.geotiff._
-/*
-(This is what adds the `.geotiff` method to `spark.read` below.)
-
-Then we use the `DataFrameReader` provided by `spark.read` to read the GeoTIFF:
- */
-
-// tut:book
-val samplePath = new File("src/test/resources/LC08_RGB_Norfolk_COG.tiff")
-val tiffRF = spark.read
- .geotiff
- .loadRF(samplePath.toURI)
-
-/*
-Let's inspect the structure of what we get back:
- */
-
-// tut
-tiffRF.printSchema()
-
-/*
-As reported by Spark, RasterFrames extracts 6 columns from the GeoTIFF we selected. Some of these columns are dependent
-on the contents of the source data, and some are are always available. Let's take a look at these in more detail.
-
-* `spatial_key`: GeoTrellis assigns a `SpatialKey` or a `SpaceTimeKey` to each tile, mapping it to the layer grid from
- which it came. If it has a `SpaceTimeKey`, RasterFrames will split it into a `SpatialKey` and a `TemporalKey` (the
- latter with column name `temporal_key`).
-* `extent`: The bounding box of the tile in the tile's native CRS.
-* `metadata`: The TIFF format header tags found in the file.
-* `tile` or `tile_n` (where `n` is a band number): For singleband GeoTIFF files, the `tile` column contains the cell
- data split into tiles. For multiband tiles, each column with `tile_` prefix contains each of the sources bands,
- in the order they were stored.
-
-See the section [Inspecting a `RasterFrame`](#inspecting-a--code-rasterframe--code-) (below) for more details on accessing the RasterFrame's metadata.
- */
-
-
-/*
-## Reading a GeoTrellis Layer
-
-If your imagery is already ingested into a [GeoTrellis layer](https://docs.geotrellis.io/en/latest/guide/spark.html#writing-layers),
-you can use the RasterFrames GeoTrellis DataSource. There are two parts to this GeoTrellis Layer support. The first
-is the GeoTrellis Catalog DataSource, which lists the GeoTrellis layers available at a URI. The second part is the actual
-RasterFrame reader for pulling a layer into a RasterFrame.
-
-Before we show how all of this works we need to have a GeoTrellis layer to work with. We can create one with the RasterFrame we
-constructed above.
-
- */
-import astraea.spark.rasterframes.datasource.geotrellis._
-
-val base = Files.createTempDirectory("rf-").toUri
-val layer = Layer(base, "sample", 0)
-tiffRF.write.geotrellis.asLayer(layer).save()
-
-/*
-Now we can point our catalog reader at the base directory and see what was saved:
-*/
-
-val cat = spark.read.geotrellisCatalog(base)
-cat.printSchema
-cat.show()
-
-/*
-As you can see, there's a lot of information stored in each row of the catalog. Most of this is associated with how the
-layer is discretized. However, there may be other application specific metadata serialized with a layer that can be use
-to filter the catalog entries or select a specific one. But for now, we're just going to load a RasterFrame in from the
-catalog using a convenience function.
- */
-
-val firstLayer = cat.select(geotrellis_layer).first
-val rfAgain = spark.read.geotrellis.loadRF(firstLayer)
-rfAgain.show()
-
-/*
-If you already know the `LayerId` of what you're wanting to read, you can bypass working with the catalog:
- */
-
-val anotherRF = spark.read.geotrellis.loadRF(layer)
-
-/*
-## Using GeoTrellis APIs
-
-If you are used to working directly with the GeoTrellis APIs, there are a number of additional ways to create a `RasterFrame`, as enumerated in the sections below.
-
-First, some standard `import`s:
-*/
-
-// tut:silent
-import geotrellis.raster.io.geotiff.SinglebandGeoTiff
-import geotrellis.spark.io._
-
-/*
-### From `ProjectedExtent`
-
-The simplest mechanism for getting a RasterFrame is to use the `toRF(tileCols, tileRows)` extension method on `ProjectedRaster`.
-*/
-
-val scene = SinglebandGeoTiff("src/test/resources/L8-B8-Robinson-IL.tiff")
-val rf = scene.projectedRaster.toRF(128, 128)
-rf.show(5, false)
-
-/*
-### From `TileLayerRDD`
-
-Another option is to use a GeoTrellis [`LayerReader`](https://docs.geotrellis.io/en/latest/guide/tile-backends.html),
-to get a `TileLayerRDD` for which there's also a `toRF` extension method.
-
-*/
-
-/*
-```scala
-import geotrellis.spark._
-val tiledLayer: TileLayerRDD[SpatialKey] = ???
-val rf = tiledLayer.toRF
-```
-*/
-
-/*
-## Inspecting a `RasterFrame`
-
-`RasterFrame` has a number of methods providing access to metadata about the contents of the RasterFrame.
-
-### Tile Column Names
-
-*/
-
-//```tut:book
-rf.tileColumns.map(_.toString)
-
-/*
-### Spatial Key Column Name
-*/
-
-//```tut:book
-rf.spatialKeyColumn.toString
-
-/*
-### Temporal Key Column
-
-Returns an `Option[Column]` since not all RasterFrames have an explicit temporal dimension.
-*/
-
-//```tut:book
-rf.temporalKeyColumn.map(_.toString)
-
-/*
-### Tile Layer Metadata
-
-The Tile Layer Metadata defines how the spatial/spatiotemporal domain is discretized into tiles, and what the key bounds are.
-*/
-
-import spray.json._
-// NB: The `fold` is required because an `Either` is returned, depending on the key type.
-rf.tileLayerMetadata.fold(_.toJson, _.toJson).prettyPrint
-
-
-//tut:invisible
-spark.stop()
-
-}
diff --git a/experimental/src/test/scala/astraea/spark/rasterframes/experimental/datasource/geojson/GeoJsonDataSourceTest.scala b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geojson/GeoJsonDataSourceTest.scala
similarity index 89%
rename from experimental/src/test/scala/astraea/spark/rasterframes/experimental/datasource/geojson/GeoJsonDataSourceTest.scala
rename to datasource/src/test/scala/org/locationtech/rasterframes/datasource/geojson/GeoJsonDataSourceTest.scala
index 425d91ab8..3d8ec9db3 100644
--- a/experimental/src/test/scala/astraea/spark/rasterframes/experimental/datasource/geojson/GeoJsonDataSourceTest.scala
+++ b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geojson/GeoJsonDataSourceTest.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea. Inc.
+ * Copyright 2019 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,13 +15,13 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes.experimental.datasource.geojson
-
-import astraea.spark.rasterframes.TestEnvironment
+package org.locationtech.rasterframes.datasource.geojson
import org.apache.spark.sql.types.{LongType, MapType}
+import org.locationtech.rasterframes.TestEnvironment
/**
* Test rig for GeoJsonRelation.
@@ -60,8 +60,7 @@ class GeoJsonDataSourceTest extends TestEnvironment {
.option(GeoJsonDataSource.INFER_SCHEMA, true)
.load(example2)
- results.show()
+ results.count() should be (8)
}
}
-
}
diff --git a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffCollectionDataSourceSpec.scala b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffCollectionDataSourceSpec.scala
similarity index 81%
rename from datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffCollectionDataSourceSpec.scala
rename to datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffCollectionDataSourceSpec.scala
index 1d7237c5b..9b69fd89e 100644
--- a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffCollectionDataSourceSpec.scala
+++ b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffCollectionDataSourceSpec.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2018 Astraea. Inc.
+ * Copyright 2018 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,13 +15,15 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
*
*/
-package astraea.spark.rasterframes.datasource.geotiff
+package org.locationtech.rasterframes.datasource.geotiff
import java.io.{File, FilenameFilter}
-import astraea.spark.rasterframes._
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.TestEnvironment
/**
* @since 1/14/18
@@ -33,7 +35,7 @@ class GeoTiffCollectionDataSourceSpec
it("shiould read a directory of files") {
val df = spark.read
- .geotiff
+ .format("geotiff")
.load(geotiffDir.resolve("*.tiff").toString)
val expected = geotiffDir.toFile.list(new FilenameFilter {
override def accept(dir: File, name: String): Boolean = name.endsWith("tiff")
@@ -41,7 +43,7 @@ class GeoTiffCollectionDataSourceSpec
assert(df.select("path").distinct().count() === expected)
- df.show(false)
+ // df.show(false)
}
}
}
diff --git a/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffDataSourceSpec.scala b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffDataSourceSpec.scala
new file mode 100644
index 000000000..eb5e55b0c
--- /dev/null
+++ b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffDataSourceSpec.scala
@@ -0,0 +1,244 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2018 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+package org.locationtech.rasterframes.datasource.geotiff
+
+import java.nio.file.Paths
+
+import geotrellis.proj4._
+import geotrellis.raster.io.geotiff.{MultibandGeoTiff, SinglebandGeoTiff}
+import geotrellis.vector.Extent
+import org.locationtech.rasterframes._
+import org.apache.spark.sql.functions._
+import org.locationtech.rasterframes.TestEnvironment
+
+/**
+ * @since 1/14/18
+ */
+class GeoTiffDataSourceSpec
+ extends TestEnvironment with TestData {
+
+ describe("GeoTiff reading") {
+
+ it("should read sample GeoTiff") {
+ val rf = spark.read.format("geotiff").load(cogPath.toASCIIString).asLayer
+
+ assert(rf.count() > 10)
+ }
+
+ it("should lay out tiles correctly") {
+
+ val rf = spark.read.format("geotiff").load(cogPath.toASCIIString).asLayer
+
+ val tlm = rf.tileLayerMetadata.left.get
+ val gb = tlm.gridBounds
+ assert(gb.colMax > gb.colMin)
+ assert(gb.rowMax > gb.rowMin)
+ }
+
+ it("should lay out tiles correctly for non-tiled tif") {
+ val rf = spark.read.format("geotiff").load(nonCogPath.toASCIIString).asLayer
+
+ assert(rf.count() > 1)
+
+ import org.apache.spark.sql.functions._
+ logger.info(
+ rf.agg(
+ min(col("spatial_key.row")) as "rowmin",
+ max(col("spatial_key.row")) as "rowmax",
+ min(col("spatial_key.col")) as "colmin",
+ max(col("spatial_key.col")) as "colmax"
+
+ ).first.toSeq.toString()
+ )
+ val tlm = rf.tileLayerMetadata.left.get
+ val gb = tlm.gridBounds
+ assert(gb.rowMax > gb.rowMin)
+ assert(gb.colMax > gb.colMin)
+
+ }
+
+ it("should read in correctly check-summed contents") {
+ // c.f. TileStatsSpec -> computing statistics over tiles -> should compute tile statistics -> sum
+ val rf = spark.read.format("geotiff").load(l8B1SamplePath.toASCIIString).asLayer
+ val expected = 309149454 // computed with rasterio
+ val result = rf.agg(
+ sum(rf_tile_sum(rf("tile")))
+ ).collect().head.getDouble(0)
+
+ assert(result === expected)
+ }
+ }
+
+ describe("GeoTiff writing") {
+
+ it("should write GeoTIFF RF to parquet") {
+ val rf = spark.read.format("geotiff").load(cogPath.toASCIIString).asLayer
+ assert(write(rf))
+ }
+
+ it("should write GeoTIFF from layer") {
+ val rf = spark.read.format("geotiff").load(cogPath.toASCIIString).asLayer
+
+ logger.info(s"Read extent: ${rf.tileLayerMetadata.merge.extent}")
+
+ val out = Paths.get("target", "example-geotiff.tif")
+ logger.info(s"Writing to $out")
+ noException shouldBe thrownBy {
+ rf.write.format("geotiff").save(out.toString)
+ }
+ }
+
+ it("should write unstructured raster") {
+ import spark.implicits._
+ val df = spark.read.format("raster")
+ .option("tileDimensions", "32,32") // oddball
+ .load(nonCogPath.toASCIIString) // core L8-B8-Robinson-IL.tiff
+
+ df.count() should be > 0L
+
+ val crs = df.select(rf_crs($"proj_raster")).first()
+
+ val out = Paths.get("target", "unstructured.tif").toString
+
+ noException shouldBe thrownBy {
+ df.write.geotiff.withCRS(crs).save(out)
+ }
+
+ val (inCols, inRows) = {
+ val id = sampleGeoTiff.imageData // inshallah same as nonCogPath
+ (id.cols, id.rows)
+ }
+ inCols should be (774)
+ inRows should be (500) //from gdalinfo
+
+ val outputTif = SinglebandGeoTiff(out)
+ outputTif.imageData.cols should be (inCols)
+ outputTif.imageData.rows should be (inRows)
+
+ // TODO check datatype, extent.
+ }
+
+ it("should round trip unstructured raster from COG"){
+ import spark.implicits._
+ import org.locationtech.rasterframes.datasource.raster._
+
+ val df = spark.read.raster.withTileDimensions(64, 64).load(singlebandCogPath.toASCIIString)
+
+ val resourceCols = 963 // from gdalinfo
+ val resourceRows = 754
+ val resourceExtent = Extent(752325.0, 3872685.0, 781215.0, 3895305.0)
+
+ df.count() should be > 0L
+
+ val crs = df.select(rf_crs(col("proj_raster"))).first()
+
+ val totalExtentRow = df.select(rf_extent($"proj_raster").alias("ext"))
+ .agg(
+ min($"ext.xmin").alias("xmin"),
+ min($"ext.ymin").alias("ymin"),
+ max($"ext.xmax").alias("xmax"),
+ max($"ext.ymax").alias("ymax")
+ ).first()
+
+ val dfExtent = Extent(totalExtentRow.getDouble(0), totalExtentRow.getDouble(1), totalExtentRow.getDouble(2), totalExtentRow.getDouble(3))
+ logger.info(s"Dataframe extent: ${dfExtent.toString()}")
+
+ dfExtent shouldBe resourceExtent
+
+ val out = Paths.get("target", "unstructured_cog.tif").toString
+
+ noException shouldBe thrownBy {
+ df.write.geotiff.withCRS(crs).save(out)
+ }
+
+ val (inCols, inRows, inExtent, inCellType) = {
+ val tif = readSingleband("LC08_B7_Memphis_COG.tiff")
+ val id = tif.imageData
+ (id.cols, id.rows, tif.extent, tif.cellType)
+ }
+ inCols should be (963)
+ inRows should be (754) //from gdalinfo
+ inExtent should be (resourceExtent)
+
+ val outputTif = SinglebandGeoTiff(out)
+ outputTif.imageData.cols should be (inCols)
+ outputTif.imageData.rows should be (inRows)
+ outputTif.extent should be (resourceExtent)
+ outputTif.cellType should be (inCellType)
+ }
+
+ it("should write GeoTIFF without layer") {
+ import org.locationtech.rasterframes.datasource.raster._
+ val pr = col("proj_raster_b0")
+ val rf = spark.read.raster.withBandIndexes(0, 1, 2).load(rgbCogSamplePath.toASCIIString)
+
+ val out = Paths.get("target", "example2-geotiff.tif")
+ logger.info(s"Writing to $out")
+
+ withClue("explicit extent/crs") {
+ noException shouldBe thrownBy {
+ rf
+ .withColumn("extent", rf_extent(pr))
+ .withColumn("crs", rf_crs(pr))
+ .write.geotiff.withCRS(LatLng).save(out.toString)
+ }
+ }
+
+ withClue("without explicit extent/crs") {
+ noException shouldBe thrownBy {
+ rf
+ .write.geotiff.withCRS(LatLng).save(out.toString)
+ }
+ }
+ withClue("with downsampling") {
+ noException shouldBe thrownBy {
+ rf
+ .write.geotiff
+ .withCRS(LatLng)
+ .withDimensions(128, 128)
+ .save(out.toString)
+ }
+ }
+ }
+
+ def s(band: Int): String =
+ s"https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/" +
+ s"MCD43A4.A2019059.h11v08.006.2019072203257_B0${band}.TIF"
+
+ it("shoud write multiband") {
+ import org.locationtech.rasterframes.datasource.raster._
+
+ val cat = s"""
+red,green,blue
+${s(1)},${s(4)},${s(3)}
+"""
+ val scene = spark.read.raster.fromCSV(cat, "red", "green", "blue").load()
+ val out = Paths.get("target", "geotiff-overview.tif").toString
+ scene.write.geotiff
+ .withCRS(LatLng)
+ .withDimensions(256, 256)
+ .save(out)
+
+ val outTif = MultibandGeoTiff(out)
+ outTif.bandCount should be (3)
+ }
+ }
+}
diff --git a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisCatalogSpec.scala b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisCatalogSpec.scala
similarity index 86%
rename from datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisCatalogSpec.scala
rename to datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisCatalogSpec.scala
index e69a5414a..c409eb216 100644
--- a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisCatalogSpec.scala
+++ b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisCatalogSpec.scala
@@ -15,17 +15,20 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.datasource.geotrellis
+package org.locationtech.rasterframes.datasource.geotrellis
import java.io.File
-import astraea.spark.rasterframes._
-import geotrellis.proj4.{CRS, LatLng, Sinusoidal}
+import org.locationtech.rasterframes._
+import geotrellis.proj4.LatLng
import geotrellis.spark._
import geotrellis.spark.io._
import geotrellis.spark.io.index.ZCurveKeyIndexMethod
import org.apache.hadoop.fs.FileUtil
+import org.locationtech.rasterframes.TestEnvironment
import org.scalatest.BeforeAndAfter
/**
@@ -67,7 +70,7 @@ class GeoTrellisCatalogSpec
.collect
assert(layer.length === 2)
- val lots = layer.map(sqlContext.read.geotrellis.loadRF).map(_.toDF).reduce(_ union _)
+ val lots = layer.map(sqlContext.read.geotrellis.loadLayer).map(_.toDF).reduce(_ union _)
assert(lots.count === 60)
}
}
diff --git a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisDataSourceSpec.scala b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisDataSourceSpec.scala
similarity index 79%
rename from datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisDataSourceSpec.scala
rename to datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisDataSourceSpec.scala
index 009382639..ecd3351df 100644
--- a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisDataSourceSpec.scala
+++ b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisDataSourceSpec.scala
@@ -1,7 +1,7 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
- * Copyright 2017-2018 Azavea & Astraea, Inc.
+ * Copyright 2017-2019 Azavea & Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -15,17 +15,19 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.datasource.geotrellis
+package org.locationtech.rasterframes.datasource.geotrellis
import java.io.File
import java.sql.Timestamp
import java.time.ZonedDateTime
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.datasource.DataSourceOptions
-import astraea.spark.rasterframes.rules._
-import astraea.spark.rasterframes.util._
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.datasource.DataSourceOptions
+import org.locationtech.rasterframes.rules._
+import org.locationtech.rasterframes.util._
import geotrellis.proj4.LatLng
import geotrellis.raster._
import geotrellis.raster.resample.NearestNeighbor
@@ -40,9 +42,10 @@ import geotrellis.vector._
import org.apache.avro.generic._
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.hadoop.fs.FileUtil
-import org.apache.spark.sql.functions.{udf ⇒ sparkUdf}
+import org.apache.spark.sql.functions.{udf => sparkUdf}
import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.storage.StorageLevel
+import org.locationtech.rasterframes.TestEnvironment
import org.scalatest.{BeforeAndAfterAll, Inspectors}
import scala.math.{max, min}
@@ -88,7 +91,7 @@ class GeoTrellisDataSourceSpec
outputDir.deleteOnExit()
// Test layer writing via RF
- testRdd.toRF.write.geotrellis.asLayer(layer).save()
+ testRdd.toLayer.write.geotrellis.asLayer(layer).save()
val tfRdd = testRdd.map { case (k, tile) ⇒
val md = Map("col" -> k.col,"row" -> k.row)
@@ -114,7 +117,7 @@ class GeoTrellisDataSourceSpec
val tlfRdd = ContextRDD(tfRdd, testRdd.metadata)
writer.write(tfLayer.id, tlfRdd, ZCurveKeyIndexMethod.byDay())
- //TestData.sampleTileLayerRDD.toRF.write.geotrellis.asLayer(sampleImageLayer).save()
+ //TestData.sampleTileLayerRDD.toLayer.write.geotrellis.asLayer(sampleImageLayer).save()
val writer2 = LayerWriter(sampleImageLayer.base)
val imgRDD = TestData.sampleTileLayerRDD
writer2.write(sampleImageLayer.id, imgRDD, ZCurveKeyIndexMethod)
@@ -123,13 +126,13 @@ class GeoTrellisDataSourceSpec
describe("DataSource reading") {
def layerReader = spark.read.geotrellis
it("should read tiles") {
- val df = layerReader.loadRF(layer)
+ val df = layerReader.loadLayer(layer)
assert(df.count === tileCoordRange.length * tileCoordRange.length)
}
it("used produce tile UDT that we can manipulate") {
- val df = layerReader.loadRF(layer)
- .select(SPATIAL_KEY_COLUMN, tile_stats(TILE_COLUMN))
+ val df = layerReader.loadLayer(layer)
+ .select(SPATIAL_KEY_COLUMN, rf_tile_stats(TILE_COLUMN))
assert(df.count() > 0)
}
@@ -138,7 +141,7 @@ class GeoTrellisDataSourceSpec
val bbox = testRdd.metadata.layout
.mapTransform(boundKeys.toGridBounds())
.jtsGeom
- val wc = layerReader.loadRF(layer).withCenter()
+ val wc = layerReader.loadLayer(layer).withCenter()
withClue("literate API") {
val df = wc.where(CENTER_COLUMN intersects bbox)
@@ -151,7 +154,7 @@ class GeoTrellisDataSourceSpec
}
it("should invoke Encoder[Extent]") {
- val df = layerReader.loadRF(layer).withBounds()
+ val df = layerReader.loadLayer(layer).withGeometry()
assert(df.count > 0)
assert(df.first.length === 5)
assert(df.first.getAs[Extent](2) !== null)
@@ -159,7 +162,7 @@ class GeoTrellisDataSourceSpec
it("should write to parquet") {
//just should not throw
- val df = layerReader.loadRF(layer)
+ val df = layerReader.loadLayer(layer)
assert(write(df))
}
}
@@ -169,23 +172,23 @@ class GeoTrellisDataSourceSpec
val expected = 2
val df = spark.read.geotrellis
.withNumPartitions(expected)
- .loadRF(layer)
+ .loadLayer(layer)
assert(df.rdd.partitions.length === expected)
}
it("should respect partitions 20") {
val expected = 20
val df = spark.read.geotrellis
.withNumPartitions(expected)
- .loadRF(layer)
+ .loadLayer(layer)
assert(df.rdd.partitions.length === expected)
}
it("should respect subdivide 2") {
val param = 2
- val df: RasterFrame = spark.read.geotrellis
+ val df: RasterFrameLayer = spark.read.geotrellis
.withTileSubdivisions(param)
- .loadRF(layer)
+ .loadLayer(layer)
- val dims = df.select(tile_dimensions(df.tileColumns.head)("cols"), tile_dimensions(df.tileColumns.head)("rows")).first()
+ val dims = df.select(rf_dimensions(df.tileColumns.head)("cols"), rf_dimensions(df.tileColumns.head)("rows")).first()
assert(dims.getAs[Int](0) === tileSize / param)
assert(dims.getAs[Int](1) === tileSize / param)
@@ -194,11 +197,11 @@ class GeoTrellisDataSourceSpec
}
it("should respect subdivide with TileFeature"){
val param = 2
- val rf: RasterFrame = spark.read.geotrellis
+ val rf: RasterFrameLayer = spark.read.geotrellis
.withTileSubdivisions(param)
- .loadRF(tfLayer)
+ .loadLayer(tfLayer)
- val dims = rf.select(tile_dimensions(rf.tileColumns.head)("cols"), tile_dimensions(rf.tileColumns.head)("rows"))
+ val dims = rf.select(rf_dimensions(rf.tileColumns.head)("cols"), rf_dimensions(rf.tileColumns.head)("rows"))
.first()
assert(dims.getAs[Int](0) === tileSize / param)
assert(dims.getAs[Int](1) === tileSize / param)
@@ -213,14 +216,14 @@ class GeoTrellisDataSourceSpec
.geotrellis
.withNumPartitions(7)
.withTileSubdivisions(subParam)
- .loadRF(layer)
+ .loadLayer(layer)
// is it partitioned correctly?
assert(rf.rdd.partitions.length === 7)
// is it subdivided?
assert(rf.count === testRdd.count * subParam * subParam)
- val dims = rf.select(tile_dimensions(rf.tileColumns.head)("cols"), tile_dimensions(rf.tileColumns.head)("rows"))
+ val dims = rf.select(rf_dimensions(rf.tileColumns.head)("cols"), rf_dimensions(rf.tileColumns.head)("rows"))
.first()
assert(dims.getAs[Int](0) === tileSize / subParam)
assert(dims.getAs[Int](1) === tileSize / subParam)
@@ -230,7 +233,7 @@ class GeoTrellisDataSourceSpec
val subs = 4
val rf = spark.read.geotrellis
.withTileSubdivisions(subs)
- .loadRF(sampleImageLayer)
+ .loadLayer(sampleImageLayer)
assert(rf.count === (TestData.sampleTileLayerRDD.count * subs * subs))
@@ -247,13 +250,13 @@ class GeoTrellisDataSourceSpec
it("should throw on subdivide 5") {
// only throws when an action is taken...
- assertThrows[IllegalArgumentException](spark.read.geotrellis.withTileSubdivisions(5).loadRF(layer).cache)
+ assertThrows[IllegalArgumentException](spark.read.geotrellis.withTileSubdivisions(5).loadLayer(layer).cache)
}
it("should throw on subdivide 13") {
- assertThrows[IllegalArgumentException](spark.read.geotrellis.withTileSubdivisions(13).loadRF(layer).cache)
+ assertThrows[IllegalArgumentException](spark.read.geotrellis.withTileSubdivisions(13).loadLayer(layer).cache)
}
it("should throw on subdivide -3") {
- assertThrows[IllegalArgumentException](spark.read.geotrellis.withTileSubdivisions(-3).loadRF(layer).count)
+ assertThrows[IllegalArgumentException](spark.read.geotrellis.withTileSubdivisions(-3).loadLayer(layer).count)
}
}
@@ -284,8 +287,8 @@ class GeoTrellisDataSourceSpec
it("should support extent against a geometry literal") {
val df: DataFrame = layerReader
- .loadRF(layer)
- .where(BOUNDS_COLUMN intersects pt1)
+ .loadLayer(layer)
+ .where(GEOMETRY_COLUMN intersects pt1)
assert(numFilters(df) === 1)
@@ -295,8 +298,8 @@ class GeoTrellisDataSourceSpec
it("should support query with multiple geometry types") {
// Mostly just testing that these evaluate without catalyst type errors.
- forEvery(JTS.all) { g ⇒
- val query = layerReader.loadRF(layer).where(BOUNDS_COLUMN.intersects(g))
+ forEvery(GeomData.all) { g ⇒
+ val query = layerReader.loadLayer(layer).where(GEOMETRY_COLUMN.intersects(g))
.persist(StorageLevel.OFF_HEAP)
assert(query.count() === 0)
}
@@ -308,8 +311,8 @@ class GeoTrellisDataSourceSpec
val mkPtFcn = sparkUdf((_: Row) ⇒ { Point(-88, 60).jtsGeom })
val df = layerReader
- .loadRF(layer)
- .where(st_intersects(BOUNDS_COLUMN, mkPtFcn(SPATIAL_KEY_COLUMN)))
+ .loadLayer(layer)
+ .where(st_intersects(GEOMETRY_COLUMN, mkPtFcn(SPATIAL_KEY_COLUMN)))
assert(numFilters(df) === 0)
@@ -320,7 +323,7 @@ class GeoTrellisDataSourceSpec
it("should support temporal predicates") {
withClue("at now") {
val df = layerReader
- .loadRF(layer)
+ .loadLayer(layer)
.where(TIMESTAMP_COLUMN === Timestamp.valueOf(now.toLocalDateTime))
assert(numFilters(df) == 1)
@@ -329,7 +332,7 @@ class GeoTrellisDataSourceSpec
withClue("at earlier") {
val df = layerReader
- .loadRF(layer)
+ .loadLayer(layer)
.where(TIMESTAMP_COLUMN === Timestamp.valueOf(now.minusDays(1).toLocalDateTime))
assert(numFilters(df) === 1)
@@ -338,7 +341,7 @@ class GeoTrellisDataSourceSpec
withClue("between now") {
val df = layerReader
- .loadRF(layer)
+ .loadLayer(layer)
.where(TIMESTAMP_COLUMN betweenTimes (now.minusDays(1), now.plusDays(1)))
assert(numFilters(df) === 1)
@@ -347,7 +350,7 @@ class GeoTrellisDataSourceSpec
withClue("between later") {
val df = layerReader
- .loadRF(layer)
+ .loadLayer(layer)
.where(TIMESTAMP_COLUMN betweenTimes (now.plusDays(1), now.plusDays(2)))
assert(numFilters(df) === 1)
@@ -358,10 +361,10 @@ class GeoTrellisDataSourceSpec
it("should support nested predicates") {
withClue("fully nested") {
val df = layerReader
- .loadRF(layer)
+ .loadLayer(layer)
.where(
- ((BOUNDS_COLUMN intersects pt1) ||
- (BOUNDS_COLUMN intersects pt2)) &&
+ ((GEOMETRY_COLUMN intersects pt1) ||
+ (GEOMETRY_COLUMN intersects pt2)) &&
(TIMESTAMP_COLUMN === Timestamp.valueOf(now.toLocalDateTime))
)
@@ -373,8 +376,8 @@ class GeoTrellisDataSourceSpec
withClue("partially nested") {
val df = layerReader
- .loadRF(layer)
- .where((BOUNDS_COLUMN intersects pt1) || (BOUNDS_COLUMN intersects pt2))
+ .loadLayer(layer)
+ .where((GEOMETRY_COLUMN intersects pt1) || (GEOMETRY_COLUMN intersects pt2))
.where(TIMESTAMP_COLUMN === Timestamp.valueOf(now.toLocalDateTime))
assert(numFilters(df) === 1)
@@ -387,17 +390,17 @@ class GeoTrellisDataSourceSpec
it("should support intersects with between times") {
withClue("intersects first") {
val df = layerReader
- .loadRF(layer)
- .where(BOUNDS_COLUMN intersects pt1)
+ .loadLayer(layer)
+ .where(GEOMETRY_COLUMN intersects pt1)
.where(TIMESTAMP_COLUMN betweenTimes(now.minusDays(1), now.plusDays(1)))
assert(numFilters(df) == 1)
}
withClue("intersects last") {
val df = layerReader
- .loadRF(layer)
+ .loadLayer(layer)
.where(TIMESTAMP_COLUMN betweenTimes(now.minusDays(1), now.plusDays(1)))
- .where(BOUNDS_COLUMN intersects pt1)
+ .where(GEOMETRY_COLUMN intersects pt1)
assert(numFilters(df) == 1)
}
@@ -405,10 +408,10 @@ class GeoTrellisDataSourceSpec
withClue("untyped columns") {
import spark.implicits._
val df = layerReader
- .loadRF(layer)
+ .loadLayer(layer)
.where($"timestamp" >= Timestamp.valueOf(now.minusDays(1).toLocalDateTime))
.where($"timestamp" <= Timestamp.valueOf(now.plusDays(1).toLocalDateTime))
- .where(st_intersects($"bounds", geomLit(pt1.jtsGeom)))
+ .where(st_intersects(GEOMETRY_COLUMN, geomLit(pt1.jtsGeom)))
assert(numFilters(df) == 1)
}
@@ -417,19 +420,19 @@ class GeoTrellisDataSourceSpec
it("should handle renamed spatial filter columns") {
val df = layerReader
- .loadRF(layer)
- .where(BOUNDS_COLUMN intersects region.jtsGeom)
- .withColumnRenamed(BOUNDS_COLUMN.columnName, "foobar")
+ .loadLayer(layer)
+ .where(GEOMETRY_COLUMN intersects region.jtsGeom)
+ .withColumnRenamed(GEOMETRY_COLUMN.columnName, "foobar")
assert(numFilters(df) === 1)
- assert(df.count > 0, df.printSchema)
+ assert(df.count > 0, df.schema.treeString)
}
it("should handle dropped spatial filter columns") {
val df = layerReader
- .loadRF(layer)
- .where(BOUNDS_COLUMN intersects region.jtsGeom)
- .drop(BOUNDS_COLUMN)
+ .loadLayer(layer)
+ .where(GEOMETRY_COLUMN intersects region.jtsGeom)
+ .drop(GEOMETRY_COLUMN)
assert(numFilters(df) === 1)
}
@@ -437,18 +440,18 @@ class GeoTrellisDataSourceSpec
describe("TileFeature support") {
def layerReader = spark.read.geotrellis
- it("should resolve TileFeature-based RasterFrame") {
- val rf = layerReader.loadRF(tfLayer)
+ it("should resolve TileFeature-based RasterFrameLayer") {
+ val rf = layerReader.loadLayer(tfLayer)
//rf.show(false)
assert(rf.collect().length === testRdd.count())
}
- it("should respect subdivideTile option on TileFeature RasterFrame") {
+ it("should respect subdivideTile option on TileFeature RasterFrameLayer") {
val subParam = 4
- val rf = spark.read.option(TILE_SUBDIVISIONS_PARAM, subParam).geotrellis.loadRF(tfLayer)
+ val rf = spark.read.option(TILE_SUBDIVISIONS_PARAM, subParam).geotrellis.loadLayer(tfLayer)
assert(rf.count === testRdd.count * subParam * subParam)
- val dims = rf.select(tile_dimensions(rf.tileColumns.head)("cols"), tile_dimensions(rf.tileColumns.head)("rows"))
+ val dims = rf.select(rf_dimensions(rf.tileColumns.head)("cols"), rf_dimensions(rf.tileColumns.head)("rows"))
.first()
assert(dims.getAs[Int](0) === tileSize / subParam)
assert(dims.getAs[Int](1) === tileSize / subParam)
@@ -459,11 +462,11 @@ class GeoTrellisDataSourceSpec
val rf = spark.read
.option(TILE_SUBDIVISIONS_PARAM, subParam)
.option(NUM_PARTITIONS_PARAM, 10)
- .geotrellis.loadRF(tfLayer)
+ .geotrellis.loadLayer(tfLayer)
// is it subdivided?
assert(rf.count === testRdd.count * subParam * subParam)
- val dims = rf.select(tile_dimensions(rf.tileColumns.head)("cols"), tile_dimensions(rf.tileColumns.head)("rows"))
+ val dims = rf.select(rf_dimensions(rf.tileColumns.head)("cols"), rf_dimensions(rf.tileColumns.head)("rows"))
.first()
assert(dims.getAs[Int](0) === tileSize / subParam)
assert(dims.getAs[Int](1) === tileSize / subParam)
diff --git a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotrellis/TileFeatureSupportSpec.scala b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/TileFeatureSupportSpec.scala
similarity index 94%
rename from datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotrellis/TileFeatureSupportSpec.scala
rename to datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/TileFeatureSupportSpec.scala
index 5d475f263..0cf7e358c 100644
--- a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotrellis/TileFeatureSupportSpec.scala
+++ b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/TileFeatureSupportSpec.scala
@@ -15,13 +15,15 @@
* License for the specific language governing permissions and limitations under
* the License.
*
+ * SPDX-License-Identifier: Apache-2.0
+ *
*/
-package astraea.spark.rasterframes.datasource.geotrellis
+package org.locationtech.rasterframes.datasource.geotrellis
-import astraea.spark.rasterframes._
-import astraea.spark.rasterframes.datasource.geotrellis.TileFeatureSupport._
-import astraea.spark.rasterframes.util.{WithCropMethods, WithMaskMethods, WithMergeMethods, WithPrototypeMethods}
+import org.locationtech.rasterframes._
+import org.locationtech.rasterframes.datasource.geotrellis.TileFeatureSupport._
+import org.locationtech.rasterframes.util.{WithCropMethods, WithMaskMethods, WithMergeMethods, WithPrototypeMethods}
import geotrellis.proj4.LatLng
import geotrellis.raster.crop.Crop
import geotrellis.raster.rasterize.Rasterizer
@@ -32,6 +34,7 @@ import geotrellis.spark.tiling._
import geotrellis.vector.{Extent, ProjectedExtent}
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
+import org.locationtech.rasterframes.TestEnvironment
import org.scalatest.BeforeAndAfter
import scala.reflect.ClassTag
@@ -151,8 +154,6 @@ class TileFeatureSupportSpec extends TestEnvironment
object TileFeatureSupportSpec {
- import scala.language.implicitConversions
-
implicit class RichRandom(val rnd: scala.util.Random) extends AnyVal {
def nextDouble(max: Double): Double = (rnd.nextInt * max) / Int.MaxValue.toDouble
def nextOrderedPair(max:Double): (Double,Double) = (nextDouble(max),nextDouble(max)) match {
diff --git a/datasource/src/test/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceDataSourceSpec.scala b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceDataSourceSpec.scala
new file mode 100644
index 000000000..a02c858e1
--- /dev/null
+++ b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceDataSourceSpec.scala
@@ -0,0 +1,314 @@
+/*
+ * This software is licensed under the Apache 2 license, quoted below.
+ *
+ * Copyright 2019 Astraea, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * [http://www.apache.org/licenses/LICENSE-2.0]
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ */
+
+package org.locationtech.rasterframes.datasource.raster
+import geotrellis.raster.Tile
+import org.apache.spark.sql.functions.{lit, udf, round}
+import org.locationtech.rasterframes.{TestEnvironment, _}
+import org.locationtech.rasterframes.datasource.raster.RasterSourceDataSource.{RasterSourceCatalog, _}
+import org.locationtech.rasterframes.model.TileDimensions
+import org.locationtech.rasterframes.ref.RasterRef.RasterRefTile
+import org.locationtech.rasterframes.util._
+
+class RasterSourceDataSourceSpec extends TestEnvironment with TestData {
+ import spark.implicits._
+
+ describe("DataSource parameter processing") {
+ def singleCol(paths: Iterable[String]) = {
+ val rows = paths.mkString(DEFAULT_COLUMN_NAME + "\n", "\n", "")
+ RasterSourceCatalog(rows, DEFAULT_COLUMN_NAME)
+ }
+
+ it("should handle single `path`") {
+ val p = Map(PATH_PARAM -> "/usr/local/foo/bar.tif")
+ p.catalog should be (Some(singleCol(p.values)))
+ }
+
+ it("should handle single `paths`") {
+ val p = Map(PATHS_PARAM -> "/usr/local/foo/bar.tif")
+ p.catalog should be (Some(singleCol(p.values)))
+ }
+ it("should handle multiple `paths`") {
+ val expected = Seq("/usr/local/foo/bar.tif", "/usr/local/bar/foo.tif")
+ val p = Map(PATHS_PARAM -> expected.mkString("\n\r", "\n\n", "\r"))
+ p.catalog should be (Some(singleCol(expected)))
+ }
+ it("should handle both `path` and `paths`") {
+ val expected1 = Seq("/usr/local/foo/bar.tif", "/usr/local/bar/foo.tif")
+ val expected2 = "/usr/local/barf/baz.tif"
+ val p = Map(PATHS_PARAM -> expected1.mkString("\n"), PATH_PARAM -> expected2)
+ p.catalog should be (Some(singleCol(expected1 :+ expected2)))
+ }
+ it("should parse tile dimensions") {
+ val p = Map(TILE_DIMS_PARAM -> "4, 5")
+ p.tileDims should be (Some(TileDimensions(4, 5)))
+ }
+
+ it("should parse path table specification") {
+ val p = Map(CATALOG_TABLE_PARAM -> "catalogTable", CATALOG_TABLE_COLS_PARAM -> "path")
+ p.pathSpec should be (Right(RasterSourceCatalogRef("catalogTable", "path")))
+ }
+
+ it("should parse path table from CSV") {
+ val bands = Seq("B1", "B2", "B3")
+ val paths = Seq("/usr/local/foo/bar.tif", "/usr/local/bar/foo.tif", "/usr/local/barf/baz.tif")
+ val csv =
+ s"""
+ |${bands.mkString(",")}
+ |${paths.mkString(",")}
+ """.stripMargin.trim
+ val p = Map(CATALOG_CSV_PARAM -> csv)
+ p.pathSpec should be (Left(RasterSourceCatalog(csv)))
+ }
+ }
+
+ describe("RasterSource as relation reading") {
+ val b = DEFAULT_COLUMN_NAME
+
+ it("should default to a single band schema") {
+ val df = spark.read.raster.load(l8B1SamplePath.toASCIIString)
+ val tcols = df.tileColumns
+ tcols.length should be(1)
+ tcols.map(_.columnName) should contain(DEFAULT_COLUMN_NAME)
+ }
+ it("should support a multiband schema") {
+ val df = spark.read
+ .raster
+ .withBandIndexes(0, 1, 2)
+ .load(cogPath.toASCIIString)
+ val tcols = df.tileColumns
+ tcols.length should be(3)
+ tcols.map(_.columnName) should contain allElementsOf Seq("_b0", "_b1", "_b2").map(s => DEFAULT_COLUMN_NAME + s)
+ }
+ it("should read a multiband file") {
+ val df = spark.read
+ .raster
+ .withBandIndexes(0, 1, 2)
+ .load(cogPath.toASCIIString)
+ .cache()
+ df.schema.size should be (4)
+ // Test (roughly) we have three distinct but compatible bands
+ val stats = df.agg(rf_agg_stats($"${b}_b0") as "s0", rf_agg_stats($"${b}_b1") as "s1", rf_agg_stats($"${b}_b2") as "s2")
+ stats.select($"s0.data_cells" === $"s1.data_cells").as[Boolean].first() should be(true)
+ stats.select($"s0.data_cells" === $"s2.data_cells").as[Boolean].first() should be(true)
+ stats.select($"s0.mean" =!= $"s1.mean").as[Boolean].first() should be(true)
+ stats.select($"s0.mean" =!= $"s2.mean").as[Boolean].first() should be(true)
+ }
+ it("should read a single file") {
+ // Image is 1028 x 989 -> 9 x 8 tiles
+ val df = spark.read.raster
+ .withTileDimensions(128, 128)
+ .load(cogPath.toASCIIString)
+
+ df.count() should be(math.ceil(1028.0 / 128).toInt * math.ceil(989.0 / 128).toInt)
+
+ val dims = df.select(rf_dimensions($"$b").as[TileDimensions]).distinct().collect()
+ dims should contain allElementsOf
+ Seq(TileDimensions(4,128), TileDimensions(128,128), TileDimensions(128,93), TileDimensions(4,93))
+
+ df.select($"${b}_path").distinct().count() should be(1)
+ }
+ it("should read a multiple files with one band") {
+ val df = spark.read.raster
+ .from(Seq(cogPath, l8B1SamplePath, nonCogPath))
+ .withTileDimensions(128, 128)
+ .load()
+ df.select($"${b}_path").distinct().count() should be(3)
+ df.schema.size should be(2)
+ }
+ it("should read a multiple files with heterogeneous bands") {
+ val df = spark.read.raster
+ .from(Seq(cogPath, l8B1SamplePath, nonCogPath))
+ .withLazyTiles(false)
+ .withTileDimensions(128, 128)
+ .withBandIndexes(0, 1, 2, 3)
+ .load()
+ .cache()
+ df.select($"${b}_path").distinct().count() should be(3)
+ df.schema.size should be(5)
+
+ df.select($"${b}_b0").count() should be (df.select($"${b}_b0").na.drop.count())
+ df.select($"${b}_b1").na.drop.count() shouldBe <(df.count())
+ df.select($"${b}_b1").na.drop.count() should be (df.select($"${b}_b2").na.drop.count())
+ df.select($"${b}_b3").na.drop.count() should be (0)
+ }
+
+ it("should read a set of coherent bands from multiple files from a CSV") {
+ val bands = Seq("B1", "B2", "B3")
+ val paths = Seq(
+ l8SamplePath(1).toASCIIString,
+ l8SamplePath(2).toASCIIString,
+ l8SamplePath(3).toASCIIString
+ )
+
+ val csv =
+ s"""
+ |${bands.mkString(",")}
+ |${paths.mkString(",")}
+ """.stripMargin.trim
+
+ val df = spark.read.raster
+ .fromCSV(csv)
+ .withTileDimensions(128, 128)
+ .load()
+
+ df.schema.size should be(6)
+ df.tileColumns.size should be (3)
+ df.select($"B1_path").distinct().count() should be (1)
+ }
+
+ it("should read a set of coherent bands from multiple files in a dataframe") {
+ val bandPaths = Seq((
+ l8SamplePath(1).toASCIIString,
+ l8SamplePath(2).toASCIIString,
+ l8SamplePath(3).toASCIIString))
+ .toDF("B1", "B2", "B3")
+ .withColumn("foo", lit("something"))
+
+ val df = spark.read.raster
+ .fromCatalog(bandPaths, "B1", "B2", "B3")
+ .withTileDimensions(128, 128)
+ .load()
+
+ df.schema.size should be(7)
+ df.tileColumns.size should be (3)
+ df.select($"B1_path").distinct().count() should be (1)
+
+ df.columns.contains("foo") should be (true)
+ df.select($"foo").distinct().count() should be (1)
+ df.select($"foo".as[String]).first() should be ("something")
+
+ val diffStats = df.select(rf_tile_stats($"B1") =!= rf_tile_stats($"B2")).as[Boolean].collect()
+ diffStats.forall(identity) should be(true)
+ }
+
+ it("should read a set of coherent bands from multiple files in a csv") {
+ def b(i: Int) = l8SamplePath(i).toASCIIString
+
+ val csv =
+ s"""
+ |B1, B2, B3, foo
+ |${b(1)}, ${b(2)}, ${b(3)}, something
+ """.stripMargin
+
+ val df = spark.read.raster
+ .fromCSV(csv, "B1", "B2", "B3")
+ .withTileDimensions(128, 128)
+ .load()
+
+ df.schema.size should be(7)
+ df.tileColumns.size should be (3)
+ df.select($"B1_path").distinct().count() should be (1)
+
+ df.columns.contains("foo") should be (true)
+ df.select($"foo").distinct().count() should be (1)
+ df.select($"foo".as[String]).first() should be ("something")
+
+ val diffStats = df.select(rf_tile_stats($"B1") =!= rf_tile_stats($"B2")).as[Boolean].collect()
+ diffStats.forall(identity) should be(true)
+ }
+
+ it("should support lazy and strict reading of tiles") {
+ val is_lazy = udf((t: Tile) => {
+ t.isInstanceOf[RasterRefTile]
+ })
+
+ val df1 = spark.read.raster
+ .withLazyTiles(true)
+ .load(l8SamplePath(1).toASCIIString)
+
+ df1.select(is_lazy($"proj_raster.tile").as[Boolean]).first() should be (true)
+
+ val df2 = spark.read.raster
+ .withLazyTiles(false)
+ .load(l8SamplePath(1).toASCIIString)
+
+ df2.select(is_lazy($"proj_raster.tile").as[Boolean]).first() should be (false)
+
+ }
+ }
+
+ describe("RasterSource breaks up scenes into tiles") {
+ val modis_df = spark.read.raster
+ .withTileDimensions(128, 128)
+ .withLazyTiles(true)
+ .load(remoteMODIS.toASCIIString)
+
+ val l8_df = spark.read.raster
+ .withTileDimensions(32, 33)
+ .withLazyTiles(true)
+ .load(remoteL8.toASCIIString)
+
+ ignore("should have at most four tile dimensions reading MODIS; ignore until fix #242") {
+ val dims = modis_df.select(rf_dimensions($"proj_raster")).distinct().collect()
+ dims.length should be > (0)
+ dims.length should be <= (4)
+ }
+
+ it("should have at most four tile dimensions reading landsat") {
+ val dims = l8_df.select(rf_dimensions($"proj_raster")).distinct().collect()
+ dims.length should be > (0)
+ dims.length should be <= (4)
+ }
+
+ it("should provide MODIS tiles with requested size") {
+ val res = modis_df
+ .withColumn("dims", rf_dimensions($"proj_raster"))
+ .select($"dims".as[TileDimensions]).distinct().collect()
+
+ forEvery(res) { r =>
+ r.cols should be <=128
+ r.rows should be <=128
+ }
+ }
+
+ it("should provide Landsat tiles with requested size") {
+ val dims = l8_df
+ .withColumn("dims", rf_dimensions($"proj_raster"))
+ .select($"dims".as[TileDimensions]).distinct().collect()
+
+ forEvery(dims) { d =>
+ d.cols should be <=32
+ d.rows should be <=33
+ }
+ }
+
+ it("should have consistent tile resolution reading MODIS") {
+ val res = modis_df
+ .withColumn("ext", rf_extent($"proj_raster"))
+ .withColumn("dims", rf_dimensions($"proj_raster"))
+ .select(round(($"ext.xmax" - $"ext.xmin") / $"dims.cols", 5))
+ .distinct().collect()
+ withClue(res.mkString("(", ", ", ")")) {
+ res.length should be(1)
+ }
+ }
+
+ it("should have consistent tile resolution reading Landsat") {
+ val res = l8_df
+ .withColumn("ext", rf_extent($"proj_raster"))
+ .withColumn("dims", rf_dimensions($"proj_raster"))
+ .select(($"ext.xmax" - $"ext.xmin") / $"dims.cols")
+ .distinct().collect()
+ res.length should be (1)
+ }
+ }
+}
diff --git a/deployment/README.md b/deployment/README.md
deleted file mode 100644
index 5e008b8a1..000000000
--- a/deployment/README.md
+++ /dev/null
@@ -1,66 +0,0 @@
-# RasterFrames Jupyter Notebook Docker Container
-
-RasterFrames provides a Docker image with a Jupyter Notebook pre-configured with RasterFrames support for Python 3 and Scala Spylon kernels.
-
-## Quick start
-
-This will use the [latest image](https://hub.docker.com/r/s22s/rasterframes-notebooks/) published to Docker Hub.
-
-```bash
-# Optionally pull the latest image.
-$ docker pull s22s/rasterframes-notebooks
-
-# from root of the git repo
-$ cd deployment/docker/jupyter
-$ docker-compose up
-```
-
-## Custom run
-
-The `docker-compose` incantation automatically exposes port 8888 for the Jupyter Notebook and ports ports 4040-4044 for the Spark UI.
-
-The image can equivalently be run with:
-
- $ docker run -it --rm -p 8888:8888 -p 4040-4044:4040-4044 s22s/rasterframes-notebooks
-
-The `docker run` command can be changed to quickly customize the container.
-
-To mount a directory on the host machine (to load or save local files directly from Jupyter) add
-
- -v /some/host/folder/for/work:/home/jovyan/work
-
-to the command.
-
-Attach the notebook server to a different host port with
-
- -p 8630:8888
-
-if you already have a notebook server running on port 8888.
-
-If you want to use a known password, use
-
-```bash
-docker run -it --rm -p 8888:8888 -p 4040-4044:4040-4044 \
- s22s/rasterframes-notebooks \
- start-notebook.sh --NotebookApp.password='sha1:1c360e8dd3e1:946d17ef9e6b8cbb28c7bb0152329786918cc424'
-```
-
-Where the password sha is generated with [`notebook.auth.passwd`](https://jupyter-notebook.readthedocs.io/en/stable/public_server.html#preparing-a-hashed-password).
-
-Please see the `Dockerfile` and the `docker-compose.yml` file on GitHub ([here](https://github.com/locationtech/rasterframes/tree/develop/deployment/docker/jupyter)) as a starting point to customize your image and container.
-
-
-## For Development
-
-To build the Docker image based on local development changes:
-
-```bash
-# from the root of the repo
-sbt deployment/rfDocker
-```
-
-## Base images
-
-This image is based on [jupyter/pyspark-notebook](https://hub.docker.com/r/jupyter/pyspark-notebook), with some
-portions from [jupyter/all-spark-notebook](https://hub.docker.com/r/jupyter/all-spark-notebook).
-Much more extensive instructions can be found at those locations.
\ No newline at end of file
diff --git a/deployment/build.sbt b/deployment/build.sbt
deleted file mode 100644
index c76ef554b..000000000
--- a/deployment/build.sbt
+++ /dev/null
@@ -1,56 +0,0 @@
-import sbt.{IO, _}
-
-import scala.sys.process.Process
-
-moduleName := "rasterframes-deployment"
-
-val Docker = config("docker")
-val Python = config("python")
-
-
-lazy val rfDockerImageName = settingKey[String]("Name to tag Docker image with.")
-rfDockerImageName := "s22s/rasterframes-notebooks"
-
-lazy val rfDocker = taskKey[Unit]("Build Jupyter Notebook Docker image with RasterFrames support.")
-rfDocker := (Docker / packageBin).value
-
-lazy val runRFNotebook = taskKey[String]("Run RasterFrames Jupyter Notebook image")
-runRFNotebook := {
- val imageName = rfDockerImageName.value
- val _ = rfDocker.value
- Process(s"docker run -p 8888:8888 -p 4040:4040 $imageName").run()
- imageName
-}
-
-Docker / resourceDirectory := baseDirectory.value / "docker"/ "jupyter"
-
-Docker / target := target.value / "docker"
-
-Docker / mappings := {
- val rezDir = (Docker / resourceDirectory).value
- val files = (rezDir ** "*") pair Path.relativeTo(rezDir)
-
- val jar = (assembly in LocalProject("pyrasterframes")).value
- val py = (packageBin in (LocalProject("pyrasterframes"), Python)).value
-
- files ++ Seq(jar -> jar.getName, py -> py.getName)
-}
-
-def rfFiles = Def.task {
- val destDir = (Docker / target).value
- val filePairs = (Docker / mappings).value
- IO.copy(filePairs.map { case (src, dst) ⇒ (src, destDir / dst) })
-}
-
-Docker / packageBin := {
- val _ = rfFiles.value
- val logger = streams.value.log
- val staging = (Docker / target).value
- val ver = (version in LocalRootProject).value
-
- logger.info(s"Running docker build in $staging")
- val imageName = rfDockerImageName.value
- Process("docker-compose build", staging).!
- Process(s"docker tag $imageName:latest $imageName:$ver", staging).!
- staging
-}
diff --git a/deployment/docker/jupyter/Dockerfile b/deployment/docker/jupyter/Dockerfile
deleted file mode 100644
index ebf52fdac..000000000
--- a/deployment/docker/jupyter/Dockerfile
+++ /dev/null
@@ -1,69 +0,0 @@
-FROM jupyter/pyspark-notebook:92fe05d1e7e5
-
-MAINTAINER Astraea, Inc.
-
-ENV RF_LIB_LOC /usr/lib
-ENV RF_JAR $RF_LIB_LOC/rasterframes.jar
-ENV PY_RF_ZIP $RF_LIB_LOC/pyrasterframes.zip
-
-USER root
-
-RUN echo "spark.driver.extraClassPath $RF_JAR" >> /usr/local/spark/conf/spark-defaults.conf && \
- echo "spark.executor.extraClassPath $RF_JAR" >> /usr/local/spark/conf/spark-defaults.conf
-
-EXPOSE 4040 4041 4042 4043 4044
-
-ENV SPARK_OPTS $SPARK_OPTS \
- --py-files $PY_RF_ZIP \
- --jars $RF_JAR \
- --driver-class-path $RF_JAR \
- --conf spark.executor.extraClassPath=$RF_JAR
-
-ENV PYTHONPATH $PYTHONPATH:$PY_RF_ZIP
-
-
-#================================
-# Copied from all-spark-notebook
-#================================
-
-# TODO: resolve the issue that toree has with --py-files, above (it does not like .zips and
-# TODO: the kernel will not start)
-# Apache Toree kernel
-#RUN pip install --no-cache-dir \
-# https://dist.apache.org/repos/dist/dev/incubator/toree/0.2.0-incubating-rc5/toree-pip/toree-0.2.0.tar.gz \
-# && \
-# jupyter toree install --sys-prefix && \
-# rm -rf /home/$NB_USER/.local && \
-# fix-permissions $CONDA_DIR && \
-# fix-permissions /home/$NB_USER
-
-# Spylon-kernel
-RUN conda install --quiet --yes 'spylon-kernel=0.4*' && \
- conda clean -tipsy && \
- python -m spylon_kernel install --sys-prefix
-
-# Sphinx (for Notebook->html)
-RUN conda install --quiet --yes \
- sphinx nbsphinx
-
-# Cleanup pip residuals
-RUN rm -rf /home/$NB_USER/.local && \
- fix-permissions $CONDA_DIR && \
- fix-permissions /home/$NB_USER
-
-# Do these after the standard environment setup
-# since these change more regularly.
-COPY *.zip $PY_RF_ZIP
-COPY *.jar $RF_JAR
-
-RUN chown -R $NB_UID:$NB_GID $HOME
-
-USER $NB_UID
-
-# RUN pip install guzzle_sphinx_theme
-
-# TODO: This repo can change regularly without docker knowing that the
-# TODO: Layer this command is written in has become stale. Need to either
-# TODO: clone a specific revision that we manually update, or keep this
-# TODO: last, assuming the prior commends will be detected as stale.
-RUN git clone http://github.com/s22s/rasterframes-book && ln -s rasterframes-book/Python/samples
diff --git a/deployment/docker/jupyter/README.md b/deployment/docker/jupyter/README.md
deleted file mode 100644
index 815b78d8e..000000000
--- a/deployment/docker/jupyter/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# RasterFrames Jupyter Notebook
-
-Please visit `rasterframe-book` directory for example notebooks.
\ No newline at end of file
diff --git a/deployment/docker/jupyter/docker-compose.yml b/deployment/docker/jupyter/docker-compose.yml
deleted file mode 100644
index 29f311c0f..000000000
--- a/deployment/docker/jupyter/docker-compose.yml
+++ /dev/null
@@ -1,18 +0,0 @@
-version: '3'
-
-services:
- rasterframes-notebooks:
- build: .
- image: s22s/rasterframes-notebooks
- ports:
- # jupyter notebook port
- - "8888:8888"
- # spark UI ports
- - "4040:4040"
- - "4041:4041"
- - "4042:4042"
- - "4043:4043"
- - "4044:4044"
-# To save locally at './work' from the container:
-# volumes:
-# - ./work:/home/jovyan/work
\ No newline at end of file
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 000000000..0c4925ba1
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,128 @@
+# RasterFrames Documentation
+
+The conceptual principles to consider when writing RasterFrames users' documentation are covered in [Documentation Principles](documentation-principles.md). This document covers the mechanics of writing, evaluating, and building the documentation during the writing process.
+
+## Organization
+
+The documentation build is a two step process, whereby two sources (three if API docs are included) are merged together and converted into a static HTML website. They are:
+
+* Technical content and Python examples: `/pyrasterframes/src/main/python/docs`
+* Global documentation assets and Scala specific content: `/docs/src/main/paradox`
+
+The build constructs in `/docs` are (due to legacy reasons) the top-level mechanisms of bringing it all together, but the meat of the content is in `/pyrasterframes/...`, and will be the focus of most of this document.
+
+## Prerequisites
+
+* [`sbt`](https://www.scala-sbt.org/)
+* Python 3
+* Markdown editor. [Visual Studio Code](https://code.visualstudio.com/) with [`language-weave` extension](https://marketplace.visualstudio.com/items?itemName=jameselderfield.language-weave) is one option. [Atom](https://atom.io/) is another which might actually have better support for evaluating code in Markdown, but I've not tried it.
+
+> Note: If you're using Visual Studio Code, you can associate the `.pymd` with the `language-weave` plugin by adding this to your `settings.json` file.
+
+```json
+"files.associations": {
+ "*.pymd": "pweave_md"
+}
+```
+
+## Building the docs
+
+To build the static site locally:
+
+ sbt makeSite
+
+The site will be at `/docs/target/site/index.html`.
+
+
+To start an interactive server running the docs:
+
+ sbt previewSite
+
+The sbt server logs a message with an address to view the site.
+
+## Content Development Process
+
+Start with one of the existing files in `/pyrasterframes/src/main/python/docs` as a template. [`local-algebra.pymd`](../pyrasterframes/src/main/python/docs/local-algebra.pymd) is a good example. If the content will have code blocks you want evaluated and results injected into the output, use the file extension `.pymd`. If the content doesn't use evaluatable code blocks, use `.md`.
+
+All `.pymd` files are processed with a tool called [Pweave](http://mpastell.com/pweave), which produces a regular Markdown file where identified code blocks are evaluated and their results (optionally) included in the text. Matplotlib is supported! It is much like `knitr` in the R community. If we run into issues with Pweave, we can also consider [`knitpy`](https://github.com/jankatins/knitpy) or [`codebraid`](https://github.com/gpoore/codebraid). Codebraid looks particularly powerful, so we may consider transitioning to it.
+
+Pweave has a number of [code chunk options](http://mpastell.com/pweave/chunks.html) for controlling the output. Refer to those documents on details, and experiment a little to see what conveys your intent best.
+
+To set up an environment whereby you can easily test/evaluate your code blocks during writing:
+
+1. Run `sbt` from the project root directory. You should get output that looks something like:
+ ```
+ $ sbt
+ ...
+ [info] Loading settings for project pyrasterframes from build.sbt ...
+ [info] Loading settings for project rf-notebook from build.sbt ...
+ [info] Set current project to RasterFrames (in build file:/)
+ sbt:RasterFrames>
+ ```
+2. The first time you check out the code, or whenever RasterFrames code is updated, you need to build the project artifacts so they are available for Pweave. Some docs also refer to test resources, so the easiest way to do it is to run the unit tests.
+ ```
+ sbt:RasterFrames> pyrasterframes/test
+ [info] Compiling 4 Scala sources to /core/target/scala-2.11/classes ...
+ ... lots of noise ...
+ [info] PyRasterFrames assembly written to '/pyrasterframes/target/python/deps/jars/pyrasterframes-assembly-0.8.0-SNAPSHOT.jar'
+ [info] Synchronizing 44 files to '/pyrasterframes/target/python'
+ [info] Running 'python setup.py build bdist_wheel' in '/pyrasterframes/target/python'
+ ... more noise ...
+ [info] Python .whl file written to '/pyrasterframes/target/python/dist/pyrasterframes-0.8.0.dev0-py2.py3-none-any.whl'
+ [success] Total time: 83 s, completed Jul 5, 2019 12:25:48 PM
+ sbt:RasterFrames>
+ ```
+3. To evaluate all the `.pymd` files, run:
+ ```
+ sbt:RasterFrames> pyrasterframes/pySetup pweave
+ ```
+ To build the artifact (step 1) and evaluate all the `.pymd` files, you can run:
+ ```
+ sbt:RasterFrames> pyrasterframes/doc
+ ```
+ There's a command alias for this last step: `pyDocs`.
+4. To evaluate a single `.pymd` file, you pass the `-s` option and the filename relative to the `pyrasterframes/src/main/python` directory. You can also specify the output [format](http://mpastell.com/pweave/formats.html) with the `-f` argument.
+ ```
+ sbt:RasterFrames> pyrasterframes/pySetup pweave -s docs/getting-started.pymd
+ [info] Synchronizing 44 files to '/pyrasterframes/target/python'
+ [info] Running 'python setup.py pweave -s docs/getting-started.pymd' in '/pyrasterframes/target/python'
+ running pweave
+ --------------------------------------------------
+ Running getting-started
+ --------------------------------------------------
+ status
+ status
+ Processing chunk 1 named None from line 14
+ ...
+ Weaved docs/getting-started.pymd to docs/getting-started.md
+ ```
+5. The _output_ Markdown files are written to `/pyrasterframes/target/python/docs`. _Note_: don't edit any files in the `pyrasterframes/target` directory... they will get overwritten each time `sbt` runs a command.
+6. During content development it's sometimes helpful to see the output rendered as basic HTML. To do this, add the `-f html` option to the pweave command:
+ ```
+ sbt:RasterFrames> pyrasterframes/pySetup pweave -f html -s docs/getting-started.pymd
+ [info] Synchronizing 54 files to '/pyrasterframes/target/python'
+ [info] Running 'python setup.py pweave -f html -s docs/getting-started.pymd' in '/pyrasterframes/target/python'
+ running pweave
+ --------------------------------------------------
+ Running getting-started
+ --------------------------------------------------
+ ...
+ Weaved docs/getting-started.pymd to docs/getting-started.html
+ ```
+ Note: This feature requires `pandoc` to be installed.
+7. To build all the documentation and convert to a static html site, run:
+ ```bash
+ sbt makeSite
+ ```
+ Results will be found in `/docs/target/site`.
+
+## Notebooks
+
+The `rf-notebooks` sub-project creates a Docker image with Jupyter Notebooks pre-configured with RasterFrames. Any `.pymd` file under `.../python/docs/` is converted to an evaluated Jupyter Notebook and included as a part of the build.
+
+## Submission Process
+
+Submit new and updated documentation as a PR against locationtech/rasterframes. Make sure you've signed the Eclipse Foundation ECA and you ["Signed-off-by:"](https://stackoverflow.com/questions/1962094/what-is-the-sign-off-feature-in-git-for) each commit in the PR. The "Signed-off-by" email address needs to be the exact same one as registered with the [Eclipse Foundation](https://wiki.eclipse.org/Development_Resources/Contributing_via_Git).
+
+If you are using CircleCI, the CircleCI configuration is set up to build the docs with `sbt makeSite` for branch names matching `feature/.*docs.*` or `docs/.*`.
+
diff --git a/docs/build.sbt b/docs/build.sbt
index 3025e7c75..59f734a48 100644
--- a/docs/build.sbt
+++ b/docs/build.sbt
@@ -1,41 +1,79 @@
-import com.typesafe.sbt.SbtGit.git
+// task to create documentation PDF
+lazy val makePDF = taskKey[File]("Build PDF version of documentation")
+lazy val pdfFileName = settingKey[String]("Name of the PDF file generated")
+pdfFileName := s"RasterFrames-Users-Manual-${version.value}.pdf"
-enablePlugins(SiteScaladocPlugin, ParadoxPlugin, TutPlugin, GhpagesPlugin, ScalaUnidocPlugin)
+makePDF := {
+ import scala.sys.process._
-name := "rasterframes-docs"
+ // Get the python source directory configured in the root project.
+ val base = (Compile / paradox / sourceDirectories).value.find(_.toString.contains("python")).head
-libraryDependencies ++= Seq(
- spark("mllib").value % Tut,
- spark("sql").value % Tut
-)
+ // Hard coded lacking any simple way of determining order.
+ val files = Seq(
+ "index.md",
+ "description.md",
+ "concepts.md",
+ "getting-started.md",
+ "raster-io.md",
+ "raster-catalogs.md",
+ "raster-read.md",
+ "raster-write.md",
+ "vector-data.md",
+ "raster-processing.md",
+ "local-algebra.md",
+ "nodata-handling.md",
+ "aggregation.md",
+ "time-series.md",
+ "machine-learning.md",
+ "unsupervised-learning.md",
+ "supervised-learning.md",
+ "numpy-pandas.md",
+ "languages.md",
+ "reference.md"
+ ).map(base ** _).flatMap(_.get)
-git.remoteRepo := "git@github.com:locationtech/rasterframes.git"
-apiURL := Some(url("http://rasterframes.io/latest/api"))
-autoAPIMappings := true
-ghpagesNoJekyll := true
+ val log = streams.value.log
+ log.info("Section ordering:")
+ files.foreach(f => log.info(" - " + f.getName))
-ScalaUnidoc / siteSubdirName := "latest/api"
-paradox / siteSubdirName := "."
+ val work = target.value / "makePDF"
+ work.mkdirs()
-addMappingsToSiteDir(ScalaUnidoc / packageDoc / mappings, ScalaUnidoc / siteSubdirName)
-addMappingsToSiteDir(Compile / paradox / mappings, paradox / siteSubdirName)
+ val prepro = files.zipWithIndex.map { case (f, i) ⇒
+ val dest = work / f"$i%02d-${f.getName}%s"
+ // Filter cross links and add a newline
+ (Seq("sed", "-e", """s/@ref://g;s/@@.*//g""", f.toString) #> dest).!
+ // Add newline at the end of the file so as to make pandoc happy
+ ("echo" #>> dest).!
+ ("echo \\pagebreak" #>> dest).!
+ dest
+ }
-paradoxProperties ++= Map(
- "github.base_url" -> "https://github.com/locationtech/rasterframes",
- "version" -> version.value,
- "scaladoc.org.apache.spark.sql.gt" -> "http://rasterframes.io/latest"
- //"scaladoc.geotrellis.base_url" -> "https://geotrellis.github.io/scaladocs/latest",
- // "snip.pyexamples.base_dir" -> (baseDirectory.value + "/../pyrasterframes/python/test/examples")
-)
-paradoxTheme := Some(builtinParadoxTheme("generic"))
-//paradoxTheme / sourceDirectory := sourceDirectory.value / "main" / "paradox" / "_template"
+ val output = target.value / pdfFileName.value
-Compile / doc / scalacOptions++= Seq( "-J-Xmx6G", "-no-link-warnings")
+ val header = (Compile / sourceDirectory).value / "latex" / "header.latex"
-Tut / run / fork := true
+ val args = "pandoc" ::
+ "--from=markdown+pipe_tables" ::
+ "--to=pdf" ::
+ "-t" :: "latex" ::
+ "-s" ::
+ "--toc" ::
+ "-V" :: "title:RasterFrames Users' Manual" ::
+ "-V" :: "author:Astraea, Inc." ::
+ "-V" :: "geometry:margin=0.75in" ::
+ "-V" :: "papersize:letter" ::
+ "--include-in-header" :: header.toString ::
+ "-o" :: output.toString ::
+ prepro.map(_.toString).toList
-Tut / run / javaOptions := Seq("-Xmx8G", "-Dspark.ui.enabled=false")
+ log.info(s"Running: ${args.mkString(" ")}")
+ Process(args, base).!
-Compile / paradox := (Compile / paradox).dependsOn(tutQuick).value
-Compile / paradox / sourceDirectory := tutTargetDirectory.value
-makeSite := makeSite.dependsOn(Compile / unidoc).dependsOn(Compile / paradox).value
+ log.info("Wrote: " + output)
+
+ output
+}
+
+makePDF := makePDF.dependsOn(Compile / paradox).value
diff --git a/docs/documentation-principles.md b/docs/documentation-principles.md
new file mode 100644
index 000000000..3f626ddb8
--- /dev/null
+++ b/docs/documentation-principles.md
@@ -0,0 +1,98 @@
+# Documentation Principles
+
+This document outlines some concrete considerations for the planned rewrite of the _RasterFrames Users' Manual_.
+See [`docs/README.md`](https://github.com/locationtech/rasterframes/blob/develop/docs/README.md) for technical details on the mechanics of building the documentation.
+
+## Title
+
+The project is "RasterFrames". Documentation shall use the name "RasterFrames". The RasterFrames runtime is deployed in two forms: `rasterframes` (for Scala/Java/SQL-only), and `pyrasterframes` (Python). But the user should know and think of the project as one thing: RasterFrames.
+
+## Format
+
+The documentation shall be rendered in Markdown (`.md`) and Python Markdown (`.pymd`). The source of this documentation is committed in the same project as the code, in `pyrasterframes/src/main/python/docs`. Additional details on processing the docs can be found [here](https://github.com/locationtech/rasterframes/tree/develop/pyrasterframes#running-python-markdown-sources).
+
+Filenames shall be in skewer case; lower case with dashes ('-') separating words. For example, `foo-in-bar-with-baz.md`.
+
+## Target Audience
+
+The target audience for the _RasterFrames Users' Manual_ is the intermediate data scientist or developer, already adept at either Python or Scala. Eventually this should be expanded to include SQL adepts. This user may or may not be an expert at EO data usage, so attention to jargon, undefined terms, new concepts etc. should be kept in mind, making use of authoritative external resources to fill in knowledge.
+
+> Enumerate concepts they are aware of, including:
+> * Scene discretization
+> * Temporal revisit rate
+> * Spatial resolution
+> * Spatial extent
+
+The user may or may not be familiar with Apache Spark, so they should also be guided in filling in minimum requisite knowledge. At a minimum we have to explain what a `SparkSession` is, and that we have to configure it; note the difference between an "action" and "transformation"; what a "collect" action is, and the consequences if the data is large; awareness of partitioning.
+
+There's also an opportunity to emphasize the scalability benefits over, say, a rasterio/Pandas-only solution (but that we interop with them too).
+
+The users' goals with EO data could come from a number of different perspectives:
+
+* Creating map layers
+* Statistical analysis
+* Machine learning
+* Change detection
+* Chip preparation
+
+While the emphasis of the documentation should be on the core capabilities (and flexibility therein) of RasterFrames, a nod toward these various needs in the examples shown can go a long way in helping the user understand appropriateness of the library to their problem domain.
+
+## Pedagogical Technique
+
+The documentation shall emphasize the use of executable code examples, with interspersed prose to explain them. The RasterFrames tooling supports the `.pymd` (Python Markdown) format whereby delimited code blocks are evaluated at build time in order to include output/results. The project currently uses ['Pweave'](http://mpastell.com/pweave/chunks.html) to do this (this may change, but `.pymd` will remain the source format). `Pweave` also has the ability to convert `.pymd` to Jupyter Notebooks, which may serve useful. Through this process we can be assured that any examples shown are code the user can copy into their own projects.
+
+Visuals are always helpful, but even more so when there's a lot of code involved that needs continual contextualization and explanation. `Pweave` supports rendering of `matplotlib` charts/images, a capability we should make use of. Furthermore, where beneficial, we should create diagrams or other visuals to help express concepts and relationships.
+
+This "code-first" focus is admittedly in tension with the competing need to explain some of the more abstract aspects of distributed computing necessary for advanced monitoring, profiling, optimization, and deployment. We should evolve the documentation over the long term to address some of these needs, but in the near term the focus should be on the core conceptual model necessary for understanding tile processing. Diagrams can be helpful here.
+
+## Polyglot Considerations
+
+In terms of implementation, RasterFrames is a Scala project first. All algorithmic, data modeling, and heavy lifting, etc. are implemented in Scala.
+
+However, due to user base preferences, RasterFrames is primarily _deployed_ through Python. As such, documentation, examples, etc. should first be implemented in Python.
+
+Secondarily to that, SQL should be used to highlight the analyst-friendly expression of the functionality in SQL. At least a handful of examples in a dedicated SQL page would go far in showing the cross-language support.
+
+Thirdly, Scala developers should be encouraged to use the platform, clearly stating that the APIs are on equal footing, using consistent naming conventions, etc. and that most examples should translate almost one-to-one.
+
+In the long term I'd love to see Python, Scala, and SQL all treated in equal footing, with examples expressed in all languages, but that's a tall order this early in the project development.
+
+## User Journey
+
+As noted in the _Target Audience_ section, the documentation needs to guide the user through the process from curiosity around EO data to scalable processing of it. Within the first section or so the user should see an example that reads an image and does something somewhat compelling with it, noting that the same code will work on a laptop with a small amount of imagery, as well as on 100s of computers with TB (or more) of imagery. Problems "solved in the small" can be grown to "global scale".
+
+With a "journey" focus, concepts and capabilities are introduced incrementally, building upon previous examples and concepts, adding more complexity as it develops. Once the fundamentals are covered, we then move into the examples that are closer to use-cases or cookbook entries.
+
+The preliminary outline is as follows, but is open for refinement, rethinking, etc.
+
+1. Description
+2. Architecture
+3. Getting Started
+ * `pyspark` shell
+ * Jupyter Notebook
+ * Standalone Python Script
+4. Raster Data I/O
+ * Reading Raster Data
+ * Writing Raster Data
+5. Spatial Relations
+6. Raster Processing
+ * Local Algebra
+ * “NoData” Handling
+ * Aggregation
+ * Time Series
+ * Spark ML Support
+ * Pandas and NumPy Interoperability
+7. Cookbook Examples
+8. Extended Examples / Case Studies
+9. Function Reference
+
+## Hiccups
+
+During the documentation process we are likely to run into problems whereby the goal of the writer is inhibited by a bug or capability gap in the code. We should use this opportunity to improve the library to provide the optimal user experience before resorting to workarounds or hacks or addition of what might seem to be arbitrary complexity.
+
+## Testing
+
+To "be all that we can be", testing the documentation against a new user is a boon. It may be hard to capture volunteers to do this, but we should consider enlisting interns and friends of the company to go through the documentation and give feedback on where gaps exist.
+
+
+
diff --git a/docs/src/main/latex/header.latex b/docs/src/main/latex/header.latex
new file mode 100644
index 000000000..50a53b5da
--- /dev/null
+++ b/docs/src/main/latex/header.latex
@@ -0,0 +1,9 @@
+\DeclareUnicodeCharacter{2218}{$\circ$}
+\DeclareUnicodeCharacter{2714}{$\checkmark$}
+\DeclareUnicodeCharacter{21A9}{$\newline$}
+\hypersetup{
+ colorlinks=true,
+ linkcolor=blue,
+ allbordercolors={0 0 0},
+ pdfborderstyle={/S/U/W 1}
+}
\ No newline at end of file
diff --git a/docs/src/main/tut/CNAME b/docs/src/main/paradox/CNAME
similarity index 100%
rename from docs/src/main/tut/CNAME
rename to docs/src/main/paradox/CNAME
diff --git a/docs/src/main/paradox/RasterFramePipeline.png b/docs/src/main/paradox/RasterFramePipeline.png
new file mode 100644
index 000000000..26900b8cf
Binary files /dev/null and b/docs/src/main/paradox/RasterFramePipeline.png differ
diff --git a/docs/src/main/tut/RasterFramePipeline.svg b/docs/src/main/paradox/RasterFramePipeline.svg
similarity index 100%
rename from docs/src/main/tut/RasterFramePipeline.svg
rename to docs/src/main/paradox/RasterFramePipeline.svg
diff --git a/docs/src/main/paradox/_template/page.st b/docs/src/main/paradox/_template/page.st
index c264d9e7c..2a32dd170 100644
--- a/docs/src/main/paradox/_template/page.st
+++ b/docs/src/main/paradox/_template/page.st
@@ -31,6 +31,8 @@
.md-left { float: left; }
.md-right { float: right; }
.md-clear { clear: both; }
+ table { font-size: 80%; }
+ code { font-size: 0.75em !important; }
@@ -132,7 +134,7 @@