diff --git a/.circleci/config.yml b/.circleci/config.yml index 32e5a1e18..e9d3844bf 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -5,7 +5,7 @@ _defaults: &defaults environment: TERM: dumb docker: - - image: circleci/openjdk:8-jdk + - image: s22s/rasterframes-circleci:latest _setenv: &setenv name: set CloudRepo credentials @@ -19,81 +19,111 @@ _delenv: &unsetenv _restore_cache: &restore_cache keys: - - v2-dependencies-{{ checksum "build.sbt" }} - - v2-dependencies- + - v3-dependencies-{{ checksum "build.sbt" }} + - v3-dependencies- _save_cache: &save_cache - key: v2-dependencies--{{ checksum "build.sbt" }} + key: v3-dependencies--{{ checksum "build.sbt" }} paths: + - ~/.cache/coursier - ~/.ivy2/cache - ~/.sbt - - ~/.rf_cache + - ~/.local jobs: - staticAnalysis: + test: <<: *defaults - + resource_class: large steps: - checkout - run: *setenv - restore_cache: <<: *restore_cache - - run: cat /dev/null | sbt dependencyCheck - - run: cat /dev/null | sbt --debug dumpLicenseReport + - run: ulimit -c unlimited -S + - run: cat /dev/null | sbt -batch core/test datasource/test experimental/test pyrasterframes/test + - run: + command: | + mkdir -p /tmp/core_dumps + cp core.* *.hs /tmp/core_dumps 2> /dev/null || true + when: on_fail - - run: *unsetenv + - store_artifacts: + path: /tmp/core_dumps + - run: *unsetenv - save_cache: <<: *save_cache - - store_artifacts: - path: datasource/target/scala-2.11/dependency-check-report.html - destination: dependency-check-report-datasource.html - - store_artifacts: - path: experimental/target/scala-2.11/dependency-check-report.html - destination: dependency-check-report-experimental.html - - store_artifacts: - path: core/target/scala-2.11/dependency-check-report.html - destination: dependency-check-report-core.html - - store_artifacts: - path: pyrasterframes/target/scala-2.11/dependency-check-report.html - destination: dependency-check-report-pyrasterframes.html - test: + docs: <<: *defaults - resource_class: large + 
resource_class: xlarge steps: - checkout - run: *setenv + - restore_cache: <<: *restore_cache - - run: sudo apt-get install python-pip pandoc && pip install setuptools # required for pyrasterframes testing - - run: cat /dev/null | sbt test + - run: ulimit -c unlimited -S + - run: pip3 install --quiet --user -r pyrasterframes/src/main/python/requirements.txt + - run: + command: cat /dev/null | sbt makeSite + no_output_timeout: 30m + + - run: + command: | + mkdir -p /tmp/core_dumps + cp core.* *.hs /tmp/core_dumps 2> /dev/null || true + when: on_fail + + - store_artifacts: + path: /tmp/core_dumps + + - store_artifacts: + path: docs/target/site + destination: rf-site - run: *unsetenv + - save_cache: <<: *save_cache - publish: + it: <<: *defaults - resource_class: large + resource_class: xlarge steps: - checkout - run: *setenv + - restore_cache: <<: *restore_cache - - run: sudo apt-get install python-pip pandoc && pip install setuptools # required for pyrasterframes testing - - run: cat /dev/null | sbt test - - run: cat /dev/null | sbt publish + - run: ulimit -c unlimited -S + - run: + command: cat /dev/null | sbt it:test + no_output_timeout: 30m + + - run: + command: | + mkdir -p /tmp/core_dumps + cp core.* *.hs /tmp/core_dumps 2> /dev/null || true + when: on_fail + + - store_artifacts: + path: /tmp/core_dumps - run: *unsetenv + - save_cache: <<: *save_cache - it: - <<: *defaults + itWithoutGdal: + working_directory: ~/repo + environment: + TERM: dumb + docker: + - image: circleci/openjdk:8-jdk resource_class: xlarge steps: - checkout @@ -110,6 +140,36 @@ jobs: - save_cache: <<: *save_cache + staticAnalysis: + <<: *defaults + + steps: + - checkout + - run: *setenv + - restore_cache: + <<: *restore_cache + + - run: cat /dev/null | sbt dependencyCheck + - run: cat /dev/null | sbt --debug dumpLicenseReport + + - run: *unsetenv + + - save_cache: + <<: *save_cache + - store_artifacts: + path: datasource/target/scala-2.11/dependency-check-report.html + destination: 
dependency-check-report-datasource.html + - store_artifacts: + path: experimental/target/scala-2.11/dependency-check-report.html + destination: dependency-check-report-experimental.html + - store_artifacts: + path: core/target/scala-2.11/dependency-check-report.html + destination: dependency-check-report-core.html + - store_artifacts: + path: pyrasterframes/target/scala-2.11/dependency-check-report.html + destination: dependency-check-report-pyrasterframes.html + + workflows: version: 2 all: @@ -119,20 +179,29 @@ workflows: filters: branches: only: - - /astraea\/feature\/.*-its/ - - publish: + - /feature\/.*-its/ + - itWithoutGdal: filters: branches: only: - - astraea/develop - nightlyReleaseAstraea: + - /feature\/.*-its/ + - docs: + filters: + branches: + only: + - /feature\/.*docs.*/ + - /docs\/.*/ + + nightly: triggers: - schedule: cron: "0 8 * * *" filters: branches: only: - - astraea/develop + - develop jobs: - it - - staticAnalysis + - itWithoutGdal + - docs +# - staticAnalysis diff --git a/.gitignore b/.gitignore index ca41e7212..ff43c9712 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,5 @@ metastore_db tour/jars tour/*.tiff scoverage-report* + +zz-* diff --git a/.scalafmt.conf b/.scalafmt.conf index 4d09e93c7..ca5e10394 100644 --- a/.scalafmt.conf +++ b/.scalafmt.conf @@ -1,10 +1,7 @@ maxColumn = 138 continuationIndent.defnSite = 2 -continuationIndent.callSite = 2 -continuationIndent.extendSite = 2 binPack.parentConstructors = true binPack.literalArgumentLists = false -binPack.unsafeCallSite = true newlines.penalizeSingleSelectMultiArgList = false newlines.sometimesBeforeColonInMethodReturnType = false align.openParenCallSite = false @@ -16,5 +13,4 @@ rewriteTokens { } optIn.selfAnnotationNewline = false optIn.breakChainOnFirstMethodDot = true -optIn.configStyleArguments = false -importSelectors = BinPack +importSelectors = BinPack \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index fbe2823fa..12cad75b7 100644 --- a/.travis.yml 
+++ b/.travis.yml @@ -1,37 +1,38 @@ sudo: false +dist: xenial +language: python -language: scala +python: + - "3.7" cache: directories: - $HOME/.ivy2/cache - $HOME/.sbt/boot - $HOME/.rf_cache + - $HOME/.cache/coursier scala: - 2.11.11 -jdk: - - oraclejdk8 +env: + - COURSIER_VERBOSITY=-1 JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 addons: apt: packages: + - openjdk-8-jdk - pandoc - - python-pip install: - - pip install setuptools - -sbt_args: -no-colors + - pip install rasterio shapely pandas numpy pweave + - wget -O - https://piccolo.link/sbt-1.2.8.tgz | tar xzf - script: - - sbt test - - sbt it:test + - sbt/bin/sbt -java-home $JAVA_HOME -batch test + - sbt/bin/sbt -java-home $JAVA_HOME -batch it:test # - sbt -Dfile.encoding=UTF8 clean coverage test coverageReport # Tricks to avoid unnecessary cache updates - find $HOME/.sbt -name "*.lock" | xargs rm - find $HOME/.ivy2 -name "ivydata-*.properties" | xargs rm -#after_success: -# - bash <(curl -s https://codecov.io/bash) \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 756d4cb8a..1be2dcdf7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,19 +1,10 @@ # Contributing to RasterFrames -Thanks for your interest in this project. +Community contributions are always welcome. To get started, please review this document, +the [code of conduct](https://github.com/locationtech/rasterframes/blob/develop/CODE_OF_CONDUCT.md), and reach out to +us on [gitter](https://gitter.im/locationtech/rasterframes) so the community can help you get started! -## Project Description - -LocationTech RasterFrames brings the power of Spark DataFrames to geospatial -raster data, empowered by the map algebra and tile layer operations of -GeoTrellis. The underlying purpose of RasterFrames is to allow data scientists -and software developers to process and analyze geospatial-temporal raster data -with the same flexibility and ease as any other Spark Catalyst data type. 
At its -core is a user-defined type (UDT) called TileUDT, which encodes a GeoTrellis -Tile in a form the Spark Catalyst engine can process. Furthermore, we extend the -definition of a DataFrame to encompass some additional invariants, allowing for -geospatial operations within and between RasterFrames to occur, while still -maintaining necessary geo-referencing constructs. +The official home of RasterFrames under the Eclipse Foundation may be found here: * https://projects.eclipse.org/projects/locationtech.rasterframes @@ -58,19 +49,10 @@ commands are as follows: * Build documentation: `sbt makeSite` * Spark shell with RasterFrames initialized: `sbt console` - -## Contribution Process - -RasterFrames uses GitHub pull requests (PRs) for accepting contributions. -Please fork the repository, create a branch, and submit a PR based off the `master` branch. -During the PR review process comments may be attached. Please look out for comments -and respond as necessary. - - ## Contact Help, questions and community dialog are supported via Gitter: - * https://gitter.im/s22s/raster-frames + * https://gitter.im/locationtech/rasterframes Commercial support is available by writing to info@astraea.earth diff --git a/LICENSE b/LICENSE index 152d8c948..53d10c005 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ This software is licensed under the Apache 2 license, quoted below. -Copyright 2017-2018 Astraea, Inc. +Copyright 2017-2019 Astraea, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of diff --git a/README.md b/README.md index dddeb94ae..2b3bcb43f 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,67 @@ - +® - [![Join the chat at https://gitter.im/s22s/raster-frames](https://badges.gitter.im/s22s/raster-frames.svg)](https://gitter.im/s22s/raster-frames?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) + [![Join the chat at https://gitter.im/locationtech/rasterframes](https://badges.gitter.im/locationtech/rasterframes.svg)](https://gitter.im/locationtech/rasterframes?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -_RasterFrames™_ brings the power of Spark DataFrames to geospatial raster data, empowered by the map algebra and tile layer operations of [GeoTrellis](https://geotrellis.io/). +RasterFrames® brings together Earth-observation (EO) data access, cloud computing, and DataFrame-based data science. The recent explosion of EO data from public and private satellite operators presents both a huge opportunity as well as a challenge to the data analysis community. It is _Big Data_ in the truest sense, and its footprint is rapidly getting bigger. - +RasterFrames provides a DataFrame-centric view over arbitrary raster data, enabling spatiotemporal queries, map algebra raster operations, and compatibility with the ecosystem of Spark ML algorithms. By using DataFrames as the core cognitive and compute data model, it is able to deliver these features in a form that is both accessible to general analysts and scalable along with the rapidly growing data footprint. + + Please see the [Getting Started](http://rasterframes.io/getting-started.html) section of the Users' Manual to start using RasterFrames. 
-## Documentation +## User Resources + +* [RasterFrames Users' Manual](http://rasterframes.io/) +* [RasterFrames Jupyter Notebook Docker Image](https://hub.docker.com/r/s22s/rasterframes-notebook/) +* [Gitter Channel](https://gitter.im/locationtech/rasterframes) +* [Submit an Issue](https://github.com/locationtech/rasterframes/issues) + + +## Contributing + +Community contributions are always welcome. To get started, please review our [contribution guidelines](https://github.com/locationtech/rasterframes/blob/develop/CONTRIBUTING.md), [code of conduct](https://github.com/locationtech/rasterframes/blob/develop/CODE_OF_CONDUCT.md), and reach out to us on [gitter](https://gitter.im/locationtech/rasterframes) so the community can help you get started! + +RasterFrames is part of the LocationTech Stack. + + + +It is written in Scala, but with Python bindings. If you wish to contribute to the development of RasterFrames, or you +wish to build it from scratch, you will need [sbt](https://www.scala-sbt.org/). Then clone the repository from GitHub. + +```bash +git clone https://github.com/locationtech/rasterframes.git +cd rasterframes +``` + +To publish to your local repository: + +```bash +sbt publishLocal +``` + +You can run tests with + +```bash +sbt test +``` + +and integration tests + +```bash +sbt it:test +``` + +The documentation may be built with + +```bash +sbt makeSite +``` -* [Users' Manual](http://rasterframes.io/) -* [API Documentation](http://rasterframes.io/latest/api/index.html) -* [List of available UDFs](http://rasterframes.io/latest/api/index.html#astraea.spark.rasterframes.RasterFunctions) -* [RasterFrames Jupyter Notebook Docker Image](https://hub.docker.com/r/s22s/rasterframes-notebooks/) +Additional, Python sepcific build instruction may be found at [pyrasterframes/src/main/python/README.md](pyrasterframes/src/main/python/README.md) ## Copyright and License -RasterFrames is released under the Apache 2.0 License, copyright Astraea, Inc. 2017-2018. 
+RasterFrames is released under the Apache 2.0 License, copyright Astraea, Inc. 2017-2019. diff --git a/bench/archive/jmh-results-20190528095237.json b/bench/archive/jmh-results-20190528095237.json new file mode 100644 index 000000000..11d8f15f0 --- /dev/null +++ b/bench/archive/jmh-results-20190528095237.json @@ -0,0 +1,163 @@ +[ + { + "jmhVersion" : "1.21", + "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalEquals", + "mode" : "avgt", + "threads" : 1, + "forks" : 1, + "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java", + "jvmArgs" : [ + "-Xmx2048M", + "-Xmx4g" + ], + "jdkVersion" : "1.8.0_171", + "vmName" : "Java HotSpot(TM) 64-Bit Server VM", + "vmVersion" : "25.171-b11", + "warmupIterations" : 8, + "warmupTime" : "10 s", + "warmupBatchSize" : 1, + "measurementIterations" : 5, + "measurementTime" : "10 s", + "measurementBatchSize" : 1, + "primaryMetric" : { + "score" : 421.72586734818816, + "scoreError" : 21.639640737136855, + "scoreConfidence" : [ + 400.0862266110513, + 443.365508085325 + ], + "scorePercentiles" : { + "0.0" : 416.0271304058273, + "50.0" : 420.27062086802925, + "90.0" : 430.09766576285773, + "95.0" : 430.09766576285773, + "99.0" : 430.09766576285773, + "99.9" : 430.09766576285773, + "99.99" : 430.09766576285773, + "99.999" : 430.09766576285773, + "99.9999" : 430.09766576285773, + "100.0" : 430.09766576285773 + }, + "scoreUnit" : "us/op", + "rawData" : [ + [ + 417.8839821936131, + 416.0271304058273, + 430.09766576285773, + 424.349937510613, + 420.27062086802925 + ] + ] + }, + "secondaryMetrics" : { + } + }, + { + "jmhVersion" : "1.21", + "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.resolveCRS", + "mode" : "avgt", + "threads" : 1, + "forks" : 1, + "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java", + "jvmArgs" : [ + "-Xmx2048M", + "-Xmx4g" + ], + "jdkVersion" : "1.8.0_171", + "vmName" : "Java HotSpot(TM) 64-Bit Server VM", + "vmVersion" : 
"25.171-b11", + "warmupIterations" : 8, + "warmupTime" : "10 s", + "warmupBatchSize" : 1, + "measurementIterations" : 5, + "measurementTime" : "10 s", + "measurementBatchSize" : 1, + "primaryMetric" : { + "score" : 358.41316103726996, + "scoreError" : 21.21533668234991, + "scoreConfidence" : [ + 337.19782435492004, + 379.6284977196199 + ], + "scorePercentiles" : { + "0.0" : 351.9024799408263, + "50.0" : 356.47836323413975, + "90.0" : 364.61463628732025, + "95.0" : 364.61463628732025, + "99.0" : 364.61463628732025, + "99.9" : 364.61463628732025, + "99.99" : 364.61463628732025, + "99.999" : 364.61463628732025, + "99.9999" : 364.61463628732025, + "100.0" : 364.61463628732025 + }, + "scoreUnit" : "us/op", + "rawData" : [ + [ + 356.47836323413975, + 364.61463628732025, + 355.393162879192, + 363.67716284487153, + 351.9024799408263 + ] + ] + }, + "secondaryMetrics" : { + } + }, + { + "jmhVersion" : "1.21", + "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.selfEquals", + "mode" : "avgt", + "threads" : 1, + "forks" : 1, + "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java", + "jvmArgs" : [ + "-Xmx2048M", + "-Xmx4g" + ], + "jdkVersion" : "1.8.0_171", + "vmName" : "Java HotSpot(TM) 64-Bit Server VM", + "vmVersion" : "25.171-b11", + "warmupIterations" : 8, + "warmupTime" : "10 s", + "warmupBatchSize" : 1, + "measurementIterations" : 5, + "measurementTime" : "10 s", + "measurementBatchSize" : 1, + "primaryMetric" : { + "score" : 431.7566794354569, + "scoreError" : 27.353383248644004, + "scoreConfidence" : [ + 404.4032961868129, + 459.11006268410085 + ], + "scorePercentiles" : { + "0.0" : 426.33479529843424, + "50.0" : 428.0558107251467, + "90.0" : 443.6223317651236, + "95.0" : 443.6223317651236, + "99.0" : 443.6223317651236, + "99.9" : 443.6223317651236, + "99.99" : 443.6223317651236, + "99.999" : 443.6223317651236, + "99.9999" : 443.6223317651236, + "100.0" : 443.6223317651236 + }, + "scoreUnit" : "us/op", + "rawData" : [ + [ + 
443.6223317651236, + 433.04934952339687, + 426.33479529843424, + 427.72110986518294, + 428.0558107251467 + ] + ] + }, + "secondaryMetrics" : { + } + } +] + + diff --git a/bench/archive/jmh-results-20190528102317.json b/bench/archive/jmh-results-20190528102317.json new file mode 100644 index 000000000..20e5712a1 --- /dev/null +++ b/bench/archive/jmh-results-20190528102317.json @@ -0,0 +1,163 @@ +[ + { + "jmhVersion" : "1.21", + "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalEquals", + "mode" : "avgt", + "threads" : 1, + "forks" : 1, + "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java", + "jvmArgs" : [ + "-Xmx2048M", + "-Xmx4g" + ], + "jdkVersion" : "1.8.0_171", + "vmName" : "Java HotSpot(TM) 64-Bit Server VM", + "vmVersion" : "25.171-b11", + "warmupIterations" : 8, + "warmupTime" : "10 s", + "warmupBatchSize" : 1, + "measurementIterations" : 5, + "measurementTime" : "10 s", + "measurementBatchSize" : 1, + "primaryMetric" : { + "score" : 7.084784138200969, + "scoreError" : 0.17086123184222066, + "scoreConfidence" : [ + 6.913922906358748, + 7.255645370043189 + ], + "scorePercentiles" : { + "0.0" : 7.044718220088579, + "50.0" : 7.07291741895685, + "90.0" : 7.153508778664247, + "95.0" : 7.153508778664247, + "99.0" : 7.153508778664247, + "99.9" : 7.153508778664247, + "99.99" : 7.153508778664247, + "99.999" : 7.153508778664247, + "99.9999" : 7.153508778664247, + "100.0" : 7.153508778664247 + }, + "scoreUnit" : "us/op", + "rawData" : [ + [ + 7.07291741895685, + 7.044718220088579, + 7.101541478172196, + 7.153508778664247, + 7.051234795122972 + ] + ] + }, + "secondaryMetrics" : { + } + }, + { + "jmhVersion" : "1.21", + "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.resolveCRS", + "mode" : "avgt", + "threads" : 1, + "forks" : 1, + "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java", + "jvmArgs" : [ + "-Xmx2048M", + "-Xmx4g" + ], + "jdkVersion" : "1.8.0_171", + "vmName" : 
"Java HotSpot(TM) 64-Bit Server VM", + "vmVersion" : "25.171-b11", + "warmupIterations" : 8, + "warmupTime" : "10 s", + "warmupBatchSize" : 1, + "measurementIterations" : 5, + "measurementTime" : "10 s", + "measurementBatchSize" : 1, + "primaryMetric" : { + "score" : 0.07204597602344914, + "scoreError" : 0.014595696186190624, + "scoreConfidence" : [ + 0.05745027983725852, + 0.08664167220963977 + ], + "scorePercentiles" : { + "0.0" : 0.067949003349235, + "50.0" : 0.07168162461398803, + "90.0" : 0.07812749538776566, + "95.0" : 0.07812749538776566, + "99.0" : 0.07812749538776566, + "99.9" : 0.07812749538776566, + "99.99" : 0.07812749538776566, + "99.999" : 0.07812749538776566, + "99.9999" : 0.07812749538776566, + "100.0" : 0.07812749538776566 + }, + "scoreUnit" : "us/op", + "rawData" : [ + [ + 0.0701740653091496, + 0.07812749538776566, + 0.07229769145710743, + 0.07168162461398803, + 0.067949003349235 + ] + ] + }, + "secondaryMetrics" : { + } + }, + { + "jmhVersion" : "1.21", + "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.selfEquals", + "mode" : "avgt", + "threads" : 1, + "forks" : 1, + "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java", + "jvmArgs" : [ + "-Xmx2048M", + "-Xmx4g" + ], + "jdkVersion" : "1.8.0_171", + "vmName" : "Java HotSpot(TM) 64-Bit Server VM", + "vmVersion" : "25.171-b11", + "warmupIterations" : 8, + "warmupTime" : "10 s", + "warmupBatchSize" : 1, + "measurementIterations" : 5, + "measurementTime" : "10 s", + "measurementBatchSize" : 1, + "primaryMetric" : { + "score" : 0.04160068241214439, + "scoreError" : 8.000367242705733E-4, + "scoreConfidence" : [ + 0.04080064568787382, + 0.04240071913641496 + ], + "scorePercentiles" : { + "0.0" : 0.0413462799613575, + "50.0" : 0.04153631854728124, + "90.0" : 0.04183927734451199, + "95.0" : 0.04183927734451199, + "99.0" : 0.04183927734451199, + "99.9" : 0.04183927734451199, + "99.99" : 0.04183927734451199, + "99.999" : 0.04183927734451199, + "99.9999" : 
0.04183927734451199, + "100.0" : 0.04183927734451199 + }, + "scoreUnit" : "us/op", + "rawData" : [ + [ + 0.0413462799613575, + 0.04183927734451199, + 0.04153631854728124, + 0.0417885245223439, + 0.0414930116852273 + ] + ] + }, + "secondaryMetrics" : { + } + } +] + + diff --git a/bench/archive/jmh-results-20190606082738.json b/bench/archive/jmh-results-20190606082738.json new file mode 100644 index 000000000..b8e7467b1 --- /dev/null +++ b/bench/archive/jmh-results-20190606082738.json @@ -0,0 +1,269 @@ +[ + { + "jmhVersion" : "1.21", + "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalEqualsFalse", + "mode" : "avgt", + "threads" : 1, + "forks" : 1, + "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java", + "jvmArgs" : [ + "-Xmx2048M", + "-Xmx4g" + ], + "jdkVersion" : "1.8.0_171", + "vmName" : "Java HotSpot(TM) 64-Bit Server VM", + "vmVersion" : "25.171-b11", + "warmupIterations" : 8, + "warmupTime" : "10 s", + "warmupBatchSize" : 1, + "measurementIterations" : 5, + "measurementTime" : "10 s", + "measurementBatchSize" : 1, + "primaryMetric" : { + "score" : 13.115296907959536, + "scoreError" : 2.5400077963191556, + "scoreConfidence" : [ + 10.57528911164038, + 15.655304704278691 + ], + "scorePercentiles" : { + "0.0" : 12.590614558661818, + "50.0" : 12.853830352008682, + "90.0" : 14.25982363939229, + "95.0" : 14.25982363939229, + "99.0" : 14.25982363939229, + "99.9" : 14.25982363939229, + "99.99" : 14.25982363939229, + "99.999" : 14.25982363939229, + "99.9999" : 14.25982363939229, + "100.0" : 14.25982363939229 + }, + "scoreUnit" : "us/op", + "rawData" : [ + [ + 12.853830352008682, + 12.590614558661818, + 12.829707376038487, + 14.25982363939229, + 13.042508613696407 + ] + ] + }, + "secondaryMetrics" : { + } + }, + { + "jmhVersion" : "1.21", + "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalEqualsTrue", + "mode" : "avgt", + "threads" : 1, + "forks" : 1, + "jvm" : 
"/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java", + "jvmArgs" : [ + "-Xmx2048M", + "-Xmx4g" + ], + "jdkVersion" : "1.8.0_171", + "vmName" : "Java HotSpot(TM) 64-Bit Server VM", + "vmVersion" : "25.171-b11", + "warmupIterations" : 8, + "warmupTime" : "10 s", + "warmupBatchSize" : 1, + "measurementIterations" : 5, + "measurementTime" : "10 s", + "measurementBatchSize" : 1, + "primaryMetric" : { + "score" : 6.5973550106567345, + "scoreError" : 0.1946737881542353, + "scoreConfidence" : [ + 6.402681222502499, + 6.7920287988109695 + ], + "scorePercentiles" : { + "0.0" : 6.523477357639692, + "50.0" : 6.6063669572343695, + "90.0" : 6.648688182671118, + "95.0" : 6.648688182671118, + "99.0" : 6.648688182671118, + "99.9" : 6.648688182671118, + "99.99" : 6.648688182671118, + "99.999" : 6.648688182671118, + "99.9999" : 6.648688182671118, + "100.0" : 6.648688182671118 + }, + "scoreUnit" : "us/op", + "rawData" : [ + [ + 6.635409622463296, + 6.523477357639692, + 6.648688182671118, + 6.572832933275196, + 6.6063669572343695 + ] + ] + }, + "secondaryMetrics" : { + } + }, + { + "jmhVersion" : "1.21", + "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalLazyEqualsFalse", + "mode" : "avgt", + "threads" : 1, + "forks" : 1, + "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java", + "jvmArgs" : [ + "-Xmx2048M", + "-Xmx4g" + ], + "jdkVersion" : "1.8.0_171", + "vmName" : "Java HotSpot(TM) 64-Bit Server VM", + "vmVersion" : "25.171-b11", + "warmupIterations" : 8, + "warmupTime" : "10 s", + "warmupBatchSize" : 1, + "measurementIterations" : 5, + "measurementTime" : "10 s", + "measurementBatchSize" : 1, + "primaryMetric" : { + "score" : 13.265730662256157, + "scoreError" : 1.915211216125259, + "scoreConfidence" : [ + 11.350519446130898, + 15.180941878381416 + ], + "scorePercentiles" : { + "0.0" : 12.850610015459289, + "50.0" : 13.13459015560355, + "90.0" : 14.120588306765669, + "95.0" : 14.120588306765669, + "99.0" 
: 14.120588306765669, + "99.9" : 14.120588306765669, + "99.99" : 14.120588306765669, + "99.999" : 14.120588306765669, + "99.9999" : 14.120588306765669, + "100.0" : 14.120588306765669 + }, + "scoreUnit" : "us/op", + "rawData" : [ + [ + 13.215365538647765, + 13.13459015560355, + 13.007499294804513, + 12.850610015459289, + 14.120588306765669 + ] + ] + }, + "secondaryMetrics" : { + } + }, + { + "jmhVersion" : "1.21", + "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalLazyEqualsTrue", + "mode" : "avgt", + "threads" : 1, + "forks" : 1, + "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java", + "jvmArgs" : [ + "-Xmx2048M", + "-Xmx4g" + ], + "jdkVersion" : "1.8.0_171", + "vmName" : "Java HotSpot(TM) 64-Bit Server VM", + "vmVersion" : "25.171-b11", + "warmupIterations" : 8, + "warmupTime" : "10 s", + "warmupBatchSize" : 1, + "measurementIterations" : 5, + "measurementTime" : "10 s", + "measurementBatchSize" : 1, + "primaryMetric" : { + "score" : 0.040409137130485946, + "scoreError" : 0.007586963982726796, + "scoreConfidence" : [ + 0.03282217314775915, + 0.047996101113212744 + ], + "scorePercentiles" : { + "0.0" : 0.03867633630965359, + "50.0" : 0.04003336568127626, + "90.0" : 0.04377238392008154, + "95.0" : 0.04377238392008154, + "99.0" : 0.04377238392008154, + "99.9" : 0.04377238392008154, + "99.99" : 0.04377238392008154, + "99.999" : 0.04377238392008154, + "99.9999" : 0.04377238392008154, + "100.0" : 0.04377238392008154 + }, + "scoreUnit" : "us/op", + "rawData" : [ + [ + 0.03867633630965359, + 0.04003336568127626, + 0.04016005719940341, + 0.03940354254201491, + 0.04377238392008154 + ] + ] + }, + "secondaryMetrics" : { + } + }, + { + "jmhVersion" : "1.21", + "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.resolveCRS", + "mode" : "avgt", + "threads" : 1, + "forks" : 1, + "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java", + "jvmArgs" : [ + "-Xmx2048M", + "-Xmx4g" + ], + 
"jdkVersion" : "1.8.0_171", + "vmName" : "Java HotSpot(TM) 64-Bit Server VM", + "vmVersion" : "25.171-b11", + "warmupIterations" : 8, + "warmupTime" : "10 s", + "warmupBatchSize" : 1, + "measurementIterations" : 5, + "measurementTime" : "10 s", + "measurementBatchSize" : 1, + "primaryMetric" : { + "score" : 0.06507083680791029, + "scoreError" : 0.003209489715829842, + "scoreConfidence" : [ + 0.061861347092080445, + 0.06828032652374012 + ], + "scorePercentiles" : { + "0.0" : 0.06425209212071442, + "50.0" : 0.06461825090771647, + "90.0" : 0.06612649264562556, + "95.0" : 0.06612649264562556, + "99.0" : 0.06612649264562556, + "99.9" : 0.06612649264562556, + "99.99" : 0.06612649264562556, + "99.999" : 0.06612649264562556, + "99.9999" : 0.06612649264562556, + "100.0" : 0.06612649264562556 + }, + "scoreUnit" : "us/op", + "rawData" : [ + [ + 0.06612649264562556, + 0.06579754694820603, + 0.06461825090771647, + 0.06425209212071442, + 0.06455980141728893 + ] + ] + }, + "secondaryMetrics" : { + } + } +] + + diff --git a/bench/archive/jmh-results-20190606094001.json b/bench/archive/jmh-results-20190606094001.json new file mode 100644 index 000000000..02cdd3194 --- /dev/null +++ b/bench/archive/jmh-results-20190606094001.json @@ -0,0 +1,269 @@ +[ + { + "jmhVersion" : "1.21", + "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalEqualsFalse", + "mode" : "avgt", + "threads" : 1, + "forks" : 1, + "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java", + "jvmArgs" : [ + "-Xmx2048M", + "-Xmx4g" + ], + "jdkVersion" : "1.8.0_171", + "vmName" : "Java HotSpot(TM) 64-Bit Server VM", + "vmVersion" : "25.171-b11", + "warmupIterations" : 8, + "warmupTime" : "10 s", + "warmupBatchSize" : 1, + "measurementIterations" : 5, + "measurementTime" : "10 s", + "measurementBatchSize" : 1, + "primaryMetric" : { + "score" : 13.076857373851485, + "scoreError" : 1.6593497203225103, + "scoreConfidence" : [ + 11.417507653528975, + 14.736207094173995 + ], + 
"scorePercentiles" : { + "0.0" : 12.633328785860648, + "50.0" : 13.200439575276704, + "90.0" : 13.659196200240215, + "95.0" : 13.659196200240215, + "99.0" : 13.659196200240215, + "99.9" : 13.659196200240215, + "99.99" : 13.659196200240215, + "99.999" : 13.659196200240215, + "99.9999" : 13.659196200240215, + "100.0" : 13.659196200240215 + }, + "scoreUnit" : "us/op", + "rawData" : [ + [ + 13.659196200240215, + 12.665249239331997, + 13.200439575276704, + 13.226073068547855, + 12.633328785860648 + ] + ] + }, + "secondaryMetrics" : { + } + }, + { + "jmhVersion" : "1.21", + "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalEqualsTrue", + "mode" : "avgt", + "threads" : 1, + "forks" : 1, + "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java", + "jvmArgs" : [ + "-Xmx2048M", + "-Xmx4g" + ], + "jdkVersion" : "1.8.0_171", + "vmName" : "Java HotSpot(TM) 64-Bit Server VM", + "vmVersion" : "25.171-b11", + "warmupIterations" : 8, + "warmupTime" : "10 s", + "warmupBatchSize" : 1, + "measurementIterations" : 5, + "measurementTime" : "10 s", + "measurementBatchSize" : 1, + "primaryMetric" : { + "score" : 0.2775587837304895, + "scoreError" : 0.015896893581796353, + "scoreConfidence" : [ + 0.2616618901486931, + 0.29345567731228583 + ], + "scorePercentiles" : { + "0.0" : 0.2724269842972383, + "50.0" : 0.2775487008943729, + "90.0" : 0.2816631615036355, + "95.0" : 0.2816631615036355, + "99.0" : 0.2816631615036355, + "99.9" : 0.2816631615036355, + "99.99" : 0.2816631615036355, + "99.999" : 0.2816631615036355, + "99.9999" : 0.2816631615036355, + "100.0" : 0.2816631615036355 + }, + "scoreUnit" : "us/op", + "rawData" : [ + [ + 0.28157426698598376, + 0.2816631615036355, + 0.27458080497121706, + 0.2775487008943729, + 0.2724269842972383 + ] + ] + }, + "secondaryMetrics" : { + } + }, + { + "jmhVersion" : "1.21", + "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalLazyEqualsFalse", + "mode" : "avgt", + "threads" : 1, + "forks" : 
1, + "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java", + "jvmArgs" : [ + "-Xmx2048M", + "-Xmx4g" + ], + "jdkVersion" : "1.8.0_171", + "vmName" : "Java HotSpot(TM) 64-Bit Server VM", + "vmVersion" : "25.171-b11", + "warmupIterations" : 8, + "warmupTime" : "10 s", + "warmupBatchSize" : 1, + "measurementIterations" : 5, + "measurementTime" : "10 s", + "measurementBatchSize" : 1, + "primaryMetric" : { + "score" : 13.143184640034391, + "scoreError" : 0.6865674500293741, + "scoreConfidence" : [ + 12.456617190005018, + 13.829752090063765 + ], + "scorePercentiles" : { + "0.0" : 12.925994808467195, + "50.0" : 13.17768387931118, + "90.0" : 13.341295384511856, + "95.0" : 13.341295384511856, + "99.0" : 13.341295384511856, + "99.9" : 13.341295384511856, + "99.99" : 13.341295384511856, + "99.999" : 13.341295384511856, + "99.9999" : 13.341295384511856, + "100.0" : 13.341295384511856 + }, + "scoreUnit" : "us/op", + "rawData" : [ + [ + 13.341295384511856, + 13.17768387931118, + 12.925994808467195, + 12.995056782282637, + 13.27589234559909 + ] + ] + }, + "secondaryMetrics" : { + } + }, + { + "jmhVersion" : "1.21", + "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.logicalLazyEqualsTrue", + "mode" : "avgt", + "threads" : 1, + "forks" : 1, + "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java", + "jvmArgs" : [ + "-Xmx2048M", + "-Xmx4g" + ], + "jdkVersion" : "1.8.0_171", + "vmName" : "Java HotSpot(TM) 64-Bit Server VM", + "vmVersion" : "25.171-b11", + "warmupIterations" : 8, + "warmupTime" : "10 s", + "warmupBatchSize" : 1, + "measurementIterations" : 5, + "measurementTime" : "10 s", + "measurementBatchSize" : 1, + "primaryMetric" : { + "score" : 0.03659740987415034, + "scoreError" : 0.0011385555881718446, + "scoreConfidence" : [ + 0.035458854285978496, + 0.037735965462322184 + ], + "scorePercentiles" : { + "0.0" : 0.036314017083098636, + "50.0" : 0.0364980924818408, + "90.0" : 0.03691511396572689, + 
"95.0" : 0.03691511396572689, + "99.0" : 0.03691511396572689, + "99.9" : 0.03691511396572689, + "99.99" : 0.03691511396572689, + "99.999" : 0.03691511396572689, + "99.9999" : 0.03691511396572689, + "100.0" : 0.03691511396572689 + }, + "scoreUnit" : "us/op", + "rawData" : [ + [ + 0.036314017083098636, + 0.03635019813669222, + 0.0364980924818408, + 0.03691511396572689, + 0.03690962770339316 + ] + ] + }, + "secondaryMetrics" : { + } + }, + { + "jmhVersion" : "1.21", + "benchmark" : "org.locationtech.rasterframes.bench.CRSBench.resolveCRS", + "mode" : "avgt", + "threads" : 1, + "forks" : 1, + "jvm" : "/Library/Java/JavaVirtualMachines/jdk1.8.0_171.jdk/Contents/Home/jre/bin/java", + "jvmArgs" : [ + "-Xmx2048M", + "-Xmx4g" + ], + "jdkVersion" : "1.8.0_171", + "vmName" : "Java HotSpot(TM) 64-Bit Server VM", + "vmVersion" : "25.171-b11", + "warmupIterations" : 8, + "warmupTime" : "10 s", + "warmupBatchSize" : 1, + "measurementIterations" : 5, + "measurementTime" : "10 s", + "measurementBatchSize" : 1, + "primaryMetric" : { + "score" : 0.06517305542168148, + "scoreError" : 0.005310236878903678, + "scoreConfidence" : [ + 0.05986281854277779, + 0.07048329230058516 + ], + "scorePercentiles" : { + "0.0" : 0.06355938701044708, + "50.0" : 0.06512777854120488, + "90.0" : 0.06700762802360496, + "95.0" : 0.06700762802360496, + "99.0" : 0.06700762802360496, + "99.9" : 0.06700762802360496, + "99.99" : 0.06700762802360496, + "99.999" : 0.06700762802360496, + "99.9999" : 0.06700762802360496, + "100.0" : 0.06700762802360496 + }, + "scoreUnit" : "us/op", + "rawData" : [ + [ + 0.06700762802360496, + 0.06512777854120488, + 0.06418521981870606, + 0.06598526371444442, + 0.06355938701044708 + ] + ] + }, + "secondaryMetrics" : { + } + } +] + + diff --git a/bench/src/main/resources/log4j.properties b/bench/src/main/resources/log4j.properties index f8cb2c5b9..9ed1d66ca 100644 --- a/bench/src/main/resources/log4j.properties +++ b/bench/src/main/resources/log4j.properties @@ -18,7 +18,7 @@ # # Set 
everything to be logged to the console -log4j.rootCategory=TRACE, FILE +log4j.rootCategory=INFO, FILE log4j.appender.FILE=org.apache.log4j.FileAppender log4j.appender.FILE.File=target/jmh-log.out @@ -36,7 +36,7 @@ log4j.logger.org.spark_project.jetty=WARN log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO -log4j.logger.astraea.spark=DEBUG +log4j.logger.org.locationtech=DEBUG # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/RasterRefBench.scala b/bench/src/main/scala/astraea/spark/rasterframes/bench/RasterRefBench.scala deleted file mode 100644 index c68c826e8..000000000 --- a/bench/src/main/scala/astraea/spark/rasterframes/bench/RasterRefBench.scala +++ /dev/null @@ -1,129 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2018 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - * - */ - -package astraea.spark.rasterframes.bench - - -import java.util.concurrent.TimeUnit - -import astraea.spark.rasterframes -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.expressions.transformers.RasterSourceToTiles -import astraea.spark.rasterframes.ref.RasterSource -import astraea.spark.rasterframes.ref.RasterSource.ReadCallback -import com.typesafe.scalalogging.LazyLogging -import org.apache.spark.sql._ -import org.openjdk.jmh.annotations._ -/** - * - * - * @since 11/1/18 - */ -@BenchmarkMode(Array(Mode.AverageTime)) -@State(Scope.Benchmark) -@OutputTimeUnit(TimeUnit.MILLISECONDS) -class RasterRefBench extends SparkEnv with LazyLogging { - import spark.implicits._ - - var expandedDF: DataFrame = _ - var singleDF: DataFrame = _ - - @Setup(Level.Trial) - def setupData(): Unit = { - val watcher = new ReadCallback { - var count: Long = 0 - var calls: Int = 0 - override def readRange(source: RasterSource, start: Long, length: Int): Unit = { - calls += 1 - count += length - logger.debug("%4d -- %,d bytes".format(calls, count)) - } - } - - val r1 = RasterSource(remoteCOGSingleband1, Some(watcher)) - val r2 = RasterSource(remoteCOGSingleband2, Some(watcher)) - singleDF = Seq((r1, r2)).toDF("B1", "B2") - .select(RasterSourceToTiles(false, $"B1", $"B2")) - - expandedDF = Seq((r1, r2)).toDF("B1", "B2") - .select(RasterSourceToTiles(true, $"B1", $"B2")) - } - - @Benchmark - def computeDifferenceExpanded() = { - expandedDF - .select(normalized_difference($"B1", $"B2")) - .cache() - .count() - } - - @Benchmark - def computeDifferenceSingle() = { - singleDF - .select(normalized_difference($"B1", $"B2")) - .cache() - .count() - } - - @Benchmark - def computeStatsSingle() = { - singleDF.select(agg_stats($"B1")).collect() - } - - @Benchmark - def computeStatsExpanded() = { - expandedDF.select(agg_stats($"B1")).collect() - } - - @Benchmark - def computeDifferenceStats() = { - 
singleDF.select(agg_stats(normalized_difference($"B1", $"B2"))).collect() - } - -} - -object RasterRefBench { - -// import org.openjdk.jmh.runner.RunnerException -// import org.openjdk.jmh.runner.options.OptionsBuilder -// -// @throws[RunnerException] - def main(args: Array[String]): Unit = { - - val thing = new RasterRefBench() - thing.setupData() - rasterframes.util.time("compute stats expanded") { - thing.computeStatsSingle() - } - - rasterframes.util.time("compute stats single") { - thing.computeStatsExpanded() - } - - // val opt = new OptionsBuilder() -// .include(classOf[RasterRefBench].getSimpleName) -// .threads(4) -// .forks(5) -// .build() -// -// new Runner(opt).run() - } -} diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/BinaryTileOpBench.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/BinaryTileOpBench.scala similarity index 92% rename from bench/src/main/scala/astraea/spark/rasterframes/bench/BinaryTileOpBench.scala rename to bench/src/main/scala/org/locationtech/rasterframes/bench/BinaryTileOpBench.scala index 133d93356..dce7a7715 100644 --- a/bench/src/main/scala/astraea/spark/rasterframes/bench/BinaryTileOpBench.scala +++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/BinaryTileOpBench.scala @@ -19,11 +19,12 @@ * */ -package astraea.spark.rasterframes.bench +package org.locationtech.rasterframes.bench + import java.util.concurrent.TimeUnit -import astraea.spark.rasterframes.expressions.localops._ -import astraea.spark.rasterframes._ +import org.locationtech.rasterframes.expressions.localops._ +import org.locationtech.rasterframes._ import geotrellis.raster.Tile import geotrellis.raster.mapalgebra.{local => gt} import org.apache.spark.sql._ diff --git a/bench/src/main/scala/org/locationtech/rasterframes/bench/CRSBench.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/CRSBench.scala new file mode 100644 index 000000000..424533b58 --- /dev/null +++ 
b/bench/src/main/scala/org/locationtech/rasterframes/bench/CRSBench.scala @@ -0,0 +1,69 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.bench + +import java.util.concurrent.TimeUnit + +import geotrellis.proj4.{CRS, LatLng, WebMercator} +import org.locationtech.proj4j.CoordinateReferenceSystem +import org.locationtech.rasterframes.model.LazyCRS +import org.openjdk.jmh.annotations._ + +@BenchmarkMode(Array(Mode.AverageTime)) +@State(Scope.Benchmark) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +class CRSBench extends SparkEnv { + + var crs1: CRS = _ + var crs2: CRS = _ + + @Setup(Level.Invocation) + def setupData(): Unit = { + crs1 = LazyCRS("epsg:4326") + crs2 = LazyCRS(WebMercator.toProj4String) + } + + @Benchmark + def resolveCRS(): CoordinateReferenceSystem = { + crs1.proj4jCrs + } + + @Benchmark + def logicalEqualsTrue(): Boolean = { + crs1 == LatLng + } + + @Benchmark + def logicalEqualsFalse(): Boolean = { + crs1 == WebMercator + } + + @Benchmark + def logicalLazyEqualsTrue(): Boolean = { + crs1 == crs1 + } + + @Benchmark + def logicalLazyEqualsFalse(): Boolean = { + crs1 == crs2 + } +} diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/CatalystSerializerBench.scala 
b/bench/src/main/scala/org/locationtech/rasterframes/bench/CatalystSerializerBench.scala similarity index 94% rename from bench/src/main/scala/astraea/spark/rasterframes/bench/CatalystSerializerBench.scala rename to bench/src/main/scala/org/locationtech/rasterframes/bench/CatalystSerializerBench.scala index b4abad9bf..12a6b0486 100644 --- a/bench/src/main/scala/astraea/spark/rasterframes/bench/CatalystSerializerBench.scala +++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/CatalystSerializerBench.scala @@ -19,15 +19,15 @@ * */ -package astraea.spark.rasterframes.bench +package org.locationtech.rasterframes.bench import java.util.concurrent.TimeUnit -import astraea.spark.rasterframes.encoders.{CatalystSerializer, StandardEncoders} import geotrellis.proj4.{CRS, LatLng, Sinusoidal} import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.locationtech.rasterframes.encoders.{CatalystSerializer, StandardEncoders} import org.openjdk.jmh.annotations._ @BenchmarkMode(Array(Mode.AverageTime)) diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/MultibandRenderBench.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/MultibandRenderBench.scala similarity index 87% rename from bench/src/main/scala/astraea/spark/rasterframes/bench/MultibandRenderBench.scala rename to bench/src/main/scala/org/locationtech/rasterframes/bench/MultibandRenderBench.scala index 383710205..8636a6a80 100644 --- a/bench/src/main/scala/astraea/spark/rasterframes/bench/MultibandRenderBench.scala +++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/MultibandRenderBench.scala @@ -19,12 +19,13 @@ * */ -package astraea.spark.rasterframes.bench +package org.locationtech.rasterframes.bench + import java.util.concurrent.TimeUnit -import astraea.spark.rasterframes.util.MultibandRender.Landsat8NaturalColor +import 
org.locationtech.rasterframes.util.MultibandRender.Landsat8NaturalColor import geotrellis.raster._ -import geotrellis.raster.io.geotiff.{GeoTiff, MultibandGeoTiff} +import geotrellis.raster.io.geotiff.MultibandGeoTiff import org.apache.commons.io.IOUtils import org.openjdk.jmh.annotations._ diff --git a/bench/src/main/scala/org/locationtech/rasterframes/bench/RasterRefBench.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/RasterRefBench.scala new file mode 100644 index 000000000..448fab9c3 --- /dev/null +++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/RasterRefBench.scala @@ -0,0 +1,87 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2018 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.bench + +import java.util.concurrent.TimeUnit + +import com.typesafe.scalalogging.LazyLogging +import org.apache.spark.sql._ +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs +import org.locationtech.rasterframes.expressions.transformers.RasterRefToTile +import org.locationtech.rasterframes.model.TileDimensions +import org.locationtech.rasterframes.ref.RasterSource +import org.openjdk.jmh.annotations._ + +@BenchmarkMode(Array(Mode.AverageTime)) +@State(Scope.Benchmark) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +class RasterRefBench extends SparkEnv with LazyLogging { + import spark.implicits._ + + var expandedDF: DataFrame = _ + var singleDF: DataFrame = _ + + @Setup(Level.Trial) + def setupData(): Unit = { + val r1 = RasterSource(remoteCOGSingleband1) + val r2 = RasterSource(remoteCOGSingleband2) + + singleDF = Seq((r1, r2)).toDF("B1", "B2") + .select(RasterRefToTile(RasterSourceToRasterRefs(Some(TileDimensions(r1.dimensions)), Seq(0), $"B1", $"B2"))) + + expandedDF = Seq((r1, r2)).toDF("B1", "B2") + .select(RasterRefToTile(RasterSourceToRasterRefs($"B1", $"B2"))) + } + + @Benchmark + def computeDifferenceExpanded() = { + expandedDF + .select(rf_normalized_difference($"B1", $"B2")) + .cache() + .count() + } + + @Benchmark + def computeDifferenceSingle() = { + singleDF + .select(rf_normalized_difference($"B1", $"B2")) + .cache() + .count() + } + + @Benchmark + def computeStatsSingle() = { + singleDF.select(rf_agg_stats($"B1")).collect() + } + + @Benchmark + def computeStatsExpanded() = { + expandedDF.select(rf_agg_stats($"B1")).collect() + } + + @Benchmark + def computeDifferenceStats() = { + singleDF.select(rf_agg_stats(rf_normalized_difference($"B1", $"B2"))).collect() + } + +} \ No newline at end of file diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/SparkEnv.scala 
b/bench/src/main/scala/org/locationtech/rasterframes/bench/SparkEnv.scala similarity index 92% rename from bench/src/main/scala/astraea/spark/rasterframes/bench/SparkEnv.scala rename to bench/src/main/scala/org/locationtech/rasterframes/bench/SparkEnv.scala index 8b718479a..d3691f800 100644 --- a/bench/src/main/scala/astraea/spark/rasterframes/bench/SparkEnv.scala +++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/SparkEnv.scala @@ -15,11 +15,13 @@ * License for the specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.bench +package org.locationtech.rasterframes.bench -import astraea.spark.rasterframes._ +import org.locationtech.rasterframes._ import org.apache.spark.sql.SparkSession import org.openjdk.jmh.annotations.{Level, TearDown} diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/StatsComputeBench.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/StatsComputeBench.scala similarity index 75% rename from bench/src/main/scala/astraea/spark/rasterframes/bench/StatsComputeBench.scala rename to bench/src/main/scala/org/locationtech/rasterframes/bench/StatsComputeBench.scala index c9aa7eef4..2ebc3efc0 100644 --- a/bench/src/main/scala/astraea/spark/rasterframes/bench/StatsComputeBench.scala +++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/StatsComputeBench.scala @@ -15,14 +15,16 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.bench +package org.locationtech.rasterframes.bench import java.util.concurrent.TimeUnit -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.stats.CellHistogram +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.stats.CellHistogram import org.apache.spark.sql._ import org.openjdk.jmh.annotations._ @@ -57,26 +59,26 @@ class StatsComputeBench extends SparkEnv { // @Benchmark // def computeStats(): Array[CellStatistics] = { -// tiles.select(agg_stats($"tile")).collect() +// tiles.select(rf_agg_stats($"tile")).collect() // } @Benchmark def computeHistogram(): Array[CellHistogram] = { - tiles.select(agg_approx_histogram($"tile")).collect() + tiles.select(rf_agg_approx_histogram($"tile")).collect() } // @Benchmark // def extractMean(): Array[Double] = { -// tiles.select(agg_stats($"tile").getField("mean")).map(_.getDouble(0)).collect() +// tiles.select(rf_agg_stats($"tile").getField("mean")).map(_.getDouble(0)).collect() // } // // @Benchmark // def directMean(): Array[Double] = { -// tiles.repartition(10).select(agg_mean($"tile")).collect() +// tiles.repartition(10).select(rf_agg_mean($"tile")).collect() // } // @Benchmark // def computeCounts() = { -// tiles.toDF("tile").select(data_cells($"tile") as "counts").agg(sum($"counts")).collect() +// tiles.toDF("tile").select(rf_data_cells($"tile") as "counts").agg(sum($"counts")).collect() // } } diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/TileAssembleBench.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/TileAssembleBench.scala similarity index 90% rename from bench/src/main/scala/astraea/spark/rasterframes/bench/TileAssembleBench.scala rename to bench/src/main/scala/org/locationtech/rasterframes/bench/TileAssembleBench.scala index 8fe31ef0d..8b33af369 100644 --- a/bench/src/main/scala/astraea/spark/rasterframes/bench/TileAssembleBench.scala +++ 
b/bench/src/main/scala/org/locationtech/rasterframes/bench/TileAssembleBench.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2018 Astraea, Inc. + * Copyright 2017 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -19,10 +19,11 @@ * */ -package astraea.spark.rasterframes.bench +package org.locationtech.rasterframes.bench + import java.util.concurrent.TimeUnit -import astraea.spark.rasterframes._ +import org.locationtech.rasterframes._ import geotrellis.raster.ByteConstantNoDataCellType import org.apache.spark.sql._ import org.openjdk.jmh.annotations._ @@ -44,7 +45,7 @@ class TileAssembleBench extends SparkEnv { var cells1: DataFrame = _ var cells2: DataFrame = _ - val assembler = assemble_tile( + val assembler = rf_assemble_tile( $"column_index", $"row_index", $"tile", tileSize, tileSize, cellType ) @@ -53,7 +54,7 @@ class TileAssembleBench extends SparkEnv { def setupData(): Unit = { cells1 = Seq.fill(numTiles)(randomTile(tileSize, tileSize, cellType.name)).zipWithIndex .toDF("tile", "id") - .select($"id", explode_tiles($"tile")) + .select($"id", rf_explode_tiles($"tile")) .repartition(4, $"id") .cache() diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/TileCellScanBench.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/TileCellScanBench.scala similarity index 92% rename from bench/src/main/scala/astraea/spark/rasterframes/bench/TileCellScanBench.scala rename to bench/src/main/scala/org/locationtech/rasterframes/bench/TileCellScanBench.scala index 64ee8716e..350ac811a 100644 --- a/bench/src/main/scala/astraea/spark/rasterframes/bench/TileCellScanBench.scala +++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/TileCellScanBench.scala @@ -15,15 +15,17 @@ * License for the specific language governing permissions and limitations under * the 
License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.bench +package org.locationtech.rasterframes.bench import java.util.concurrent.TimeUnit -import astraea.spark.rasterframes.tiles.InternalRowTile import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.rf.TileUDT +import org.locationtech.rasterframes.tiles.InternalRowTile import org.openjdk.jmh.annotations._ @BenchmarkMode(Array(Mode.AverageTime)) diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/TileEncodeBench.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/TileEncodeBench.scala similarity index 85% rename from bench/src/main/scala/astraea/spark/rasterframes/bench/TileEncodeBench.scala rename to bench/src/main/scala/org/locationtech/rasterframes/bench/TileEncodeBench.scala index 7f25235ae..a4b0a2595 100644 --- a/bench/src/main/scala/astraea/spark/rasterframes/bench/TileEncodeBench.scala +++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/TileEncodeBench.scala @@ -15,19 +15,22 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.bench +package org.locationtech.rasterframes.bench import java.net.URI import java.util.concurrent.TimeUnit -import astraea.spark.rasterframes.ref.RasterRef.RasterRefTile -import astraea.spark.rasterframes.ref.{RasterRef, RasterSource} +import org.locationtech.rasterframes.ref.RasterRef.RasterRefTile +import org.locationtech.rasterframes.ref.RasterRef import geotrellis.raster.Tile import geotrellis.vector.Extent import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.locationtech.rasterframes.ref.{RasterRef, RasterSource} import org.openjdk.jmh.annotations._ @BenchmarkMode(Array(Mode.AverageTime)) @@ -52,7 +55,7 @@ class TileEncodeBench extends SparkEnv { cellTypeName match { case "rasterRef" ⇒ val baseCOG = "https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/149/039/LC08_L1TP_149039_20170411_20170415_01_T1/LC08_L1TP_149039_20170411_20170415_01_T1_B1.TIF" - tile = RasterRefTile(RasterRef(RasterSource(URI.create(baseCOG)), Some(Extent(253785.0, 3235185.0, 485115.0, 3471015.0)))) + tile = RasterRefTile(RasterRef(RasterSource(URI.create(baseCOG)), 0, Some(Extent(253785.0, 3235185.0, 485115.0, 3471015.0)))) case _ ⇒ tile = randomTile(tileSize, tileSize, cellTypeName) } diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/TileExplodeBench.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/TileExplodeBench.scala similarity index 86% rename from bench/src/main/scala/astraea/spark/rasterframes/bench/TileExplodeBench.scala rename to bench/src/main/scala/org/locationtech/rasterframes/bench/TileExplodeBench.scala index ebd4f169c..7f3352f69 100644 --- a/bench/src/main/scala/astraea/spark/rasterframes/bench/TileExplodeBench.scala +++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/TileExplodeBench.scala @@ -15,12 +15,14 @@ * License for the specific language governing permissions and 
limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.bench +package org.locationtech.rasterframes.bench import java.util.concurrent.TimeUnit -import astraea.spark.rasterframes._ +import org.locationtech.rasterframes._ import org.apache.spark.sql._ import org.apache.spark.sql.functions._ import org.openjdk.jmh.annotations._ @@ -56,11 +58,11 @@ class TileExplodeBench extends SparkEnv { @Benchmark def arrayExplode() = { - tiles.select(posexplode(tile_to_array_double($"tile"))).count() + tiles.select(posexplode(rf_tile_to_array_double($"tile"))).count() } @Benchmark def tileExplode() = { - tiles.select(explode_tiles($"tile")).count() + tiles.select(rf_explode_tiles($"tile")).count() } } diff --git a/bench/src/main/scala/astraea/spark/rasterframes/bench/package.scala b/bench/src/main/scala/org/locationtech/rasterframes/bench/package.scala similarity index 97% rename from bench/src/main/scala/astraea/spark/rasterframes/bench/package.scala rename to bench/src/main/scala/org/locationtech/rasterframes/bench/package.scala index 525c86734..65d8ab88f 100644 --- a/bench/src/main/scala/astraea/spark/rasterframes/bench/package.scala +++ b/bench/src/main/scala/org/locationtech/rasterframes/bench/package.scala @@ -17,7 +17,7 @@ * */ -package astraea.spark.rasterframes +package org.locationtech.rasterframes import java.net.URI diff --git a/build.sbt b/build.sbt index 05aed4e8d..5aa2e6009 100644 --- a/build.sbt +++ b/build.sbt @@ -1,46 +1,163 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2017-2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + addCommandAlias("makeSite", "docs/makeSite") +addCommandAlias("previewSite", "docs/previewSite") +addCommandAlias("ghpagesPushSite", "docs/ghpagesPushSite") addCommandAlias("console", "datasource/console") +// Prefer our own IntegrationTest config definition, which inherits from Test. +lazy val IntegrationTest = config("it") extend Test + lazy val root = project .in(file(".")) .withId("RasterFrames") .aggregate(core, datasource, pyrasterframes, experimental) + .enablePlugins(RFReleasePlugin) .settings(publish / skip := true) - .settings(releaseSettings) - -lazy val deployment = project - .dependsOn(root) - .disablePlugins(SparkPackagePlugin) -lazy val IntegrationTest = config("it") extend Test +lazy val `rf-notebook` = project + .dependsOn(pyrasterframes) + .enablePlugins(RFAssemblyPlugin, DockerPlugin) + .settings(publish / skip := true) lazy val core = project + .enablePlugins(BuildInfoPlugin) .configs(IntegrationTest) .settings(inConfig(IntegrationTest)(Defaults.testSettings)) .settings(Defaults.itSettings) - .disablePlugins(SparkPackagePlugin) + .settings( + moduleName := "rasterframes", + libraryDependencies ++= Seq( + shapeless, + `jts-core`, + geomesa("z3").value, + geomesa("spark-jts").value, + `geotrellis-contrib-vlm`, + `geotrellis-contrib-gdal`, + spark("core").value % Provided, + spark("mllib").value % Provided, + spark("sql").value % Provided, + geotrellis("spark").value, + geotrellis("raster").value, + geotrellis("s3").value, + geotrellis("spark-testkit").value % Test excludeAll 
( + ExclusionRule(organization = "org.scalastic"), + ExclusionRule(organization = "org.scalatest") + ), + scaffeine, + scalatest + ), + buildInfoKeys ++= Seq[BuildInfoKey]( + moduleName, version, scalaVersion, sbtVersion, rfGeoTrellisVersion, rfGeoMesaVersion, rfSparkVersion + ), + buildInfoPackage := "org.locationtech.rasterframes", + buildInfoObject := "RFBuildInfo", + buildInfoOptions := Seq( + BuildInfoOption.ToMap, + BuildInfoOption.BuildTime, + BuildInfoOption.ToJson + ) + ) lazy val pyrasterframes = project .dependsOn(core, datasource, experimental) - .settings(assemblySettings) + .enablePlugins(RFAssemblyPlugin, PythonBuildPlugin) + .settings( + libraryDependencies ++= Seq( + geotrellis("s3").value, + spark("core").value % Provided, + spark("mllib").value % Provided, + spark("sql").value % Provided + ) + ) lazy val datasource = project + .configs(IntegrationTest) + .settings(Defaults.itSettings) .dependsOn(core % "test->test;compile->compile") - .disablePlugins(SparkPackagePlugin) + .settings( + moduleName := "rasterframes-datasource", + libraryDependencies ++= Seq( + geotrellis("s3").value, + spark("core").value % Provided, + spark("mllib").value % Provided, + spark("sql").value % Provided + ), + initialCommands in console := (initialCommands in console).value + + """ + |import org.locationtech.rasterframes.datasource.geotrellis._ + |import org.locationtech.rasterframes.datasource.geotiff._ + |""".stripMargin + ) lazy val experimental = project .configs(IntegrationTest) .settings(Defaults.itSettings) .dependsOn(core % "test->test;it->test;compile->compile") .dependsOn(datasource % "test->test;it->test;compile->compile") - .disablePlugins(SparkPackagePlugin) + .settings( + moduleName := "rasterframes-experimental", + libraryDependencies ++= Seq( + geotrellis("s3").value, + spark("core").value % Provided, + spark("mllib").value % Provided, + spark("sql").value % Provided + ), + fork in IntegrationTest := true, + javaOptions in IntegrationTest := 
Seq("-Xmx2G"), + parallelExecution in IntegrationTest := false + ) lazy val docs = project - .dependsOn(core, datasource) - .disablePlugins(SparkPackagePlugin) + .dependsOn(core, datasource, pyrasterframes) + .enablePlugins(SiteScaladocPlugin, ParadoxPlugin, GhpagesPlugin, ScalaUnidocPlugin) + .settings( + apiURL := Some(url("http://rasterframes.io/latest/api")), + autoAPIMappings := true, + ghpagesNoJekyll := true, + ScalaUnidoc / siteSubdirName := "latest/api", + paradox / siteSubdirName := ".", + paradoxProperties ++= Map( + "github.base_url" -> "https://github.com/locationtech/rasterframes", + "version" -> version.value, + "scaladoc.org.apache.spark.sql.rf" -> "http://rasterframes.io/latest" + ), + paradoxNavigationExpandDepth := Some(3), + paradoxTheme := Some(builtinParadoxTheme("generic")), + makeSite := makeSite + .dependsOn(Compile / unidoc) + .dependsOn((Compile / paradox) + .dependsOn(pyrasterframes / doc) + ).value, + Compile / paradox / sourceDirectories += (pyrasterframes / Python / doc / target).value, + ) + .settings( + addMappingsToSiteDir(ScalaUnidoc / packageDoc / mappings, ScalaUnidoc / siteSubdirName) + ) + .settings( + addMappingsToSiteDir(Compile / paradox / mappings, paradox / siteSubdirName) + ) lazy val bench = project .dependsOn(core % "compile->test") - .disablePlugins(SparkPackagePlugin) .settings(publish / skip := true) diff --git a/build/circleci/Dockerfile b/build/circleci/Dockerfile new file mode 100644 index 000000000..334c1b15f --- /dev/null +++ b/build/circleci/Dockerfile @@ -0,0 +1,74 @@ +FROM circleci/openjdk:8-jdk + +ENV OPENJPEG_VERSION 2.3.0 +ENV GDAL_VERSION 2.4.1 +ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/ + +# most of these libraries required for +# python-pip pandoc && pip install setuptools => required for pyrasterframes testing +RUN sudo apt-get update && \ + sudo apt remove \ + python python-minimal python2.7 python2.7-minimal \ + libpython-stdlib libpython2.7 libpython2.7-minimal libpython2.7-stdlib \ + && sudo 
apt-get install -y \ + python3 \ + python3-pip \ + pandoc \ + wget \ + gcc g++ build-essential \ + libcurl4-gnutls-dev \ + libproj-dev \ + libgeos-dev \ + libhdf4-alt-dev \ + libhdf5-serial-dev \ + bash-completion \ + cmake \ + imagemagick \ + libpng-dev \ + swig \ + ant \ + && sudo apt autoremove \ + && sudo apt-get clean all \ + && pip3 install setuptools ipython==6.2.1 \ + && sudo update-alternatives --install /usr/bin/python python /usr/bin/python3 1 + +# install OpenJPEG +RUN cd /tmp && \ + wget https://github.com/uclouvain/openjpeg/archive/v${OPENJPEG_VERSION}.tar.gz && \ + tar -xf v${OPENJPEG_VERSION}.tar.gz && \ + cd openjpeg-${OPENJPEG_VERSION}/ && \ + mkdir build && \ + cd build && \ + cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local/ && \ + make -j && \ + sudo make install && \ + cd /tmp && rm -Rf v${OPENJPEG_VERSION}.tar.gz openjpeg* + +# Compile and install GDAL with Java bindings +RUN cd /tmp && \ + wget http://download.osgeo.org/gdal/${GDAL_VERSION}/gdal-${GDAL_VERSION}.tar.gz && \ + tar -xf gdal-${GDAL_VERSION}.tar.gz && \ + cd gdal-${GDAL_VERSION} && \ + ./configure \ + --with-curl \ + --with-hdf4 \ + --with-hdf5 \ + --with-geos \ + --with-geotiff=internal \ + --with-hide-internal-symbols \ + --with-java=$JAVA_HOME \ + --with-libtiff=internal \ + --with-libz=internal \ + --with-mrf \ + --with-openjpeg \ + --with-threads \ + --without-jp2mrsid \ + --without-netcdf \ + --without-ecw \ + && \ + make -j 8 && \ + sudo make install && \ + cd swig/java && \ + sudo make install && \ + sudo ldconfig && \ + cd /tmp && sudo rm -Rf gdal* diff --git a/build/circleci/README.md b/build/circleci/README.md new file mode 100644 index 000000000..69b9cdff3 --- /dev/null +++ b/build/circleci/README.md @@ -0,0 +1,6 @@ +# CircleCI Dockerfile Build file + +```bash +docker build -t s22s/rasterframes-circleci:latest . 
+docker push s22s/rasterframes-circleci:latest +``` diff --git a/core/build.sbt b/core/build.sbt deleted file mode 100644 index eb5164045..000000000 --- a/core/build.sbt +++ /dev/null @@ -1,35 +0,0 @@ -enablePlugins(BuildInfoPlugin) - -moduleName := "rasterframes" - -libraryDependencies ++= Seq( - "com.chuusai" %% "shapeless" % "2.3.2", - "org.locationtech.geomesa" %% "geomesa-z3" % rfGeoMesaVersion.value, - "org.locationtech.geomesa" %% "geomesa-spark-jts" % rfGeoMesaVersion.value exclude("jgridshift", "jgridshift"), - - spark("core").value % Provided, - spark("mllib").value % Provided, - spark("sql").value % Provided, - geotrellis("spark").value, - geotrellis("raster").value, - geotrellis("s3").value, - geotrellis("spark-testkit").value % Test excludeAll ( - ExclusionRule(organization = "org.scalastic"), - ExclusionRule(organization = "org.scalatest") - ), - scalaTest -) - -buildInfoKeys ++= Seq[BuildInfoKey]( - name, version, scalaVersion, sbtVersion, rfGeoTrellisVersion, rfGeoMesaVersion, rfSparkVersion -) - -buildInfoPackage := "astraea.spark.rasterframes" - -buildInfoObject := "RFBuildInfo" - -buildInfoOptions := Seq( - BuildInfoOption.ToMap, - BuildInfoOption.BuildTime -) - diff --git a/core/src/it/resources/log4j.properties b/core/src/it/resources/log4j.properties index 378ae8e61..1135e4b34 100644 --- a/core/src/it/resources/log4j.properties +++ b/core/src/it/resources/log4j.properties @@ -37,8 +37,8 @@ log4j.logger.org.spark_project.jetty=WARN log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO -log4j.logger.astraea.spark.rasterframes=DEBUG -log4j.logger.astraea.spark.rasterframes.ref=TRACE +log4j.logger.org.locationtech.rasterframes=WARN +log4j.logger.org.locationtech.rasterframes.ref=WARN log4j.logger.org.apache.parquet.hadoop.ParquetRecordReader=OFF # SPARK-9183: Settings to avoid annoying messages 
when looking up nonexistent UDFs in SparkSQL with Hive support diff --git a/core/src/it/scala/astraea/spark/rasterframes/ref/RasterSourceIT.scala b/core/src/it/scala/astraea/spark/rasterframes/ref/RasterSourceIT.scala deleted file mode 100644 index 6f9069183..000000000 --- a/core/src/it/scala/astraea/spark/rasterframes/ref/RasterSourceIT.scala +++ /dev/null @@ -1,61 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2019 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - * - * SPDX-License-Identifier: Apache-2.0 - * - */ - -package astraea.spark.rasterframes.ref - -import java.net.URI - -import astraea.spark.rasterframes.TestEnvironment.ReadMonitor -import astraea.spark.rasterframes.ref.RasterSource.FileGeoTiffRasterSource -import astraea.spark.rasterframes.{TestData, TestEnvironment} -import geotrellis.raster.io.geotiff.GeoTiff -import geotrellis.vector.Extent -import org.apache.spark.sql.rf.RasterSourceUDT - -/** - * - * - * @since 8/22/18 - */ -class RasterSourceIT extends TestEnvironment with TestData { - def sub(e: Extent) = { - val c = e.center - val w = e.width - val h = e.height - Extent(c.x, c.y, c.x + w * 0.1, c.y + h * 0.1) - } - - describe("RasterSource.readAll") { - it("should return consistently ordered tiles across bands for a given scene") { - // These specific scenes exhibit the problem where we see different subtile segment ordering across the bands of a given scene. 
- val rURI = new URI("https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/016/034/LC08_L1TP_016034_20181003_20181003_01_RT/LC08_L1TP_016034_20181003_20181003_01_RT_B4.TIF") - val bURI = new URI("https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/016/034/LC08_L1TP_016034_20181003_20181003_01_RT/LC08_L1TP_016034_20181003_20181003_01_RT_B2.TIF") - - val red = RasterSource(rURI).readAll().left.get - val blue = RasterSource(bURI).readAll().left.get - - red should not be empty - red.size should equal(blue.size) - - red.map(_.dimensions) should contain theSameElementsAs blue.map(_.dimensions) - } - } -} diff --git a/core/src/it/scala/org/locationtech/rasterframes/ref/RasterSourceIT.scala b/core/src/it/scala/org/locationtech/rasterframes/ref/RasterSourceIT.scala new file mode 100644 index 000000000..ae8b0b1d4 --- /dev/null +++ b/core/src/it/scala/org/locationtech/rasterframes/ref/RasterSourceIT.scala @@ -0,0 +1,126 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.ref + +import java.lang.Math.ceil +import java.net.URI + +import org.locationtech.rasterframes +import org.locationtech.rasterframes.util.time +import org.locationtech.rasterframes.{NOMINAL_TILE_SIZE, TestData, TestEnvironment} + +/** + * + * + * @since 8/22/18 + */ +class RasterSourceIT extends TestEnvironment with TestData { + + describe("RasterSource.readAll") { + it("should return consistently ordered tiles across bands for a given scene") { + time(s"two band comparison prefer-gdal=${ rasterframes.rfConfig.getBoolean("prefer-gdal")}") { + // These specific scenes exhibit the problem where we see different subtile segment ordering across the bands of a given scene. + val rURI = new URI( + "https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/016/034/LC08_L1TP_016034_20181003_20181003_01_RT/LC08_L1TP_016034_20181003_20181003_01_RT_B4.TIF") + val bURI = new URI( + "https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/016/034/LC08_L1TP_016034_20181003_20181003_01_RT/LC08_L1TP_016034_20181003_20181003_01_RT_B2.TIF") + val red = time("read B4") { + RasterSource(rURI).readAll() + } + val blue = time("read B2") { + RasterSource(bURI).readAll() + } + time("test empty") { + red should not be empty + } + time("compare sizes") { + red.size should equal(blue.size) + } + time("compare dimensions") { + red.map(_.dimensions) should contain theSameElementsAs blue.map(_.dimensions) + } + } + } + } + + if (GDALRasterSource.hasGDAL) { + println("GDAL version: " + GDALRasterSource.gdalVersion()) + + describe("GDAL support") { + + + it("should read JPEG2000 scene") { + RasterSource(localSentinel).readAll().flatMap(_.tile.statisticsDouble).size should be(64) + } + + it("should read small MRF scene with one band converted from MODIS HDF") { + val (expectedTileCount, _) = expectedTileCountAndBands(2400, 2400) + RasterSource(modisConvertedMrfPath).readAll().flatMap(_.tile.statisticsDouble).size 
should be (expectedTileCount) + } + + it("should read remote HTTP MRF scene") { + val (expectedTileCount, bands) = expectedTileCountAndBands(6257, 7584, 4) + RasterSource(remoteHttpMrfPath).readAll(bands = bands).flatMap(_.tile.statisticsDouble).size should be (expectedTileCount) + } + + it("should read remote S3 MRF scene") { + val (expectedTileCount, bands) = expectedTileCountAndBands(6257, 7584, 4) + RasterSource(remoteS3MrfPath).readAll(bands = bands).flatMap(_.tile.statisticsDouble).size should be (expectedTileCount) + } + } + } else { + describe("GDAL missing error support") { + it("should throw exception reading JPEG2000 scene") { + intercept[IllegalArgumentException] { + RasterSource(localSentinel) + } + } + + it("should throw exception reading MRF scene with one band converted from MODIS HDF") { + intercept[IllegalArgumentException] { + RasterSource(modisConvertedMrfPath) + } + } + + it("should throw exception reading remote HTTP MRF scene") { + intercept[IllegalArgumentException] { + RasterSource(remoteHttpMrfPath) + } + } + + it("should throw exception reading remote S3 MRF scene") { + intercept[IllegalArgumentException] { + RasterSource(remoteS3MrfPath) + } + } + } + } + + private def expectedTileCountAndBands(x:Int, y:Int, bandCount:Int = 1) = { + val imageDimensions = Seq(x.toDouble, y.toDouble) + val tilesPerBand = imageDimensions.map(x ⇒ ceil(x / NOMINAL_TILE_SIZE)).product + val bands = Range(0, bandCount) + val expectedTileCount = tilesPerBand * bands.length + (expectedTileCount, bands) + } + +} diff --git a/core/src/main/resources/reference.conf b/core/src/main/resources/reference.conf index 980088e28..e7d3e57f5 100644 --- a/core/src/main/resources/reference.conf +++ b/core/src/main/resources/reference.conf @@ -1,3 +1,19 @@ rasterframes { - nominal-tile-size: 256 + nominal-tile-size = 256 + prefer-gdal = true + showable-tiles = true + showable-max-cells = 20 + raster-source-cache-timeout = 120 seconds +} + +vlm.gdal { + options { + // See 
https://trac.osgeo.org/gdal/wiki/ConfigOptions for options + //CPL_DEBUG = "OFF" + AWS_REQUEST_PAYER = "requester" + GDAL_DISABLE_READDIR_ON_OPEN = "YES" + CPL_VSIL_CURL_ALLOWED_EXTENSIONS = ".tif,.tiff,.jp2,.mrf,.idx,.lrc,.mrf.aux.xml,.vrt" + } + // set this to `false` if CPL_DEBUG is `ON` + useExceptions = true } \ No newline at end of file diff --git a/core/src/main/scala/astraea/spark/rasterframes/MetadataKeys.scala b/core/src/main/scala/astraea/spark/rasterframes/MetadataKeys.scala deleted file mode 100644 index 2b4948798..000000000 --- a/core/src/main/scala/astraea/spark/rasterframes/MetadataKeys.scala +++ /dev/null @@ -1,13 +0,0 @@ -package astraea.spark.rasterframes - -/** - * - * @since 2/19/18 - */ -trait MetadataKeys { - /** Key under which ContextRDD metadata is stored. */ - private[rasterframes] val CONTEXT_METADATA_KEY = "_context" - - /** Key under which RasterFrame role a column plays. */ - private[rasterframes] val SPATIAL_ROLE_KEY = "_stRole" -} diff --git a/core/src/main/scala/astraea/spark/rasterframes/RasterFunctions.scala b/core/src/main/scala/astraea/spark/rasterframes/RasterFunctions.scala deleted file mode 100644 index ff08dd44c..000000000 --- a/core/src/main/scala/astraea/spark/rasterframes/RasterFunctions.scala +++ /dev/null @@ -1,353 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2017 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- * - */ - -package astraea.spark.rasterframes - -import astraea.spark.rasterframes.expressions.TileAssembler -import astraea.spark.rasterframes.expressions.accessors._ -import astraea.spark.rasterframes.expressions.aggstats._ -import astraea.spark.rasterframes.expressions.generators._ -import astraea.spark.rasterframes.expressions.localops._ -import astraea.spark.rasterframes.expressions.tilestats._ -import astraea.spark.rasterframes.expressions.transformers._ -import astraea.spark.rasterframes.stats.{CellHistogram, CellStatistics} -import astraea.spark.rasterframes.{functions => F} -import com.vividsolutions.jts.geom.{Envelope, Geometry} -import geotrellis.proj4.CRS -import geotrellis.raster.mapalgebra.local.LocalTileBinaryOp -import geotrellis.raster.{CellType, Tile} -import org.apache.spark.annotation.Experimental -import org.apache.spark.sql._ -import org.apache.spark.sql.functions._ - -/** - * UDFs for working with Tiles in Spark DataFrames. - * - * @since 4/3/17 - */ -trait RasterFunctions { - import util._ - import PrimitiveEncoders._ - - // format: off - /** Create a row for each cell in Tile. */ - def explode_tiles(cols: Column*): Column = explode_tiles_sample(1.0, None, cols: _*) - - /** Create a row for each cell in Tile with random sampling and optional seed. */ - def explode_tiles_sample(sampleFraction: Double, seed: Option[Long], cols: Column*): Column = - ExplodeTiles(sampleFraction, seed, cols) - - /** Create a row for each cell in Tile with random sampling (no seed). */ - def explode_tiles_sample(sampleFraction: Double, cols: Column*): Column = - ExplodeTiles(sampleFraction, None, cols) - - /** Query the number of (cols, rows) in a Tile. */ - def tile_dimensions(col: Column): Column = GetDimensions(col) - - /** Extracts the bounding box of a geometry as a JTS envelope. */ - def envelope(col: Column): TypedColumn[Any, Envelope] = GetEnvelope(col) - - /** Flattens Tile into a double array. 
*/ - def tile_to_array_double(col: Column): TypedColumn[Any, Array[Double]] = - TileToArrayDouble(col) - - /** Flattens Tile into an integer array. */ - def tile_to_array_int(col: Column): TypedColumn[Any, Array[Double]] = - TileToArrayDouble(col) - - @Experimental - /** Convert array in `arrayCol` into a Tile of dimensions `cols` and `rows`*/ - def array_to_tile(arrayCol: Column, cols: Int, rows: Int) = withAlias("array_to_tile", arrayCol)( - udf[Tile, AnyRef](F.arrayToTile(cols, rows)).apply(arrayCol) - ) - - /** Create a Tile from a column of cell data with location indexes and preform cell conversion. */ - def assemble_tile(columnIndex: Column, rowIndex: Column, cellData: Column, tileCols: Int, tileRows: Int, ct: CellType): TypedColumn[Any, Tile] = - convert_cell_type(TileAssembler(columnIndex, rowIndex, cellData, lit(tileCols), lit(tileRows)), ct).as(cellData.columnName).as[Tile](singlebandTileEncoder) - - /** Create a Tile from a column of cell data with location indexes. */ - def assemble_tile(columnIndex: Column, rowIndex: Column, cellData: Column, tileCols: Column, tileRows: Column): TypedColumn[Any, Tile] = - TileAssembler(columnIndex, rowIndex, cellData, tileCols, tileRows) - - /** Extract the Tile's cell type */ - def cell_type(col: Column): TypedColumn[Any, CellType] = GetCellType(col) - - /** Change the Tile's cell type */ - def convert_cell_type(col: Column, cellType: CellType): TypedColumn[Any, Tile] = - SetCellType(col, cellType) - - /** Change the Tile's cell type */ - def convert_cell_type(col: Column, cellTypeName: String): TypedColumn[Any, Tile] = - SetCellType(col, cellTypeName) - - /** Convert a bounding box structure to a Geometry type. Intented to support multiple schemas. */ - def bounds_geometry(bounds: Column): TypedColumn[Any, Geometry] = BoundsToGeometry(bounds) - - /** Assign a `NoData` value to the Tiles. 
*/ - def with_no_data(col: Column, nodata: Double): TypedColumn[Any, Tile] = withAlias("with_no_data", col)( - udf[Tile, Tile](F.withNoData(nodata)).apply(col) - ).as[Tile] - - /** Compute the full column aggregate floating point histogram. */ - def agg_approx_histogram(col: Column): TypedColumn[Any, CellHistogram] = - HistogramAggregate(col) - - /** Compute the full column aggregate floating point statistics. */ - def agg_stats(col: Column): TypedColumn[Any, CellStatistics] = - CellStatsAggregate(col) - - /** Computes the column aggregate mean. */ - def agg_mean(col: Column) = CellMeanAggregate(col) - - /** Computes the number of non-NoData cells in a column. */ - def agg_data_cells(col: Column): TypedColumn[Any, Long] = CellCountAggregate.DataCells(col) - - /** Computes the number of NoData cells in a column. */ - def agg_no_data_cells(col: Column): TypedColumn[Any, Long] = CellCountAggregate.NoDataCells(col) - - /** Compute the Tile-wise mean */ - def tile_mean(col: Column): TypedColumn[Any, Double] = - TileMean(col) - - /** Compute the Tile-wise sum */ - def tile_sum(col: Column): TypedColumn[Any, Double] = - Sum(col) - - /** Compute the minimum cell value in tile. */ - def tile_min(col: Column): TypedColumn[Any, Double] = - TileMin(col) - - /** Compute the maximum cell value in tile. */ - def tile_max(col: Column): TypedColumn[Any, Double] = - TileMax(col) - - /** Compute TileHistogram of Tile values. */ - def tile_histogram(col: Column): TypedColumn[Any, CellHistogram] = - TileHistogram(col) - - /** Compute statistics of Tile values. */ - def tile_stats(col: Column): TypedColumn[Any, CellStatistics] = - TileStats(col) - - /** Counts the number of non-NoData cells per Tile. */ - def data_cells(tile: Column): TypedColumn[Any, Long] = - DataCells(tile) - - /** Counts the number of NoData cells per Tile. 
*/ - def no_data_cells(tile: Column): TypedColumn[Any, Long] = - NoDataCells(tile) - - def is_no_data_tile(tile: Column): TypedColumn[Any, Boolean] = - IsNoDataTile(tile) - - /** Compute cell-local aggregate descriptive statistics for a column of Tiles. */ - def agg_local_stats(col: Column) = - LocalStatsAggregate(col) - - /** Compute the cell-wise/local max operation between Tiles in a column. */ - def agg_local_max(col: Column): TypedColumn[Any, Tile] = LocalTileOpAggregate.LocalMaxUDAF(col) - - /** Compute the cellwise/local min operation between Tiles in a column. */ - def agg_local_min(col: Column): TypedColumn[Any, Tile] = LocalTileOpAggregate.LocalMinUDAF(col) - - /** Compute the cellwise/local mean operation between Tiles in a column. */ - def agg_local_mean(col: Column): TypedColumn[Any, Tile] = LocalMeanAggregate(col) - - /** Compute the cellwise/local count of non-NoData cells for all Tiles in a column. */ - def agg_local_data_cells(col: Column): TypedColumn[Any, Tile] = LocalCountAggregate.LocalDataCellsUDAF(col) - - /** Compute the cellwise/local count of NoData cells for all Tiles in a column. */ - def agg_local_no_data_cells(col: Column): TypedColumn[Any, Tile] = LocalCountAggregate.LocalNoDataCellsUDAF(col) - - /** Cellwise addition between two Tiles or Tile and scalar column. */ - def local_add(left: Column, right: Column): TypedColumn[Any, Tile] = Add(left, right) - - /** Cellwise addition of a scalar value to a tile. */ - def local_add[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Add(tileCol, value) - - /** Cellwise subtraction between two Tiles. */ - def local_subtract(left: Column, right: Column): TypedColumn[Any, Tile] = Subtract(left, right) - - /** Cellwise subtraction of a scalar value from a tile. */ - def local_subtract[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Subtract(tileCol, value) - - /** Cellwise multiplication between two Tiles. 
*/ - def local_multiply(left: Column, right: Column): TypedColumn[Any, Tile] = Multiply(left, right) - - /** Cellwise multiplication of a tile by a scalar value. */ - def local_multiply[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Multiply(tileCol, value) - - /** Cellwise division between two Tiles. */ - def local_divide(left: Column, right: Column): TypedColumn[Any, Tile] = Divide(left, right) - - /** Cellwise division of a tile by a scalar value. */ - def local_divide[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Divide(tileCol, value) - - /** Perform an arbitrary GeoTrellis `LocalTileBinaryOp` between two Tile columns. */ - def local_algebra(op: LocalTileBinaryOp, left: Column, right: Column): - TypedColumn[Any, Tile] = - withAlias(opName(op), left, right)( - udf[Tile, Tile, Tile](op.apply).apply(left, right) - ).as[Tile] - - /** Compute the normalized difference of two tile columns */ - def normalized_difference(left: Column, right: Column) = - NormalizedDifference(left, right) - - /** Constructor for constant tile column */ - def make_constant_tile(value: Number, cols: Int, rows: Int, cellType: String): TypedColumn[Any, Tile] = - udf(() => F.makeConstantTile(value, cols, rows, cellType)).apply().as(s"constant_$cellType").as[Tile] - - /** Alias for column of constant tiles of zero */ - def tile_zeros(cols: Int, rows: Int, cellType: String = "float64"): TypedColumn[Any, Tile] = - udf(() => F.tileZeros(cols, rows, cellType)).apply().as(s"zeros_$cellType").as[Tile] - - /** Alias for column of constant tiles of one */ - def tile_ones(cols: Int, rows: Int, cellType: String = "float64"): TypedColumn[Any, Tile] = - udf(() => F.tileOnes(cols, rows, cellType)).apply().as(s"ones_$cellType").as[Tile] - - /** Where the mask tile contains NODATA, replace values in the source tile with NODATA */ - def mask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] = - Mask.MaskByDefined(sourceTile, maskTile) - - /** Where the mask 
tile equals the mask value, replace values in the source tile with NODATA */ - def mask_by_value(sourceTile: Column, maskTile: Column, maskValue: Column): TypedColumn[Any, Tile] = - Mask.MaskByValue(sourceTile, maskTile, maskValue) - - /** Where the mask tile DOES NOT contain NODATA, replace values in the source tile with NODATA */ - def inverse_mask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] = - Mask.InverseMaskByDefined(sourceTile, maskTile) - - /** Create a tile where cells in the grid defined by cols, rows, and bounds are filled with the given value. */ - def rasterize(geometry: Column, bounds: Column, value: Column, cols: Int, rows: Int): TypedColumn[Any, Tile] = - withAlias("rasterize", geometry)( - udf(F.rasterize(_: Geometry, _: Geometry, _: Int, cols, rows)).apply(geometry, bounds, value) - ).as[Tile] - - /** Reproject a column of geometry from one CRS to another. */ - def reproject_geometry(sourceGeom: Column, srcCRS: CRS, dstCRSCol: Column): TypedColumn[Any, Geometry] = - ReprojectGeometry(sourceGeom, srcCRS, dstCRSCol) - - /** Reproject a column of geometry from one CRS to another. */ - def reproject_geometry(sourceGeom: Column, srcCRSCol: Column, dstCRS: CRS): TypedColumn[Any, Geometry] = - ReprojectGeometry(sourceGeom, srcCRSCol, dstCRS) - - /** Reproject a column of geometry from one CRS to another. */ - def reproject_geometry(sourceGeom: Column, srcCRS: CRS, dstCRS: CRS): TypedColumn[Any, Geometry] = - ReprojectGeometry(sourceGeom, srcCRS, dstCRS) - - /** Render Tile as ASCII string, for debugging purposes. */ - def render_ascii(col: Column): TypedColumn[Any, String] = - DebugRender.RenderAscii(col) - - /** Render Tile cell values as numeric values, for debugging purposes. */ - def render_matrix(col: Column): TypedColumn[Any, String] = - DebugRender.RenderMatrix(col) - - /** Cellwise less than value comparison between two tiles. 
*/ - def local_less(left: Column, right: Column): TypedColumn[Any, Tile] = - Less(left, right) - - /** Cellwise less than value comparison between a tile and a scalar. */ - def local_less[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = - Less(tileCol, value) - - /** Cellwise less than or equal to value comparison between a tile and a scalar. */ - def local_less_equal(left: Column, right: Column): TypedColumn[Any, Tile] = - LessEqual(left, right) - - /** Cellwise less than or equal to value comparison between a tile and a scalar. */ - def local_less_equal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = - LessEqual(tileCol, value) - - /** Cellwise greater than value comparison between two tiles. */ - def local_greater(left: Column, right: Column): TypedColumn[Any, Tile] = - Greater(left, right) - - /** Cellwise greater than value comparison between a tile and a scalar. */ - def local_greater[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = - Greater(tileCol, value) - - /** Cellwise greater than or equal to value comparison between two tiles. */ - def local_greater_equal(left: Column, right: Column): TypedColumn[Any, Tile] = - GreaterEqual(left, right) - - /** Cellwise greater than or equal to value comparison between a tile and a scalar. */ - def local_greater_equal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = - GreaterEqual(tileCol, value) - - /** Cellwise equal to value comparison between two tiles. */ - def local_equal(left: Column, right: Column): TypedColumn[Any, Tile] = - Equal(left, right) - - /** Cellwise equal to value comparison between a tile and a scalar. */ - def local_equal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = - Equal(tileCol, value) - - /** Cellwise inequality comparison between two tiles. */ - def local_unequal(left: Column, right: Column): TypedColumn[Any, Tile] = - Unequal(left, right) - - /** Cellwise inequality comparison between a tile and a scalar. 
*/ - def local_unequal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = - Unequal(tileCol, value) - - /** Round cell values to nearest integer without chaning cell type. */ - def round(tileCol: Column): TypedColumn[Any, Tile] = - Round(tileCol) - - /** Take natural logarithm of cell values. */ - def log(tileCol: Column): TypedColumn[Any, Tile] = - Log(tileCol) - - /** Take base 10 logarithm of cell values. */ - def log10(tileCol: Column): TypedColumn[Any, Tile] = - Log10(tileCol) - - /** Take base 2 logarithm of cell values. */ - def log2(tileCol: Column): TypedColumn[Any, Tile] = - Log2(tileCol) - - /** Natural logarithm of one plus cell values. */ - def log1p(tileCol: Column): TypedColumn[Any, Tile] = - Log1p(tileCol) - - /** Exponential of cell values */ - def exp(tileCol: Column): TypedColumn[Any, Tile] = - Exp(tileCol) - - /** Ten to the power of cell values */ - def exp10(tileCol: Column): TypedColumn[Any, Tile] = - Exp10(tileCol) - - /** Two to the power of cell values */ - def exp2(tileCol: Column): TypedColumn[Any, Tile] = - Exp2(tileCol) - - /** Exponential of cell values, less one*/ - def expm1(tileCol: Column): TypedColumn[Any, Tile] = - ExpM1(tileCol) - - /** Resample tile using nearest-neighbor */ - def resample[T: Numeric](tileCol: Column, value: T) = Resample(tileCol, value) - - /** Resample tile using nearest-neighbor */ - def resample(tileCol: Column, column2: Column) = Resample(tileCol, column2) - -} diff --git a/core/src/main/scala/astraea/spark/rasterframes/StandardColumns.scala b/core/src/main/scala/astraea/spark/rasterframes/StandardColumns.scala deleted file mode 100644 index 340b17198..000000000 --- a/core/src/main/scala/astraea/spark/rasterframes/StandardColumns.scala +++ /dev/null @@ -1,63 +0,0 @@ -package astraea.spark.rasterframes - -import java.sql.Timestamp - -import geotrellis.raster.{Tile, TileFeature} -import geotrellis.spark.{SpatialKey, TemporalKey} -import org.apache.spark.sql.functions.col -import 
com.vividsolutions.jts.geom.{Point => jtsPoint, Polygon => jtsPolygon} -import geotrellis.proj4.CRS -import geotrellis.vector.Extent -import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders._ - -/** - * Constants identifying column in most RasterFrames. - * - * @since 2/19/18 - */ -trait StandardColumns { - /** Default RasterFrame spatial column name. */ - val SPATIAL_KEY_COLUMN = col("spatial_key").as[SpatialKey] - - /** Default RasterFrame temporal column name. */ - val TEMPORAL_KEY_COLUMN = col("temporal_key").as[TemporalKey] - - /** Default RasterFrame timestamp column name */ - val TIMESTAMP_COLUMN = col("timestamp").as[Timestamp] - - - /** Default RasterFrame column name for an tile bounds value. */ - // This is a `def` because `PolygonUDT` needs to be initialized first. - def BOUNDS_COLUMN = col("bounds").as[jtsPolygon] - - /** Default RasterFrame column name for the center coordinates of the tile's bounds. */ - // This is a `def` because `PointUDT` needs to be initialized first. - def CENTER_COLUMN = col("center").as[jtsPoint] - - /** Default Extent column name. */ - def EXTENT_COLUMN = col("extent").as[Extent] - - /** Default CRS column name. */ - def CRS_COLUMN = col("crs").as[CRS] - - /** Default RasterFrame column name for an added spatial index. */ - val SPATIAL_INDEX_COLUMN = col("spatial_index").as[Long] - - /** Default RasterFrame tile column name. */ - // This is a `def` because `TileUDT` needs to be initialized first. - def TILE_COLUMN = col("tile").as[Tile] - - /** Default RasterFrame [[TileFeature.data]] column name. */ - val TILE_FEATURE_DATA_COLUMN = col("tile_data") - - /** Default GeoTiff tags column. */ - val METADATA_COLUMN = col("metadata").as[Map[String, String]] - - /** Default column index column for the cells of exploded tiles. */ - val COLUMN_INDEX_COLUMN = col("column_index").as[Int] - - /** Default teil column index column for the cells of exploded tiles. 
*/ - val ROW_INDEX_COLUMN = col("row_index").as[Int] -} - -object StandardColumns extends StandardColumns diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterSourceToRasterRefs.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterSourceToRasterRefs.scala deleted file mode 100644 index 2581f8be5..000000000 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterSourceToRasterRefs.scala +++ /dev/null @@ -1,77 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2019 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - * - */ - -package astraea.spark.rasterframes.expressions.transformers - -import astraea.spark.rasterframes.encoders.CatalystSerializer -import astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.ref.RasterRef -import astraea.spark.rasterframes.util._ -import com.typesafe.scalalogging.LazyLogging -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback -import org.apache.spark.sql.rf._ -import org.apache.spark.sql.types.{DataType, StructField, StructType} -import org.apache.spark.sql.{Column, TypedColumn} - -import scala.util.control.NonFatal - -/** - * Accepts RasterRef and generates one or more RasterRef instances representing the - * native internal sub-tiling, if any (and requested). - * - * @since 9/6/18 - */ -case class RasterSourceToRasterRefs(children: Seq[Expression], applyTiling: Boolean) extends Expression - with Generator with CodegenFallback with ExpectsInputTypes with LazyLogging { - - private val RasterSourceType = new RasterSourceUDT() - private val rasterRefSchema = CatalystSerializer[RasterRef].schema - - override def inputTypes: Seq[DataType] = Seq.fill(children.size)(RasterSourceType) - override def nodeName: String = "raster_source_to_raster_ref" - - override def elementSchema: StructType = StructType( - children.map(e ⇒ StructField(e.name, rasterRefSchema, false)) - ) - - override def eval(input: InternalRow): TraversableOnce[InternalRow] = { - try { - val refs = children.map { child ⇒ - val src = RasterSourceType.deserialize(child.eval(input)) - if (applyTiling) src.nativeTiling.map(e ⇒ RasterRef(src, Some(e))) else Seq(RasterRef(src)) - } - refs.transpose.map(ts ⇒ InternalRow(ts.map(_.toInternalRow): _*)) - } - catch { - case NonFatal(ex) ⇒ - logger.error("Error fetching data for " + input, ex) - Traversable.empty - } - } -} - -object 
RasterSourceToRasterRefs { - def apply(rrs: Column*): TypedColumn[Any, RasterRef] = apply(true, rrs: _*) - def apply(applyTiling: Boolean, rrs: Column*): TypedColumn[Any, RasterRef] = - new Column(new RasterSourceToRasterRefs(rrs.map(_.expr), applyTiling)).as[RasterRef] -} diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterSourceToTiles.scala b/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterSourceToTiles.scala deleted file mode 100644 index 2b1caa3ba..000000000 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterSourceToTiles.scala +++ /dev/null @@ -1,84 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2019 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - * - */ - -package astraea.spark.rasterframes.expressions.transformers - -import astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.util._ -import com.typesafe.scalalogging.LazyLogging -import org.apache.spark.sql.Column -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback -import org.apache.spark.sql.rf._ -import org.apache.spark.sql.types.{DataType, StructField, StructType} - -import scala.util.control.NonFatal - -/** - * Accepts RasterRef and generates one or more RasterRef instances representing the - * native internal sub-tiling, if any (and requested). - * - * @since 9/6/18 - */ -case class RasterSourceToTiles(children: Seq[Expression], applyTiling: Boolean) extends Expression - with Generator with CodegenFallback with ExpectsInputTypes with LazyLogging { - - private val RasterSourceType = new RasterSourceUDT() - private val TileType = new TileUDT() - - override def inputTypes: Seq[DataType] = Seq.fill(children.size)(RasterSourceType) - override def nodeName: String = "raster_source_to_tile" - - override def elementSchema: StructType = StructType( - children.map(e ⇒ StructField(e.name, TileType, true)) - ) - - override def eval(input: InternalRow): TraversableOnce[InternalRow] = { - implicit val ser = TileUDT.tileSerializer - - try { - val refs = children.map { child ⇒ - val src = RasterSourceType.deserialize(child.eval(input)) - val tiles = if (applyTiling) src.readAll() else { - src.read(src.extent).right.map(Seq(_)).left.map(Seq(_)) - } - - require(tiles.isLeft, "Multiband tiles are not yet supported") - - tiles.left.get - } - refs.transpose.map(ts ⇒ InternalRow(ts.map(r ⇒ r.tile.toInternalRow): _*)) - } - catch { - case NonFatal(ex) ⇒ - logger.error("Error fetching data for " + sql, ex) - Traversable.empty - } - } -} - - -object RasterSourceToTiles { 
- def apply(rrs: Column*): Column = apply(true, rrs: _*) - def apply(applyTiling: Boolean, rrs: Column*): Column = - new Column(new RasterSourceToTiles(rrs.map(_.expr), applyTiling)) -} \ No newline at end of file diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/ContextRDDMethods.scala b/core/src/main/scala/astraea/spark/rasterframes/extensions/ContextRDDMethods.scala deleted file mode 100644 index ef0c901f8..000000000 --- a/core/src/main/scala/astraea/spark/rasterframes/extensions/ContextRDDMethods.scala +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2017 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package astraea.spark.rasterframes.extensions - -import astraea.spark.rasterframes.PairRDDConverter._ -import astraea.spark.rasterframes.StandardColumns._ -import astraea.spark.rasterframes.extensions.Implicits._ -import astraea.spark.rasterframes.util._ -import astraea.spark.rasterframes.{PairRDDConverter, RasterFrame} -import geotrellis.raster.{CellGrid, Tile} -import geotrellis.spark._ -import geotrellis.spark.io._ -import geotrellis.util.MethodExtensions -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.SparkSession - -/** - * Extension method on `ContextRDD`-shaped RDDs with appropriate context bounds to create a RasterFrame. 
- * @since 7/18/17 - */ -abstract class SpatialContextRDDMethods[T <: CellGrid](implicit spark: SparkSession) - extends MethodExtensions[RDD[(SpatialKey, T)] with Metadata[TileLayerMetadata[SpatialKey]]] { - import PairRDDConverter._ - - def toRF(implicit converter: PairRDDConverter[SpatialKey, T]): RasterFrame = toRF(TILE_COLUMN.columnName) - - def toRF(tileColumnName: String)(implicit converter: PairRDDConverter[SpatialKey, T]): RasterFrame = { - val df = self.toDataFrame.setSpatialColumnRole(SPATIAL_KEY_COLUMN, self.metadata) - val defName = TILE_COLUMN.columnName - df.mapWhen(_ ⇒ tileColumnName != defName, _.withColumnRenamed(defName, tileColumnName)) - .certify - } -} - -/** - * Extension method on `ContextRDD`-shaped [[Tile]] RDDs keyed with [[SpaceTimeKey]], with appropriate context bounds to create a RasterFrame. - * @since 9/11/17 - */ -abstract class SpatioTemporalContextRDDMethods[T <: CellGrid]( - implicit spark: SparkSession) - extends MethodExtensions[RDD[(SpaceTimeKey, T)] with Metadata[TileLayerMetadata[SpaceTimeKey]]] { - - def toRF(implicit converter: PairRDDConverter[SpaceTimeKey, T]): RasterFrame = toRF(TILE_COLUMN.columnName) - - def toRF(tileColumnName: String)(implicit converter: PairRDDConverter[SpaceTimeKey, T]): RasterFrame = { - val df = self.toDataFrame - .setSpatialColumnRole(SPATIAL_KEY_COLUMN, self.metadata) - .setTemporalColumnRole(TEMPORAL_KEY_COLUMN) - val defName = TILE_COLUMN.columnName - df.mapWhen(_ ⇒ tileColumnName != defName, _.withColumnRenamed(defName, tileColumnName)) - .certify - } -} diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/DataFrameMethods.scala b/core/src/main/scala/astraea/spark/rasterframes/extensions/DataFrameMethods.scala deleted file mode 100644 index ca38322ac..000000000 --- a/core/src/main/scala/astraea/spark/rasterframes/extensions/DataFrameMethods.scala +++ /dev/null @@ -1,246 +0,0 @@ -/* - * Copyright 2017 Astraea, Inc. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package astraea.spark.rasterframes.extensions - -import astraea.spark.rasterframes.StandardColumns._ -import astraea.spark.rasterframes.util._ -import astraea.spark.rasterframes.{MetadataKeys, RasterFrame} -import geotrellis.raster.Tile -import geotrellis.spark.io._ -import geotrellis.spark.{SpaceTimeKey, SpatialComponent, SpatialKey, TemporalKey, TileLayerMetadata} -import geotrellis.util.MethodExtensions -import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.functions._ -import org.apache.spark.sql.rf.TileUDT -import org.apache.spark.sql.types.{MetadataBuilder, StructField} -import org.apache.spark.sql.{Column, DataFrame, TypedColumn} -import spray.json.JsonFormat -import astraea.spark.rasterframes.encoders.StandardEncoders._ -import scala.util.Try - -/** - * Extension methods over [[DataFrame]]. - * - * @since 7/18/17 - */ -trait DataFrameMethods[DF <: DataFrame] extends MethodExtensions[DF] with MetadataKeys { - import Implicits.{WithDataFrameMethods, WithMetadataBuilderMethods, WithMetadataMethods, WithRasterFrameMethods} - - private def selector(column: Column) = (attr: Attribute) ⇒ - attr.name == column.columnName || attr.semanticEquals(column.expr) - - /** Map over the Attribute representation of Columns, modifying the one matching `column` with `op`. 
*/ - private[astraea] def mapColumnAttribute(column: Column, op: Attribute ⇒ Attribute): DF = { - val analyzed = self.queryExecution.analyzed.output - val selects = selector(column) - val attrs = analyzed.map { attr ⇒ - if(selects(attr)) op(attr) else attr - } - self.select(attrs.map(a ⇒ new Column(a)): _*).asInstanceOf[DF] - } - - private[astraea] def addColumnMetadata(column: Column, op: MetadataBuilder ⇒ MetadataBuilder): DF = { - mapColumnAttribute(column, attr ⇒ { - val md = new MetadataBuilder().withMetadata(attr.metadata) - attr.withMetadata(op(md).build) - }) - } - - private[astraea] def fetchMetadataValue[D](column: Column, reader: (Attribute) ⇒ D): Option[D] = { - val analyzed = self.queryExecution.analyzed.output - analyzed.find(selector(column)).map(reader) - } - - private[astraea] - def setSpatialColumnRole[K: SpatialComponent: JsonFormat]( - column: Column, md: TileLayerMetadata[K]): DF = - addColumnMetadata(column, - _.attachContext(md.asColumnMetadata).tagSpatialKey - ) - - private[astraea] - def setTemporalColumnRole(column: Column): DF = - addColumnMetadata(column, _.tagTemporalKey) - - /** Get the role tag the column plays in the RasterFrame, if any. */ - private[astraea] - def getColumnRole(column: Column): Option[String] = - fetchMetadataValue(column, _.metadata.getString(SPATIAL_ROLE_KEY)) - - /** Get the columns that are of type `Tile` */ - def tileColumns: Seq[TypedColumn[Any, Tile]] = - self.schema.fields - .filter(_.dataType.typeName.equalsIgnoreCase(TileUDT.typeName)) - .map(f ⇒ col(f.name).as[Tile]) - - /** Get the columns that are not of type `Tile` */ - def notTileColumns: Seq[Column] = - self.schema.fields - .filterNot(_.dataType.typeName.equalsIgnoreCase(TileUDT.typeName)) - .map(f ⇒ col(f.name)) - - /** Get the spatial column. */ - def spatialKeyColumn: Option[TypedColumn[Any, SpatialKey]] = { - val key = findSpatialKeyField - key - .map(_.name) - .map(col(_).as[SpatialKey]) - } - - /** Get the temporal column, if any. 
*/ - def temporalKeyColumn: Option[TypedColumn[Any, TemporalKey]] = { - val key = findTemporalKeyField - key.map(_.name).map(col(_).as[TemporalKey]) - } - - /** Find the field tagged with the requested `role` */ - private[rasterframes] def findRoleField(role: String): Option[StructField] = - self.schema.fields.find( - f ⇒ - f.metadata.contains(SPATIAL_ROLE_KEY) && - f.metadata.getString(SPATIAL_ROLE_KEY) == role - ) - - /** The spatial key is the first one found with context metadata attached to it. */ - private[rasterframes] def findSpatialKeyField: Option[StructField] = - findRoleField(SPATIAL_KEY_COLUMN.columnName) - - /** The temporal key is the first one found with the temporal tag. */ - private[rasterframes] def findTemporalKeyField: Option[StructField] = - findRoleField(TEMPORAL_KEY_COLUMN.columnName) - - /** Renames all columns such that they start with the given prefix string. - * Useful for preparing dataframes for joins where duplicate names may arise. - */ - def withPrefixedColumnNames(prefix: String): DF = - self.columns.foldLeft(self)((df, c) ⇒ df.withColumnRenamed(c, s"$prefix$c").asInstanceOf[DF]) - - /** Converts this DataFrame to a RasterFrame after ensuring it has: - * - *
    - *
  1. a space or space-time key column - *
  2. one or more tile columns - *
  3. tile layout metadata - *
      - * - * If any of the above are violated, and [[IllegalArgumentException]] is thrown. - * - * @return validated RasterFrame - * @throws IllegalArgumentException when constraints are not met. - */ - @throws[IllegalArgumentException] - def asRF: RasterFrame = { - val potentialRF = certifyRasterframe(self) - - require( - potentialRF.findSpatialKeyField.nonEmpty, - "A RasterFrame requires a column identified as a spatial key" - ) - - require(potentialRF.tileColumns.nonEmpty, "A RasterFrame requires at least one tile column") - - require( - Try(potentialRF.tileLayerMetadata).isSuccess, - "A RasterFrame requires embedded TileLayerMetadata" - ) - - potentialRF - } - - /** - * Convert DataFrame into a RasterFrame - * - * @param spatialKey The column where the spatial key is stored - * @param tlm Metadata describing layout under which tiles were created. Note: no checking is - * performed to ensure metadata, key-space, and tiles are coherent. - * @throws IllegalArgumentException when constraints outlined in `asRF` are not met. - * @return Encoded RasterFrame - */ - @throws[IllegalArgumentException] - def asRF(spatialKey: Column, tlm: TileLayerMetadata[SpatialKey]): RasterFrame = - setSpatialColumnRole(spatialKey, tlm).asRF - - /** - * Convert DataFrame into a RasterFrame - * - * @param spatialKey The column where the spatial key is stored - * @param temporalKey The column tagged under the temporal role - * @param tlm Metadata describing layout under which tiles were created. Note: no checking is - * performed to ensure metadata, key-space, and tiles are coherent. - * @throws IllegalArgumentException when constraints outlined in `asRF` are not met. 
- * @return Encoded RasterFrame - */ - @throws[IllegalArgumentException] - def asRF(spatialKey: Column, temporalKey: Column, tlm: TileLayerMetadata[SpaceTimeKey]): RasterFrame = - setSpatialColumnRole(spatialKey, tlm) - .setTemporalColumnRole(temporalKey) - .asRF - -// @throws[IllegalArgumentException] -// def asRF(space: LayerSpace): RasterFrame = { -// require(tileColumns.isEmpty, "This method doesn't yet support existing tile columns") -// // We have two use cases to consider: This is already a rasterframe and we need to -// // reproject it. If we have RasterRefs then we reproject those -// val (refFields, otherFields) = self.schema.fields -// .partition(_.dataType.typeName.equalsIgnoreCase(RasterRefUDT.typeName)) -// -// val refCols = refFields.map(f ⇒ self(f.name).as[RasterRef]) -// val otherCols = otherFields.map(f ⇒ self(f.name)) -// -// // Reproject tile into layer space -// val projected = self.select(otherCols :+ ProjectIntoLayer(refCols, space): _*) -// -// // Lastly, convert cell type as desired -// val tileCols = projected.tileColumns.map(c ⇒ convert_cell_type(c, space.cell_type).as(c.columnName)) -// val remCols = projected.notTileColumns -// -// val layer = projected.select(remCols ++ tileCols: _*) -// -// val tlm = space.asTileLayerMetadata -// layer.setSpatialColumnRole(SPATIAL_KEY_COLUMN, tlm).asRF -// } - - /** - * Converts [[DataFrame]] to a RasterFrame if the following constraints are fulfilled: - * - *
        - *
      1. a space or space-time key column - *
      2. one or more tile columns - *
      3. tile layout metadata - *
          - * - * @return Some[RasterFrame] if constraints fulfilled, [[None]] otherwise. - */ - def asRFSafely: Option[RasterFrame] = Try(asRF).toOption - - /** - * Tests for the following conditions on the [[DataFrame]]: - * - *
            - *
          1. a space or space-time key column - *
          2. one or more tile columns - *
          3. tile layout metadata - *
              - * - * @return true if all constraints are fulfilled, false otherwise. - */ - def isRF: Boolean = Try(asRF).isSuccess - - /** Internal method for slapping the RasterFreame seal of approval on a DataFrame. - * Only call if if you are sure it has a spatial key and tile columns and TileLayerMetadata. */ - private[astraea] def certify = certifyRasterframe(self) -} diff --git a/core/src/main/scala/astraea/spark/rasterframes/ref/HttpRangeReader.scala b/core/src/main/scala/astraea/spark/rasterframes/ref/HttpRangeReader.scala deleted file mode 100644 index b0675e8f4..000000000 --- a/core/src/main/scala/astraea/spark/rasterframes/ref/HttpRangeReader.scala +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright 2016 Azavea - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// NB: Copied from -// https://github.com/locationtech/geotrellis/blob/835200c0a6031ddc09fca218ff082cefc9b116c6/spark/src/main/scala/geotrellis/spark/io/http/util/HttpRangeReader.scala -// TODO: Submit PR with changes/fixes. -//package geotrellis.spark.io.http.util - -package astraea.spark.rasterframes.ref - - -import geotrellis.util.{LazyLogging, RangeReader} -import scalaj.http.{Http, HttpResponse} -import java.net.{URI, URL} - -import scala.util.Try - - -/** - * This class extends [[RangeReader]] by reading chunks out of a GeoTiff at the - * specified HTTP location. - * - * @param url: A [[URL]] pointing to the desired GeoTiff. 
- */ -class HttpRangeReader(url: URL, useHeadRequest: Boolean) extends RangeReader with LazyLogging { - - val request = Http(url.toString) - - lazy val response: HttpResponse[String] = if(useHeadRequest) { - request.method("HEAD").asString - } - else { - request.method("GET").execute { is => "" } - } - - lazy val totalLength: Long = { - /** - * "The Accept-Ranges response HTTP header is a marker used by the server - * to advertise its support of partial requests. The value of this field - * indicates the unit that can be used to define a range." - * https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Ranges - */ - require(response.header("Accept-Ranges").contains("bytes"), "Server doesn't support ranged byte reads") - - val contentLength = response - .header("Content-Length") - .flatMap({ cl => Try(cl.toLong).toOption }) match { - case Some(num) => num - case None => -1L - } - - require(contentLength > 0, - "Server didn't provide (required) \"Content-Length\" headers, unable to do range-based read") - - contentLength - } - - def readClippedRange(start: Long, length: Int): Array[Byte] = { - //println("Range read", s"$start-${start + length} ($length bytes)") - - val res = request - .method("GET") - .header("Range", s"bytes=${start}-${start + length}") - .asBytes - - /** - * "If the byte-range-set is unsatisfiable, the server SHOULD return - * a response with a status of 416 (Requested range not satisfiable). - * Otherwise, the server SHOULD return a response with a status of 206 - * (Partial Content) containing the satisfiable ranges of the entity-body." 
- * https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html - */ - require(res.code != 416, - "Server unable to generate the byte range between ${start} and ${start + length}") - - if (res.code != 206) logger.info("Server responded to range request with HTTP code other than PARTIAL_RESPONSE (206)") - - res.body - } - -} - -/** The companion object of [[HttpRangeReader]] */ -object HttpRangeReader { - - def apply(address: String): HttpRangeReader = apply(new URL(address)) - - def apply(uri: URI): HttpRangeReader = apply(uri.toURL) - - /** - * Returns a new instance of HttpRangeReader. - * - * @param url: A [[URL]] pointing to the desired GeoTiff. - * @return A new instance of HttpRangeReader. - */ - def apply(url: URL): HttpRangeReader = new HttpRangeReader(url, true) - - /** - * Returns a new instance of HttpRangeReader which does not use HEAD - * to determine the totalLength. - * - * @param url: A [[URL]] pointing to the desired GeoTiff. - * @return A new instance of HttpRangeReader. - */ - def withoutHeadRequest(url: URL): HttpRangeReader = new HttpRangeReader(url, false) - - def withoutHeadRequest(address: String): HttpRangeReader = withoutHeadRequest(new URL(address)) - - def withoutHeadRequest(uri: URI): HttpRangeReader = withoutHeadRequest(uri.toURL) -} \ No newline at end of file diff --git a/core/src/main/scala/astraea/spark/rasterframes/ref/LayerSpace.scala b/core/src/main/scala/astraea/spark/rasterframes/ref/LayerSpace.scala deleted file mode 100644 index 25a9d050c..000000000 --- a/core/src/main/scala/astraea/spark/rasterframes/ref/LayerSpace.scala +++ /dev/null @@ -1,80 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2018 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. 
You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - * - * SPDX-License-Identifier: Apache-2.0 - * - */ - -package astraea.spark.rasterframes.ref - -import astraea.spark.rasterframes.tiles.ProjectedRasterTile -import geotrellis.proj4.CRS -import geotrellis.raster._ -import geotrellis.raster.resample.ResampleMethod -import geotrellis.spark.tiling.LayoutDefinition -import geotrellis.spark.{SpatialKey, _} - - -/** - * NB: This package is only a temporary home for this. - * - * @since 9/5/18 - */ -case class LayerSpace( - crs: CRS, - cellType: CellType, - layout: LayoutDefinition, - resampleMethod: ResampleMethod = ResampleMethod.DEFAULT -) { - - def reproject(dest: CRS): LayerSpace = { - copy( - crs = dest, - layout = layout.copy(extent = layout.extent.reproject(crs, dest)) - ) - } - - def asTileLayerMetadata: TileLayerMetadata[SpatialKey] = { - val bounds = KeyBounds( - SpatialKey(0, 0), - SpatialKey(layout.layoutCols - 1, layout.layoutRows - 1) - ) - TileLayerMetadata(cellType, layout, layout.extent, crs, bounds) - } -} - -object LayerSpace { - - private[rasterframes] - def defaultLayout(prt: ProjectedRasterTile): LayoutDefinition = - LayoutDefinition(prt.extent, TileLayout(1, 1, prt.cols, prt.rows)) - - def from(rs: RasterSource): LayerSpace = new LayerSpace( - rs.crs, rs.cellType, LayoutDefinition(rs.extent, rs.nativeLayout - .getOrElse(TileLayout(1, 1, rs.cols, rs.rows)) - ) - ) - - def from(rr: RasterRef): LayerSpace = new LayerSpace( - rr.crs, rr.cellType, RasterRef.defaultLayout(rr) - ) - - def from(prt: ProjectedRasterTile): LayerSpace = new LayerSpace( - prt.crs, prt.cellType, 
defaultLayout(prt) - ) - -} diff --git a/core/src/main/scala/astraea/spark/rasterframes/ref/RasterSource.scala b/core/src/main/scala/astraea/spark/rasterframes/ref/RasterSource.scala deleted file mode 100644 index 9dc9bd55e..000000000 --- a/core/src/main/scala/astraea/spark/rasterframes/ref/RasterSource.scala +++ /dev/null @@ -1,402 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2018 Astraea. Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- * - * - */ - -package astraea.spark.rasterframes.ref - -import java.net.URI -import java.time.ZonedDateTime -import java.time.format.DateTimeFormatter - -import astraea.spark.rasterframes.NOMINAL_TILE_SIZE -import astraea.spark.rasterframes.model.TileContext -import astraea.spark.rasterframes.ref.RasterRef.RasterRefTile -import astraea.spark.rasterframes.tiles.ProjectedRasterTile -import astraea.spark.rasterframes.util.GeoTiffInfoSupport -import com.typesafe.scalalogging.LazyLogging -import geotrellis.proj4.CRS -import geotrellis.raster._ -import geotrellis.raster.io.geotiff.reader.GeoTiffReader -import geotrellis.raster.io.geotiff.{GeoTiffSegmentLayout, MultibandGeoTiff, SinglebandGeoTiff, Tags} -import geotrellis.raster.split.Split -import geotrellis.spark.io.hadoop.HdfsRangeReader -import geotrellis.spark.io.s3.S3Client -import geotrellis.spark.io.s3.util.S3RangeReader -import geotrellis.spark.tiling.LayoutDefinition -import geotrellis.util.{FileRangeReader, RangeReader} -import geotrellis.vector.Extent -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.Experimental -import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder -import org.apache.spark.sql.rf.RasterSourceUDT - -import scala.util.Try - -/** - * Abstraction over fetching geospatial raster data. - * - * @since 8/21/18 - */ -@Experimental -sealed trait RasterSource extends ProjectedRasterLike with Serializable { - def crs: CRS - - def extent: Extent - - def timestamp: Option[ZonedDateTime] - - def cellType: CellType - - def bandCount: Int - - def tags: Option[Tags] - - def read(extent: Extent): Either[Raster[Tile], Raster[MultibandTile]] - - /** Reads the given extent as a single multiband raster. 
*/ - def readMultiband(extent: Extent): Raster[MultibandTile] = - read(extent).fold(r => { - r.copy(tile = MultibandTile(r.tile)) - }, identity) - - def readAll(): Either[Seq[Raster[Tile]], Seq[Raster[MultibandTile]]] - def readAllMultiband(): Seq[Raster[MultibandTile]] = - readAll().fold(_.map(r => { - r.copy(tile = MultibandTile(r.tile)) - }), identity) - - def readAllLazy(): Either[Seq[Raster[Tile]], Seq[Raster[MultibandTile]]] = { - val extents = nativeTiling - if (bandCount == 1) { - val rasters = for { - extent ← extents - rr = RasterRef(this, Some(extent)) - tile: Tile = RasterRefTile(rr) - } yield Raster(tile, extent) - Left(rasters) - } - else { - // Need to figure this out. - RasterSource._logger.warn("Lazy reading is not available for multiband images. Performing eager read.") - val rasters = for { - extent ← extents - raster = this.read(extent).right.get - } yield raster - Right(rasters) - } - } - - def nativeLayout: Option[TileLayout] - - def rasterExtent = RasterExtent(extent, cols, rows) - - def cellSize = CellSize(extent, cols, rows) - - def gridExtent = GridExtent(extent, cellSize) - - def tileContext: TileContext = TileContext(extent, crs) - - def nativeTiling: Seq[Extent] = { - nativeLayout.map { tileLayout ⇒ - val layout = LayoutDefinition(extent, tileLayout) - val transform = layout.mapTransform - for { - col ← 0 until tileLayout.layoutCols - row ← 0 until tileLayout.layoutRows - } yield transform(col, row) - } - .getOrElse(Seq(extent)) - } -} - -object RasterSource extends LazyLogging { - implicit def rsEncoder: ExpressionEncoder[RasterSource] = { - RasterSourceUDT // Makes sure UDT is registered first - ExpressionEncoder() - } - - private def _logger = logger - - def apply(source: URI, callback: Option[ReadCallback] = None): RasterSource = - source.getScheme match { - case "http" | "https" ⇒ HttpGeoTiffRasterSource(source, callback) - case "file" ⇒ FileGeoTiffRasterSource(source, callback) - case "hdfs" | "s3n" | "s3a" | "wasb" | "wasbs" ⇒ - 
// TODO: How can we get the active hadoop configuration - // TODO: without having to pass it through? - val config = () ⇒ new Configuration() - HadoopGeoTiffRasterSource(source, config, callback) - case "s3" ⇒ - val client = () ⇒ S3Client.DEFAULT - S3GeoTiffRasterSource(source, client, callback) - case s ⇒ throw new UnsupportedOperationException(s"Scheme '$s' not supported") - } - - - case class SimpleGeoTiffInfo( - cellType: CellType, - extent: Extent, - rasterExtent: RasterExtent, - crs: CRS, - tags: Tags, - segmentLayout: GeoTiffSegmentLayout, - bandCount: Int, - noDataValue: Option[Double] - ) - - object SimpleGeoTiffInfo { - def apply(info: GeoTiffReader.GeoTiffInfo): SimpleGeoTiffInfo = - SimpleGeoTiffInfo(info.cellType, info.extent, info.rasterExtent, info.crs, info.tags, info.segmentLayout, info.bandCount, info.noDataValue) - } - - // According to https://goo.gl/2z8xx9 the GeoTIFF date format is 'YYYY:MM:DD HH:MM:SS' - private val dateFormat = DateTimeFormatter.ofPattern("yyyy:MM:dd HH:mm:ss") - - trait URIRasterSource { _: RasterSource ⇒ - def source: URI - - abstract override def toString: String = { - s"${getClass.getSimpleName}(${source})" - } - } - - case class InMemoryRasterSource(tile: Tile, extent: Extent, crs: CRS) extends RasterSource { - def this(prt: ProjectedRasterTile) = this(prt, prt.extent, prt.crs) - - override def rows: Int = tile.rows - - override def cols: Int = tile.cols - - override def timestamp: Option[ZonedDateTime] = None - - override def cellType: CellType = tile.cellType - - override def bandCount: Int = 1 - - override def tags: Option[Tags] = None - - override def read(extent: Extent): Either[Raster[Tile], Raster[MultibandTile]] = Left( - Raster(tile.crop(rasterExtent.gridBoundsFor(extent, false)), extent) - ) - - override def nativeLayout: Option[TileLayout] = Some( - TileLayout( - layoutCols = math.ceil(this.cols.toDouble / NOMINAL_TILE_SIZE).toInt, - layoutRows = math.ceil(this.rows.toDouble / NOMINAL_TILE_SIZE).toInt, - 
tileCols = NOMINAL_TILE_SIZE, - tileRows = NOMINAL_TILE_SIZE) - ) - - def readAll(): Either[Seq[Raster[Tile]], Seq[Raster[MultibandTile]]] = { - Left(Raster(tile, extent).split(nativeLayout.get, Split.Options(false, false)).toSeq) - } - } - - trait RangeReaderRasterSource extends RasterSource with GeoTiffInfoSupport with LazyLogging { - protected def rangeReader: RangeReader - - private def realInfo = - GeoTiffReader.readGeoTiffInfo(rangeReader, streaming = true, withOverviews = false) - - private lazy val tiffInfo = SimpleGeoTiffInfo(realInfo) - - def crs: CRS = tiffInfo.crs - - def extent: Extent = tiffInfo.extent - - def timestamp: Option[ZonedDateTime] = resolveDate - - override def cols: Int = tiffInfo.rasterExtent.cols - - override def rows: Int = tiffInfo.rasterExtent.rows - - def cellType: CellType = tiffInfo.cellType - - def bandCount: Int = tiffInfo.bandCount - - override def tags: Option[Tags] = Option(tiffInfo.tags) - - def nativeLayout: Option[TileLayout] = { - if (tiffInfo.segmentLayout.isTiled) - Some(tiffInfo.segmentLayout.tileLayout) - else None - } - - // TODO: Determine if this is the correct way to handle time. 
- protected def resolveDate: Option[ZonedDateTime] = { - tiffInfo.tags.headTags - .get(Tags.TIFFTAG_DATETIME) - .flatMap(ds ⇒ Try({ - logger.debug("Parsing header date: " + ds) - ZonedDateTime.parse(ds, dateFormat) - }).toOption) - } - - def read(extent: Extent): Either[Raster[Tile], Raster[MultibandTile]] = { - val info = realInfo - if (bandCount == 1) { - val geoTiffTile = GeoTiffReader.geoTiffSinglebandTile(info) - val gt = new SinglebandGeoTiff( - geoTiffTile, - info.extent, - info.crs, - info.tags, - info.options, - List.empty - ) - Left(gt.crop(extent).raster) - } - else { - val geoTiffTile = GeoTiffReader.geoTiffMultibandTile(info) - val gt = new MultibandGeoTiff( - geoTiffTile, - info.extent, - info.crs, - info.tags, - info.options, - List.empty - ) - Right(gt.crop(extent).raster) - } - } - - def readAll(): Either[Seq[Raster[Tile]], Seq[Raster[MultibandTile]]] = { - val info = realInfo - - // Thanks to @pomadchin for showing us how to do this :-) - val windows = info.segmentLayout.listWindows(NOMINAL_TILE_SIZE) - val re = info.rasterExtent - - if (info.bandCount == 1) { - val geotile = GeoTiffReader.geoTiffSinglebandTile(info) - - val rows = windows.map(gb ⇒ { - val tile = geotile.crop(gb) - val extent = re.extentFor(gb, clamp = false) - Raster(tile, extent) - }) - - Left(rows.toSeq) - } - else { - val geotile = GeoTiffReader.geoTiffMultibandTile(info) - - val rows = windows.map(gb ⇒ { - val tile = geotile.crop(gb) - val extent = re.extentFor(gb, clamp = false) - Raster(tile, extent) - }) - - Right(rows.toSeq) - } - } - } - - case class FileGeoTiffRasterSource(source: URI, callback: Option[ReadCallback]) extends RangeReaderRasterSource - with URIRasterSource with URIRasterSourceDebugString { self ⇒ - @transient - protected lazy val rangeReader = { - val base = FileRangeReader(source.getPath) - // TODO: DRY - callback.map(cb ⇒ ReportingRangeReader(base, cb, self)).getOrElse(base) - } - } - - case class HadoopGeoTiffRasterSource(source: URI, config: () ⇒ 
Configuration, callback: Option[ReadCallback]) extends RangeReaderRasterSource - with URIRasterSource with URIRasterSourceDebugString { self ⇒ - @transient - protected lazy val rangeReader = { - val base = HdfsRangeReader(new Path(source.getPath), config()) - callback.map(cb ⇒ ReportingRangeReader(base, cb, self)).getOrElse(base) - } - } - - case class S3GeoTiffRasterSource(source: URI, client: () ⇒ S3Client, callback: Option[ReadCallback]) extends RangeReaderRasterSource - with URIRasterSource with URIRasterSourceDebugString { self ⇒ - @transient - protected lazy val rangeReader = { - val base = S3RangeReader(source, client()) - callback.map(cb ⇒ ReportingRangeReader(base, cb, self)).getOrElse(base) - } - } - - case class HttpGeoTiffRasterSource(source: URI, callback: Option[ReadCallback]) extends RangeReaderRasterSource - with URIRasterSource with URIRasterSourceDebugString { self ⇒ - - @transient - protected lazy val rangeReader = { - val base = HttpRangeReader(source) - callback.map(cb ⇒ ReportingRangeReader(base, cb, self)).getOrElse(base) - } - - override protected def resolveDate: Option[ZonedDateTime] = { - super.resolveDate - .orElse { - val hrr = rangeReader match { - case h: HttpRangeReader ⇒ h - case ReportingRangeReader(h: HttpRangeReader, _, _) ⇒ h - } - hrr.response.headers.get("Last-Modified") - .flatMap(_.headOption) - .flatMap(s ⇒ Try(ZonedDateTime.parse(s, DateTimeFormatter.RFC_1123_DATE_TIME)).toOption) - } - } - } - - /** Trait for registering a callback for logging or monitoring range reads. - * NB: the callback will be invoked from within a Spark task, and therefore - * is serialized along with its closure to executors. 
*/ - trait ReadCallback extends Serializable { - def readRange(source: RasterSource, start: Long, length: Int): Unit - } - - private case class ReportingRangeReader(delegate: RangeReader, callback: ReadCallback, parent: RasterSource) extends RangeReader { - override def totalLength: Long = delegate.totalLength - - override protected def readClippedRange(start: Long, length: Int): Array[Byte] = { - callback.readRange(parent, start, length) - delegate.readRange(start, length) - } - } - - trait URIRasterSourceDebugString { - _: RangeReaderRasterSource with URIRasterSource with Product ⇒ - def toDebugString: String = { - val buf = new StringBuilder() - buf.append(productPrefix) - buf.append("(") - buf.append("source=") - buf.append(source.toASCIIString) - buf.append(", size=") - buf.append(size) - buf.append(", dimensions=") - buf.append(dimensions) - buf.append(", crs=") - buf.append(crs) - buf.append(", extent=") - buf.append(extent) - buf.append(", timestamp=") - buf.append(timestamp) - buf.append(")") - buf.toString - } - } - -} \ No newline at end of file diff --git a/core/src/main/scala/astraea/spark/rasterframes/rules/SpatialUDFSubstitutionRules.scala b/core/src/main/scala/astraea/spark/rasterframes/rules/SpatialUDFSubstitutionRules.scala deleted file mode 100644 index c0f985b0b..000000000 --- a/core/src/main/scala/astraea/spark/rasterframes/rules/SpatialUDFSubstitutionRules.scala +++ /dev/null @@ -1,21 +0,0 @@ -package astraea.spark.rasterframes.rules - -import astraea.spark.rasterframes.expressions.SpatialRelation -import org.apache.spark.sql.catalyst.expressions.ScalaUDF -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.catalyst.rules.Rule - -/** - * Swaps out spatial relation UDFs for expression forms. 
- * - * @since 2/19/18 - */ -object SpatialUDFSubstitutionRules extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = { - plan.transform { - case q: LogicalPlan ⇒ q.transformExpressions { - case s: ScalaUDF ⇒ SpatialRelation.fromUDF(s).getOrElse(s) - } - } - } -} diff --git a/core/src/main/scala/astraea/spark/rasterframes/tiles/DelegatingTile.scala b/core/src/main/scala/astraea/spark/rasterframes/tiles/DelegatingTile.scala deleted file mode 100644 index 8f0a910c7..000000000 --- a/core/src/main/scala/astraea/spark/rasterframes/tiles/DelegatingTile.scala +++ /dev/null @@ -1,109 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2018 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - * - * SPDX-License-Identifier: Apache-2.0 - * - */ - -package astraea.spark.rasterframes.tiles - -import geotrellis.raster._ - -/** - * A tile that wraps another tile. Originally intended for delayed reading, but useful in other special use cases. 
- * - * @since 8/22/18 - */ -trait DelegatingTile extends Tile { - protected def delegate: Tile - - def cellType: CellType = - delegate.cellType - - def cols: Int = - delegate.cols - - def rows: Int = - delegate.rows - - def mutable: MutableArrayTile = - delegate.mutable - - def convert(cellType: CellType): Tile = - delegate.convert(cellType) - - override def withNoData(noDataValue: Option[Double]): Tile = - delegate.withNoData(noDataValue) - - def interpretAs(newCellType: CellType): Tile = - delegate.interpretAs(newCellType) - - def get(col: Int, row: Int): Int = - delegate.get(col, row) - - def getDouble(col: Int, row: Int): Double = - delegate.getDouble(col, row) - - def toArrayTile(): ArrayTile = - delegate.toArrayTile() - - def toArray(): Array[Int] = - delegate.toArray() - - def toArrayDouble(): Array[Double] = - delegate.toArrayDouble() - - def toBytes(): Array[Byte] = - delegate.toBytes() - - def foreach(f: Int ⇒ Unit): Unit = - delegate.foreach(f) - - def foreachDouble(f: Double ⇒ Unit): Unit = - delegate.foreachDouble(f) - - def map(f: Int ⇒ Int): Tile = - delegate.map(f) - - def combine(r2: Tile)(f: (Int, Int) ⇒ Int): Tile = (delegate, r2) match { - // Hack until https://github.com/locationtech/geotrellis/issues/2792 - case (del: ArrayTile, r2: DelegatingTile) ⇒ del.combine(r2.toArrayTile())(f) - case _ ⇒ delegate.combine(r2)(f) - } - - def combineDouble(r2: Tile)(f: (Double, Double) ⇒ Double): Tile = (delegate, r2) match { - // Hack until https://github.com/locationtech/geotrellis/issues/2792 - case (del: ArrayTile, r2: DelegatingTile) ⇒ del.combineDouble(r2.toArrayTile())(f) - case _ ⇒ delegate.combineDouble(r2)(f) - } - - def mapDouble(f: Double ⇒ Double): Tile = - delegate.mapDouble(f) - - def foreachIntVisitor(visitor: IntTileVisitor): Unit = - delegate.foreachIntVisitor(visitor) - - def foreachDoubleVisitor(visitor: DoubleTileVisitor): Unit = - delegate.foreachDoubleVisitor(visitor) - - def mapIntMapper(mapper: IntTileMapper): Tile = - 
delegate.mapIntMapper(mapper) - - def mapDoubleMapper(mapper: DoubleTileMapper): Tile = - delegate.mapDoubleMapper(mapper) - -} diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/ReadAccumulator.scala b/core/src/main/scala/astraea/spark/rasterframes/util/ReadAccumulator.scala deleted file mode 100644 index f9cdfb48e..000000000 --- a/core/src/main/scala/astraea/spark/rasterframes/util/ReadAccumulator.scala +++ /dev/null @@ -1,50 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2018 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- * - * SPDX-License-Identifier: Apache-2.0 - * - */ - -package astraea.spark.rasterframes.util - -import astraea.spark.rasterframes.ref.RasterSource -import astraea.spark.rasterframes.ref.RasterSource.ReadCallback -import com.typesafe.scalalogging.LazyLogging -import org.apache.spark.SparkContext -import org.apache.spark.util.LongAccumulator - -/** - * Support for keeping counts of read operations from RasterSource-s - * - * @since 9/3/18 - */ -case class ReadAccumulator(reads: () ⇒ LongAccumulator, bytes: () ⇒ LongAccumulator) extends ReadCallback { - override def readRange(source: RasterSource, start: Long, length: Int): Unit = { - reads().add(1) - bytes().add(length) - } - override def toString: String = - s"${productPrefix}(reads=${reads().value}, bytes=${bytes().value})" -} - -object ReadAccumulator extends LazyLogging { - def apply(sc: SparkContext, prefix: String): ReadAccumulator = this.synchronized { - val reads = sc.longAccumulator(prefix + ".reads") - val bytes = sc.longAccumulator(prefix + ".bytes") - new ReadAccumulator(() ⇒ reads, () ⇒ bytes) - } -} \ No newline at end of file diff --git a/core/src/main/scala/org/apache/spark/sql/rf/FilterTranslator.scala b/core/src/main/scala/org/apache/spark/sql/rf/FilterTranslator.scala index edecc44b5..6433ef8d3 100644 --- a/core/src/main/scala/org/apache/spark/sql/rf/FilterTranslator.scala +++ b/core/src/main/scala/org/apache/spark/sql/rf/FilterTranslator.scala @@ -19,8 +19,8 @@ package org.apache.spark.sql.rf import java.sql.{Date, Timestamp} -import astraea.spark.rasterframes.expressions.SpatialRelation.{Contains, Intersects} -import astraea.spark.rasterframes.rules._ +import org.locationtech.rasterframes.expressions.SpatialRelation.{Contains, Intersects} +import org.locationtech.rasterframes.rules._ import org.apache.spark.sql.catalyst.CatalystTypeConverters.{convertToScala, createToScalaConverter} import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions.{Attribute, 
EmptyRow, Expression, Literal} @@ -30,6 +30,7 @@ import org.apache.spark.sql.sources.Filter import org.apache.spark.sql.types.{DateType, StringType, TimestampType} import org.apache.spark.unsafe.types.UTF8String import org.locationtech.geomesa.spark.jts.rules.GeometryLiteral +import org.locationtech.rasterframes.rules.{SpatialFilters, TemporalFilters} /** * This is a copy of [[org.apache.spark.sql.execution.datasources.DataSourceStrategy.translateFilter]], modified to add our spatial predicates. diff --git a/core/src/main/scala/org/apache/spark/sql/rf/RasterSourceUDT.scala b/core/src/main/scala/org/apache/spark/sql/rf/RasterSourceUDT.scala index 2dee38a6e..772bde6fe 100644 --- a/core/src/main/scala/org/apache/spark/sql/rf/RasterSourceUDT.scala +++ b/core/src/main/scala/org/apache/spark/sql/rf/RasterSourceUDT.scala @@ -23,12 +23,12 @@ package org.apache.spark.sql.rf import java.nio.ByteBuffer -import astraea.spark.rasterframes.encoders.CatalystSerializer -import astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.ref.RasterSource -import astraea.spark.rasterframes.util.KryoSupport +import org.locationtech.rasterframes.encoders.CatalystSerializer._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types.{DataType, UDTRegistration, UserDefinedType, _} +import org.locationtech.rasterframes.encoders.CatalystSerializer +import org.locationtech.rasterframes.ref.RasterSource +import org.locationtech.rasterframes.util.KryoSupport /** * Catalyst representation of a RasterSource. 
@@ -40,11 +40,11 @@ class RasterSourceUDT extends UserDefinedType[RasterSource] { import RasterSourceUDT._ override def typeName = "rf_rastersource" - override def pyUDT: String = "pyrasterframes.RasterSourceUDT" + override def pyUDT: String = "pyrasterframes.rf_types.RasterSourceUDT" def userClass: Class[RasterSource] = classOf[RasterSource] - override def sqlType: DataType = CatalystSerializer[RasterSource].schema + override def sqlType: DataType = schemaOf[RasterSource] override def serialize(obj: RasterSource): InternalRow = Option(obj) @@ -65,9 +65,12 @@ class RasterSourceUDT extends UserDefinedType[RasterSource] { } } -object RasterSourceUDT extends RasterSourceUDT { +object RasterSourceUDT { UDTRegistration.register(classOf[RasterSource].getName, classOf[RasterSourceUDT].getName) + /** Deserialize a byte array, also used inside the Python API */ + def from(byteArray: Array[Byte]): RasterSource = CatalystSerializer.CatalystIO.rowIO.create(byteArray).to[RasterSource] + implicit val rasterSourceSerializer: CatalystSerializer[RasterSource] = new CatalystSerializer[RasterSource] { override def schema: StructType = StructType(Seq( diff --git a/core/src/main/scala/org/apache/spark/sql/rf/TileUDT.scala b/core/src/main/scala/org/apache/spark/sql/rf/TileUDT.scala index 75ac0f7cf..66c0d98a1 100644 --- a/core/src/main/scala/org/apache/spark/sql/rf/TileUDT.scala +++ b/core/src/main/scala/org/apache/spark/sql/rf/TileUDT.scala @@ -20,14 +20,13 @@ */ package org.apache.spark.sql.rf - -import astraea.spark.rasterframes.encoders.CatalystSerializer -import astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.model.{Cells, TileDataContext} -import astraea.spark.rasterframes.tiles.InternalRowTile import geotrellis.raster._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types.{DataType, _} +import org.locationtech.rasterframes.encoders.CatalystSerializer +import 
org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.model.{Cells, TileDataContext} +import org.locationtech.rasterframes.tiles.InternalRowTile /** @@ -40,11 +39,11 @@ class TileUDT extends UserDefinedType[Tile] { import TileUDT._ override def typeName = TileUDT.typeName - override def pyUDT: String = "pyrasterframes.TileUDT" + override def pyUDT: String = "pyrasterframes.rf_types.TileUDT" def userClass: Class[Tile] = classOf[Tile] - def sqlType: StructType = CatalystSerializer[Tile].schema + def sqlType: StructType = schemaOf[Tile] override def serialize(obj: Tile): InternalRow = Option(obj) @@ -57,7 +56,7 @@ class TileUDT extends UserDefinedType[Tile] { case ir: InternalRow ⇒ ir.to[Tile] } .map { - case realIRT: InternalRowTile ⇒ realIRT.toArrayTile() + case realIRT: InternalRowTile ⇒ realIRT.realizedTile case other ⇒ other } .orNull @@ -74,11 +73,10 @@ case object TileUDT { final val typeName: String = "tile" implicit def tileSerializer: CatalystSerializer[Tile] = new CatalystSerializer[Tile] { - import scala.language.reflectiveCalls override def schema: StructType = StructType(Seq( - StructField("cell_context", CatalystSerializer[TileDataContext].schema, false), - StructField("cell_data", CatalystSerializer[Cells].schema, false) + StructField("cell_context", schemaOf[TileDataContext], false), + StructField("cell_data", schemaOf[Cells], false) )) override def to[R](t: Tile, io: CatalystIO[R]): R = io.create( diff --git a/core/src/main/scala/org/apache/spark/sql/rf/VersionShims.scala b/core/src/main/scala/org/apache/spark/sql/rf/VersionShims.scala index b9eb96981..81418d466 100644 --- a/core/src/main/scala/org/apache/spark/sql/rf/VersionShims.scala +++ b/core/src/main/scala/org/apache/spark/sql/rf/VersionShims.scala @@ -1,18 +1,18 @@ package org.apache.spark.sql.rf -import java.lang.reflect.{Constructor, Method} +import java.lang.reflect.Constructor import org.apache.spark.sql.catalyst.FunctionIdentifier import 
org.apache.spark.sql.catalyst.analysis.FunctionRegistry -import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.{FunctionBuilder, expressionInfo} +import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.catalog.CatalogTable -import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, SQLContext} -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BinaryExpression, Expression, ExpressionDescription, ExpressionInfo, RuntimeReplaceable, ScalaUDF} import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, InvokeLike} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, ExpressionDescription, ExpressionInfo} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, SQLContext} import scala.reflect._ import scala.util.{Failure, Success, Try} diff --git a/core/src/main/scala/org/locationtech/rasterframes/MetadataKeys.scala b/core/src/main/scala/org/locationtech/rasterframes/MetadataKeys.scala new file mode 100644 index 000000000..9eb4000d9 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/MetadataKeys.scala @@ -0,0 +1,34 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2018 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes + +/** + * + * @since 2/19/18 + */ +trait MetadataKeys { + /** Key under which ContextRDD metadata is stored. */ + private[rasterframes] val CONTEXT_METADATA_KEY = "_context" + + /** Key under which RasterFrameLayer role a column plays. */ + private[rasterframes] val SPATIAL_ROLE_KEY = "_stRole" +} diff --git a/core/src/main/scala/astraea/spark/rasterframes/PairRDDConverter.scala b/core/src/main/scala/org/locationtech/rasterframes/PairRDDConverter.scala similarity index 86% rename from core/src/main/scala/astraea/spark/rasterframes/PairRDDConverter.scala rename to core/src/main/scala/org/locationtech/rasterframes/PairRDDConverter.scala index 83686dbfd..658c0d65d 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/PairRDDConverter.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/PairRDDConverter.scala @@ -1,8 +1,29 @@ -package astraea.spark.rasterframes +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes -import astraea.spark.rasterframes.util._ +import org.locationtech.rasterframes.util._ import geotrellis.raster.{MultibandTile, Tile, TileFeature} -import geotrellis.spark.{SpaceTimeKey, SpatialKey, TemporalKey} +import geotrellis.spark.{SpaceTimeKey, SpatialKey} import org.apache.spark.rdd.RDD import org.apache.spark.sql._ import org.apache.spark.sql.rf.TileUDT @@ -15,7 +36,7 @@ import scala.annotation.implicitNotFound * * @since 4/8/18 */ -@implicitNotFound("An RDD converter is required create a RasterFrame. " + +@implicitNotFound("An RDD converter is required create a RasterFrameLayer. " + "Please provide an implementation of PairRDDConverter[${K}, ${V}].") trait PairRDDConverter[K, V] extends Serializable { val schema: StructType diff --git a/core/src/main/scala/org/locationtech/rasterframes/RasterFunctions.scala b/core/src/main/scala/org/locationtech/rasterframes/RasterFunctions.scala new file mode 100644 index 000000000..20b11c679 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/RasterFunctions.scala @@ -0,0 +1,430 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2017 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes +import geotrellis.proj4.CRS +import geotrellis.raster.mapalgebra.local.LocalTileBinaryOp +import geotrellis.raster.{CellType, Tile} +import geotrellis.vector.Extent +import org.apache.spark.annotation.Experimental +import org.apache.spark.sql.functions.{lit, udf} +import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.jts.geom.Geometry +import org.locationtech.rasterframes.expressions.TileAssembler +import org.locationtech.rasterframes.expressions.accessors._ +import org.locationtech.rasterframes.expressions.aggregates._ +import org.locationtech.rasterframes.expressions.generators._ +import org.locationtech.rasterframes.expressions.localops._ +import org.locationtech.rasterframes.expressions.tilestats._ +import org.locationtech.rasterframes.expressions.transformers._ +import org.locationtech.rasterframes.model.TileDimensions +import org.locationtech.rasterframes.stats._ +import org.locationtech.rasterframes.{functions => F} + +/** + * UDFs for working with Tiles in Spark DataFrames. + * + * @since 4/3/17 + */ +trait RasterFunctions { + import util._ + + // format: off + /** Query the number of (cols, rows) in a Tile. */ + def rf_dimensions(col: Column): TypedColumn[Any, TileDimensions] = GetDimensions(col) + + /** Extracts the bounding box of a geometry as an Extent */ + def st_extent(col: Column): TypedColumn[Any, Extent] = GeometryToExtent(col) + + /** Extracts the bounding box from a RasterSource or ProjectedRasterTile */ + def rf_extent(col: Column): TypedColumn[Any, Extent] = GetExtent(col) + + /** Extracts the CRS from a RasterSource or ProjectedRasterTile */ + def rf_crs(col: Column): TypedColumn[Any, CRS] = GetCRS(col) + + /** Extracts the tile from a ProjectedRasterTile, or passes through a Tile. */ + def rf_tile(col: Column): TypedColumn[Any, Tile] = RealizeTile(col) + + /** Flattens Tile into a double array. 
*/ + def rf_tile_to_array_double(col: Column): TypedColumn[Any, Array[Double]] = + TileToArrayDouble(col) + + /** Flattens Tile into an integer array. */ + def rf_tile_to_array_int(col: Column): TypedColumn[Any, Array[Double]] = + TileToArrayDouble(col) + + @Experimental + /** Convert array in `arrayCol` into a Tile of dimensions `cols` and `rows`*/ + def rf_array_to_tile(arrayCol: Column, cols: Int, rows: Int) = withAlias("rf_array_to_tile", arrayCol)( + udf[Tile, AnyRef](F.arrayToTile(cols, rows)).apply(arrayCol) + ) + + /** Create a Tile from a column of cell data with location indexes and preform cell conversion. */ + def rf_assemble_tile(columnIndex: Column, rowIndex: Column, cellData: Column, tileCols: Int, tileRows: Int, ct: CellType): TypedColumn[Any, Tile] = + rf_convert_cell_type(TileAssembler(columnIndex, rowIndex, cellData, lit(tileCols), lit(tileRows)), ct).as(cellData.columnName).as[Tile](singlebandTileEncoder) + + /** Create a Tile from a column of cell data with location indexes. */ + def rf_assemble_tile(columnIndex: Column, rowIndex: Column, cellData: Column, tileCols: Column, tileRows: Column): TypedColumn[Any, Tile] = + TileAssembler(columnIndex, rowIndex, cellData, tileCols, tileRows) + + /** Extract the Tile's cell type */ + def rf_cell_type(col: Column): TypedColumn[Any, CellType] = GetCellType(col) + + /** Change the Tile's cell type */ + def rf_convert_cell_type(col: Column, cellType: CellType): TypedColumn[Any, Tile] = + SetCellType(col, cellType) + + /** Change the Tile's cell type */ + def rf_convert_cell_type(col: Column, cellTypeName: String): TypedColumn[Any, Tile] = + SetCellType(col, cellTypeName) + + /** Resample tile to different size based on scalar factor or tile whose dimension to match. Scalar less + * than one will downsample tile; greater than one will upsample. Uses nearest-neighbor. 
*/ + def rf_resample[T: Numeric](tileCol: Column, factorValue: T) = Resample(tileCol, factorValue) + + /** Resample tile to different size based on scalar factor or tile whose dimension to match. Scalar less + * than one will downsample tile; greater than one will upsample. Uses nearest-neighbor. */ + def rf_resample(tileCol: Column, factorCol: Column) = Resample(tileCol, factorCol) + + /** Convert a bounding box structure to a Geometry type. Intented to support multiple schemas. */ + def st_geometry(extent: Column): TypedColumn[Any, Geometry] = ExtentToGeometry(extent) + + /** Extract the extent of a RasterSource or ProjectedRasterTile as a Geometry type. */ + def rf_geometry(raster: Column): TypedColumn[Any, Geometry] = GetGeometry(raster) + + /** Assign a `NoData` value to the Tiles. */ + def rf_with_no_data(col: Column, nodata: Double): TypedColumn[Any, Tile] = withTypedAlias("rf_with_no_data", col)( + udf[Tile, Tile](F.withNoData(nodata)).apply(col) + ) + + /** Compute the full column aggregate floating point histogram. */ + def rf_agg_approx_histogram(col: Column): TypedColumn[Any, CellHistogram] = + HistogramAggregate(col) + + /** Compute the full column aggregate floating point statistics. */ + def rf_agg_stats(col: Column): TypedColumn[Any, CellStatistics] = + CellStatsAggregate(col) + + /** Computes the column aggregate mean. */ + def rf_agg_mean(col: Column) = CellMeanAggregate(col) + + /** Computes the number of non-NoData cells in a column. */ + def rf_agg_data_cells(col: Column): TypedColumn[Any, Long] = CellCountAggregate.DataCells(col) + + /** Computes the number of NoData cells in a column. 
*/ + def rf_agg_no_data_cells(col: Column): TypedColumn[Any, Long] = CellCountAggregate.NoDataCells(col) + + /** Compute the Tile-wise mean */ + def rf_tile_mean(col: Column): TypedColumn[Any, Double] = + TileMean(col) + + /** Compute the Tile-wise sum */ + def rf_tile_sum(col: Column): TypedColumn[Any, Double] = + Sum(col) + + /** Compute the minimum cell value in tile. */ + def rf_tile_min(col: Column): TypedColumn[Any, Double] = + TileMin(col) + + /** Compute the maximum cell value in tile. */ + def rf_tile_max(col: Column): TypedColumn[Any, Double] = + TileMax(col) + + /** Compute TileHistogram of Tile values. */ + def rf_tile_histogram(col: Column): TypedColumn[Any, CellHistogram] = + TileHistogram(col) + + /** Compute statistics of Tile values. */ + def rf_tile_stats(col: Column): TypedColumn[Any, CellStatistics] = + TileStats(col) + + /** Counts the number of non-NoData cells per Tile. */ + def rf_data_cells(tile: Column): TypedColumn[Any, Long] = + DataCells(tile) + + /** Counts the number of NoData cells per Tile. */ + def rf_no_data_cells(tile: Column): TypedColumn[Any, Long] = + NoDataCells(tile) + + /** Returns true if all cells in the tile are NoData.*/ + def rf_is_no_data_tile(tile: Column): TypedColumn[Any, Boolean] = + IsNoDataTile(tile) + + /** Returns true if any cells in the tile are true (non-zero and not NoData). */ + def rf_exists(tile: Column): TypedColumn[Any, Boolean] = Exists(tile) + + /** Returns true if all cells in the tile are true (non-zero and not NoData). */ + def rf_for_all(tile: Column): TypedColumn[Any, Boolean] = ForAll(tile) + + /** Compute cell-local aggregate descriptive statistics for a column of Tiles. */ + def rf_agg_local_stats(col: Column) = + LocalStatsAggregate(col) + + /** Compute the cell-wise/local max operation between Tiles in a column. */ + def rf_agg_local_max(col: Column): TypedColumn[Any, Tile] = LocalTileOpAggregate.LocalMaxUDAF(col) + + /** Compute the cellwise/local min operation between Tiles in a column. 
*/ + def rf_agg_local_min(col: Column): TypedColumn[Any, Tile] = LocalTileOpAggregate.LocalMinUDAF(col) + + /** Compute the cellwise/local mean operation between Tiles in a column. */ + def rf_agg_local_mean(col: Column): TypedColumn[Any, Tile] = LocalMeanAggregate(col) + + /** Compute the cellwise/local count of non-NoData cells for all Tiles in a column. */ + def rf_agg_local_data_cells(col: Column): TypedColumn[Any, Tile] = LocalCountAggregate.LocalDataCellsUDAF(col) + + /** Compute the cellwise/local count of NoData cells for all Tiles in a column. */ + def rf_agg_local_no_data_cells(col: Column): TypedColumn[Any, Tile] = LocalCountAggregate.LocalNoDataCellsUDAF(col) + + /** Cellwise addition between two Tiles or Tile and scalar column. */ + def rf_local_add(left: Column, right: Column): TypedColumn[Any, Tile] = Add(left, right) + + /** Cellwise addition of a scalar value to a tile. */ + def rf_local_add[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Add(tileCol, value) + + /** Cellwise subtraction between two Tiles. */ + def rf_local_subtract(left: Column, right: Column): TypedColumn[Any, Tile] = Subtract(left, right) + + /** Cellwise subtraction of a scalar value from a tile. */ + def rf_local_subtract[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Subtract(tileCol, value) + + /** Cellwise multiplication between two Tiles. */ + def rf_local_multiply(left: Column, right: Column): TypedColumn[Any, Tile] = Multiply(left, right) + + /** Cellwise multiplication of a tile by a scalar value. */ + def rf_local_multiply[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Multiply(tileCol, value) + + /** Cellwise division between two Tiles. */ + def rf_local_divide(left: Column, right: Column): TypedColumn[Any, Tile] = Divide(left, right) + + /** Cellwise division of a tile by a scalar value. 
*/ + def rf_local_divide[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = Divide(tileCol, value) + + /** Perform an arbitrary GeoTrellis `LocalTileBinaryOp` between two Tile columns. */ + def rf_local_algebra(op: LocalTileBinaryOp, left: Column, right: Column): TypedColumn[Any, Tile] = + withTypedAlias(opName(op), left, right)(udf[Tile, Tile, Tile](op.apply).apply(left, right)) + + /** Compute the normalized difference of two tile columns */ + def rf_normalized_difference(left: Column, right: Column) = + NormalizedDifference(left, right) + + /** Constructor for tile column with a single cell value. */ + def rf_make_constant_tile(value: Number, cols: Int, rows: Int, cellType: CellType): TypedColumn[Any, Tile] = + rf_make_constant_tile(value, cols, rows, cellType.name) + + /** Constructor for tile column with a single cell value. */ + def rf_make_constant_tile(value: Number, cols: Int, rows: Int, cellTypeName: String): TypedColumn[Any, Tile] = { + import org.apache.spark.sql.rf.TileUDT.tileSerializer + val constTile = encoders.serialized_literal(F.makeConstantTile(value, cols, rows, cellTypeName)) + withTypedAlias(s"rf_make_constant_tile($value, $cols, $rows, $cellTypeName)")(constTile) + } + + /** Create a column constant tiles of zero */ + def rf_make_zeros_tile(cols: Int, rows: Int, cellType: CellType): TypedColumn[Any, Tile] = + rf_make_zeros_tile(cols, rows, cellType.name) + + /** Create a column constant tiles of zero */ + def rf_make_zeros_tile(cols: Int, rows: Int, cellTypeName: String): TypedColumn[Any, Tile] = { + import org.apache.spark.sql.rf.TileUDT.tileSerializer + val constTile = encoders.serialized_literal(F.tileZeros(cols, rows, cellTypeName)) + withTypedAlias(s"rf_make_zeros_tile($cols, $rows, $cellTypeName)")(constTile) + } + + /** Creates a column of tiles containing all ones */ + def rf_make_ones_tile(cols: Int, rows: Int, cellType: CellType): TypedColumn[Any, Tile] = + rf_make_ones_tile(cols, rows, cellType.name) + + /** Creates 
a column of tiles containing all ones */ + def rf_make_ones_tile(cols: Int, rows: Int, cellTypeName: String): TypedColumn[Any, Tile] = { + import org.apache.spark.sql.rf.TileUDT.tileSerializer + val constTile = encoders.serialized_literal(F.tileOnes(cols, rows, cellTypeName)) + withTypedAlias(s"rf_make_ones_tile($cols, $rows, $cellTypeName)")(constTile) + } + + /** Where the rf_mask tile contains NODATA, replace values in the source tile with NODATA */ + def rf_mask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] = + Mask.MaskByDefined(sourceTile, maskTile) + + /** Where the `maskTile` equals `maskValue`, replace values in the source tile with `NoData` */ + def rf_mask_by_value(sourceTile: Column, maskTile: Column, maskValue: Column): TypedColumn[Any, Tile] = + Mask.MaskByValue(sourceTile, maskTile, maskValue) + + /** Where the `maskTile` does **not** contain `NoData`, replace values in the source tile with `NoData` */ + def rf_inverse_mask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] = + Mask.InverseMaskByDefined(sourceTile, maskTile) + + /** Where the `maskTile` does **not** equal `maskValue`, replace values in the source tile with `NoData` */ + def rf_inverse_mask_by_value(sourceTile: Column, maskTile: Column, maskValue: Column): TypedColumn[Any, Tile] = + Mask.InverseMaskByValue(sourceTile, maskTile, maskValue) + + /** Create a tile where cells in the grid defined by cols, rows, and bounds are filled with the given value. 
*/ + def rf_rasterize(geometry: Column, bounds: Column, value: Column, cols: Int, rows: Int): TypedColumn[Any, Tile] = + withTypedAlias("rf_rasterize", geometry)( + udf(F.rasterize(_: Geometry, _: Geometry, _: Int, cols, rows)).apply(geometry, bounds, value) + ) + + def rf_rasterize(geometry: Column, bounds: Column, value: Column, cols: Column, rows: Column): TypedColumn[Any, Tile] = + withTypedAlias("rf_rasterize", geometry)( + udf(F.rasterize).apply(geometry, bounds, value, cols, rows) + ) + + /** Reproject a column of geometry from one CRS to another. + * @param sourceGeom Geometry column to reproject + * @param srcCRS Native CRS of `sourceGeom` as a literal + * @param dstCRSCol Destination CRS as a column + */ + def st_reproject(sourceGeom: Column, srcCRS: CRS, dstCRSCol: Column): TypedColumn[Any, Geometry] = + ReprojectGeometry(sourceGeom, srcCRS, dstCRSCol) + + /** Reproject a column of geometry from one CRS to another. + * @param sourceGeom Geometry column to reproject + * @param srcCRSCol Native CRS of `sourceGeom` as a column + * @param dstCRS Destination CRS as a literal + */ + def st_reproject(sourceGeom: Column, srcCRSCol: Column, dstCRS: CRS): TypedColumn[Any, Geometry] = + ReprojectGeometry(sourceGeom, srcCRSCol, dstCRS) + + /** Reproject a column of geometry from one CRS to another. + * @param sourceGeom Geometry column to reproject + * @param srcCRS Native CRS of `sourceGeom` as a literal + * @param dstCRS Destination CRS as a literal + */ + def st_reproject(sourceGeom: Column, srcCRS: CRS, dstCRS: CRS): TypedColumn[Any, Geometry] = + ReprojectGeometry(sourceGeom, srcCRS, dstCRS) + + /** Reproject a column of geometry from one CRS to another. 
+ * @param sourceGeom Geometry column to reproject + * @param srcCRSCol Native CRS of `sourceGeom` as a column + * @param dstCRSCol Destination CRS as a column + */ + def st_reproject(sourceGeom: Column, srcCRSCol: Column, dstCRSCol: Column): TypedColumn[Any, Geometry] = + ReprojectGeometry(sourceGeom, srcCRSCol, dstCRSCol) + + /** Render Tile as ASCII string, for debugging purposes. */ + def rf_render_ascii(col: Column): TypedColumn[Any, String] = + DebugRender.RenderAscii(col) + + /** Render Tile cell values as numeric values, for debugging purposes. */ + def rf_render_matrix(col: Column): TypedColumn[Any, String] = + DebugRender.RenderMatrix(col) + + /** Cellwise less than value comparison between two tiles. */ + def rf_local_less(left: Column, right: Column): TypedColumn[Any, Tile] = + Less(left, right) + + /** Cellwise less than value comparison between a tile and a scalar. */ + def rf_local_less[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = + Less(tileCol, value) + + /** Cellwise less than or equal to value comparison between a tile and a scalar. */ + def rf_local_less_equal(left: Column, right: Column): TypedColumn[Any, Tile] = + LessEqual(left, right) + + /** Cellwise less than or equal to value comparison between a tile and a scalar. */ + def rf_local_less_equal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = + LessEqual(tileCol, value) + + /** Cellwise greater than value comparison between two tiles. */ + def rf_local_greater(left: Column, right: Column): TypedColumn[Any, Tile] = + Greater(left, right) + + /** Cellwise greater than value comparison between a tile and a scalar. */ + def rf_local_greater[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = + Greater(tileCol, value) + + /** Cellwise greater than or equal to value comparison between two tiles. 
*/ + def rf_local_greater_equal(left: Column, right: Column): TypedColumn[Any, Tile] = + GreaterEqual(left, right) + + /** Cellwise greater than or equal to value comparison between a tile and a scalar. */ + def rf_local_greater_equal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = + GreaterEqual(tileCol, value) + + /** Cellwise equal to value comparison between two tiles. */ + def rf_local_equal(left: Column, right: Column): TypedColumn[Any, Tile] = + Equal(left, right) + + /** Cellwise equal to value comparison between a tile and a scalar. */ + def rf_local_equal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = + Equal(tileCol, value) + + /** Cellwise inequality comparison between two tiles. */ + def rf_local_unequal(left: Column, right: Column): TypedColumn[Any, Tile] = + Unequal(left, right) + + /** Cellwise inequality comparison between a tile and a scalar. */ + def rf_local_unequal[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = + Unequal(tileCol, value) + + /** Round cell values to nearest integer without changing cell type. */ + def rf_round(tileCol: Column): TypedColumn[Any, Tile] = + Round(tileCol) + + /** Compute the absolute value of each cell. */ + def rf_abs(tileCol: Column): TypedColumn[Any, Tile] = + Abs(tileCol) + + /** Take natural logarithm of cell values. */ + def rf_log(tileCol: Column): TypedColumn[Any, Tile] = + Log(tileCol) + + /** Take base 10 logarithm of cell values. */ + def rf_log10(tileCol: Column): TypedColumn[Any, Tile] = + Log10(tileCol) + + /** Take base 2 logarithm of cell values. */ + def rf_log2(tileCol: Column): TypedColumn[Any, Tile] = + Log2(tileCol) + + /** Natural logarithm of one plus cell values. 
*/ + def rf_log1p(tileCol: Column): TypedColumn[Any, Tile] = + Log1p(tileCol) + + /** Exponential of cell values */ + def rf_exp(tileCol: Column): TypedColumn[Any, Tile] = + Exp(tileCol) + + /** Ten to the power of cell values */ + def rf_exp10(tileCol: Column): TypedColumn[Any, Tile] = + Exp10(tileCol) + + /** Two to the power of cell values */ + def rf_exp2(tileCol: Column): TypedColumn[Any, Tile] = + Exp2(tileCol) + + /** Exponential of cell values, less one*/ + def rf_expm1(tileCol: Column): TypedColumn[Any, Tile] = + ExpM1(tileCol) + + /** Return the incoming tile untouched. */ + def rf_identity(tileCol: Column): TypedColumn[Any, Tile] = + Identity(tileCol) + + /** Create a row for each cell in Tile. */ + def rf_explode_tiles(cols: Column*): Column = rf_explode_tiles_sample(1.0, None, cols: _*) + + /** Create a row for each cell in Tile with random sampling and optional seed. */ + def rf_explode_tiles_sample(sampleFraction: Double, seed: Option[Long], cols: Column*): Column = + ExplodeTiles(sampleFraction, seed, cols) + + /** Create a row for each cell in Tile with random sampling (no seed). */ + def rf_explode_tiles_sample(sampleFraction: Double, cols: Column*): Column = + ExplodeTiles(sampleFraction, None, cols) +} diff --git a/core/src/main/scala/org/locationtech/rasterframes/StandardColumns.scala b/core/src/main/scala/org/locationtech/rasterframes/StandardColumns.scala new file mode 100644 index 000000000..2e82ab356 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/StandardColumns.scala @@ -0,0 +1,93 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes + +import java.sql.Timestamp + +import geotrellis.proj4.CRS +import geotrellis.raster.Tile +import geotrellis.spark.{SpatialKey, TemporalKey} +import geotrellis.vector.{Extent, ProjectedExtent} +import org.apache.spark.sql.functions.col +import org.locationtech.jts.geom.{Point => jtsPoint, Polygon => jtsPolygon} +import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders._ +import org.locationtech.rasterframes.tiles.ProjectedRasterTile + +/** + * Constants identifying columns in most RasterFrames. + * + * @since 2/19/18 + */ +trait StandardColumns { + /** Default RasterFrameLayer spatial column name. */ + val SPATIAL_KEY_COLUMN = col("spatial_key").as[SpatialKey] + + /** Default RasterFrameLayer temporal column name. */ + val TEMPORAL_KEY_COLUMN = col("temporal_key").as[TemporalKey] + + /** Default RasterFrameLayer timestamp column name */ + val TIMESTAMP_COLUMN = col("timestamp").as[Timestamp] + + /** Default RasterFrameLayer column name for a tile extent as geometry value. */ + // This is a `def` because `PolygonUDT` needs to be initialized first. + def GEOMETRY_COLUMN = col("geometry").as[jtsPolygon] + + /** Default RasterFrameLayer column name for the center coordinates of the tile's bounds. */ + // This is a `def` because `PointUDT` needs to be initialized first. + def CENTER_COLUMN = col("center").as[jtsPoint] + + /** Default Extent column name. 
*/ + def EXTENT_COLUMN = col("extent").as[Extent] + + /** Default ProjectedExtent column name. */ + def PROJECTED_EXTENT_COLUMN = col("proj_extent").as[ProjectedExtent] + + /** Default CRS column name. */ + def CRS_COLUMN = col("crs").as[CRS] + + /** Default RasterFrameLayer column name for an added spatial index. */ + val SPATIAL_INDEX_COLUMN = col("spatial_index").as[Long] + + /** Default RasterFrameLayer tile column name. */ + // This is a `def` because `TileUDT` needs to be initialized first. + def TILE_COLUMN = col("tile").as[Tile] + + /** Default column name for a tile with its CRS and Extent. */ + def PROJECTED_RASTER_COLUMN = col("proj_raster").as[ProjectedRasterTile] + + /** Default RasterFrameLayer `TileFeature.data` column name. */ + val TILE_FEATURE_DATA_COLUMN = col("tile_data") + + /** Default GeoTiff tags column. */ + val METADATA_COLUMN = col("metadata").as[Map[String, String]] + + /** Default column index column for the cells of exploded tiles. */ + val COLUMN_INDEX_COLUMN = col("column_index").as[Int] + + /** Default row index column for the cells of exploded tiles. */ + val ROW_INDEX_COLUMN = col("row_index").as[Int] + + /** URI/URL/S3 path to raster. */ + val PATH_COLUMN = col("path").as[String] +} + +object StandardColumns extends StandardColumns diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/CRSEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/CRSEncoder.scala similarity index 84% rename from core/src/main/scala/astraea/spark/rasterframes/encoders/CRSEncoder.scala rename to core/src/main/scala/org/locationtech/rasterframes/encoders/CRSEncoder.scala index b6a188d71..39ed8d6f3 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/encoders/CRSEncoder.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/CRSEncoder.scala @@ -15,13 +15,14 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.encoders - -import astraea.spark.rasterframes.util.CRSParser +package org.locationtech.rasterframes.encoders import geotrellis.proj4.CRS import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.locationtech.rasterframes.model.LazyCRS /** * Custom encoder for GT `CRS`. @@ -33,5 +34,5 @@ object CRSEncoder { "crsProj4", "toProj4String", (CRSEncoder.getClass, "fromString") ) // Not sure why this delegate is necessary, but doGenCode fails without it. - def fromString(str: String): CRS = CRSParser(str) + def fromString(str: String): CRS = LazyCRS(str) } diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializer.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/CatalystSerializer.scala similarity index 82% rename from core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializer.scala rename to core/src/main/scala/org/locationtech/rasterframes/encoders/CatalystSerializer.scala index 3f09e1f38..831411557 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializer.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/CatalystSerializer.scala @@ -19,9 +19,9 @@ * */ -package astraea.spark.rasterframes.encoders +package org.locationtech.rasterframes.encoders -import astraea.spark.rasterframes.encoders.CatalystSerializer.CatalystIO +import CatalystSerializer.CatalystIO import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.ArrayData @@ -50,6 +50,8 @@ trait CatalystSerializer[T] extends Serializable { object CatalystSerializer extends StandardSerializers { def apply[T: CatalystSerializer]: CatalystSerializer[T] = implicitly + def schemaOf[T: CatalystSerializer]: StructType = apply[T].schema + /** * For some reason `Row` and `InternalRow` share no common base type. 
Instead of using * structural types (which use reflection), this typeclass is used to normalize access @@ -61,8 +63,8 @@ object CatalystSerializer extends StandardSerializers { def create(values: Any*): R def to[T: CatalystSerializer](t: T): R = CatalystSerializer[T].to(t, this) def toSeq[T: CatalystSerializer](t: Seq[T]): AnyRef - def get[T: CatalystSerializer](d: R, ordinal: Int): T - def getSeq[T: CatalystSerializer](d: R, ordinal: Int): Seq[T] + def get[T >: Null: CatalystSerializer](d: R, ordinal: Int): T + def getSeq[T >: Null: CatalystSerializer](d: R, ordinal: Int): Seq[T] def isNullAt(d: R, ordinal: Int): Boolean def getBoolean(d: R, ordinal: Int): Boolean def getByte(d: R, ordinal: Int): Byte @@ -91,14 +93,14 @@ object CatalystSerializer extends StandardSerializers { override def getString(d: R, ordinal: Int): String = d.getString(ordinal) override def getByteArray(d: R, ordinal: Int): Array[Byte] = d.get(ordinal).asInstanceOf[Array[Byte]] - override def get[T: CatalystSerializer](d: R, ordinal: Int): T = { + override def get[T >: Null: CatalystSerializer](d: R, ordinal: Int): T = { d.getAs[Any](ordinal) match { case r: Row => r.to[T] case o => o.asInstanceOf[T] } } override def toSeq[T: CatalystSerializer](t: Seq[T]): AnyRef = t.map(_.toRow) - override def getSeq[T: CatalystSerializer](d: R, ordinal: Int): Seq[T] = + override def getSeq[T >: Null: CatalystSerializer](d: R, ordinal: Int): Seq[T] = d.getSeq[Row](ordinal).map(_.to[T]) override def encode(str: String): String = str } @@ -118,7 +120,7 @@ object CatalystSerializer extends StandardSerializers { override def getDouble(d: InternalRow, ordinal: Int): Double = d.getDouble(ordinal) override def getString(d: InternalRow, ordinal: Int): String = d.getString(ordinal) override def getByteArray(d: InternalRow, ordinal: Int): Array[Byte] = d.getBinary(ordinal) - override def get[T: CatalystSerializer](d: InternalRow, ordinal: Int): T = { + override def get[T >: Null: CatalystSerializer](d: InternalRow, 
ordinal: Int): T = { val ser = CatalystSerializer[T] val struct = d.getStruct(ordinal, ser.schema.size) struct.to[T] @@ -127,7 +129,7 @@ object CatalystSerializer extends StandardSerializers { override def toSeq[T: CatalystSerializer](t: Seq[T]): ArrayData = ArrayData.toArrayData(t.map(_.toInternalRow).toArray) - override def getSeq[T: CatalystSerializer](d: InternalRow, ordinal: Int): Seq[T] = { + override def getSeq[T >: Null: CatalystSerializer](d: InternalRow, ordinal: Int): Seq[T] = { val ad = d.getArray(ordinal) val result = Array.ofDim[Any](ad.numElements()).asInstanceOf[Array[T]] ad.foreach( @@ -141,15 +143,20 @@ object CatalystSerializer extends StandardSerializers { } implicit class WithToRow[T: CatalystSerializer](t: T) { - def toInternalRow: InternalRow = CatalystSerializer[T].toInternalRow(t) - def toRow: Row = CatalystSerializer[T].toRow(t) + def toInternalRow: InternalRow = if (t == null) null else CatalystSerializer[T].toInternalRow(t) + def toRow: Row = if (t == null) null else CatalystSerializer[T].toRow(t) } implicit class WithFromInternalRow(val r: InternalRow) extends AnyVal { - def to[T: CatalystSerializer]: T = CatalystSerializer[T].fromInternalRow(r) + def to[T >: Null: CatalystSerializer]: T = if (r == null) null else CatalystSerializer[T].fromInternalRow(r) } implicit class WithFromRow(val r: Row) extends AnyVal { - def to[T: CatalystSerializer]: T = CatalystSerializer[T].fromRow(r) + def to[T >: Null: CatalystSerializer]: T = if (r == null) null else CatalystSerializer[T].fromRow(r) + } + + implicit class WithTypeConformity(val left: DataType) extends AnyVal { + def conformsTo[T >: Null: CatalystSerializer]: Boolean = + org.apache.spark.sql.rf.WithTypeConformity(left).conformsTo(schemaOf[T]) } } diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializerEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/CatalystSerializerEncoder.scala similarity index 98% rename from 
core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializerEncoder.scala rename to core/src/main/scala/org/locationtech/rasterframes/encoders/CatalystSerializerEncoder.scala index 27e452329..792b74165 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/encoders/CatalystSerializerEncoder.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/CatalystSerializerEncoder.scala @@ -19,7 +19,8 @@ * */ -package astraea.spark.rasterframes.encoders +package org.locationtech.rasterframes.encoders + import org.apache.spark.sql.catalyst.analysis.GetColumnByOrdinal import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions._ diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/CellTypeEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/CellTypeEncoder.scala similarity index 93% rename from core/src/main/scala/astraea/spark/rasterframes/encoders/CellTypeEncoder.scala rename to core/src/main/scala/org/locationtech/rasterframes/encoders/CellTypeEncoder.scala index 953c2ed65..ea01d4143 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/encoders/CellTypeEncoder.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/CellTypeEncoder.scala @@ -15,9 +15,11 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.encoders +package org.locationtech.rasterframes.encoders import geotrellis.raster.{CellType, DataType} import org.apache.spark.sql.catalyst.ScalaReflection @@ -26,7 +28,7 @@ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.rf.VersionShims.InvokeSafely import org.apache.spark.sql.types.{ObjectType, StringType} import org.apache.spark.unsafe.types.UTF8String - +import CatalystSerializer._ import scala.reflect.classTag /** @@ -41,7 +43,7 @@ object CellTypeEncoder { import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.objects._ val ctType = ScalaReflection.dataTypeFor[DataType] - val schema = CatalystSerializer[CellType].schema + val schema = schemaOf[CellType] val inputObject = BoundReference(0, ctType, nullable = false) val intermediateType = ObjectType(classOf[String]) diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/DelegatingSubfieldEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/DelegatingSubfieldEncoder.scala similarity index 96% rename from core/src/main/scala/astraea/spark/rasterframes/encoders/DelegatingSubfieldEncoder.scala rename to core/src/main/scala/org/locationtech/rasterframes/encoders/DelegatingSubfieldEncoder.scala index 9b984b8ad..cf4c2e5ac 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/encoders/DelegatingSubfieldEncoder.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/DelegatingSubfieldEncoder.scala @@ -15,9 +15,11 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.encoders +package org.locationtech.rasterframes.encoders import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.analysis.{GetColumnByOrdinal, UnresolvedAttribute, UnresolvedExtractValue} diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/EnvelopeEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/EnvelopeEncoder.scala similarity index 59% rename from core/src/main/scala/astraea/spark/rasterframes/encoders/EnvelopeEncoder.scala rename to core/src/main/scala/org/locationtech/rasterframes/encoders/EnvelopeEncoder.scala index 5888a1974..50d66f3e0 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/encoders/EnvelopeEncoder.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/EnvelopeEncoder.scala @@ -1,6 +1,27 @@ -package astraea.spark.rasterframes.encoders +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ -import com.vividsolutions.jts.geom.Envelope +package org.locationtech.rasterframes.encoders + +import org.locationtech.jts.geom.Envelope import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.analysis.GetColumnByOrdinal import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder @@ -8,7 +29,7 @@ import org.apache.spark.sql.catalyst.expressions.objects.NewInstance import org.apache.spark.sql.catalyst.expressions.{BoundReference, CreateNamedStruct, Literal} import org.apache.spark.sql.rf.VersionShims.InvokeSafely import org.apache.spark.sql.types._ - +import CatalystSerializer._ import scala.reflect.classTag /** @@ -18,7 +39,7 @@ import scala.reflect.classTag */ object EnvelopeEncoder { - val schema = CatalystSerializer[Envelope].schema + val schema = schemaOf[Envelope] val dataType: DataType = ScalaReflection.dataTypeFor[Envelope] diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/ProjectedExtentEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/ProjectedExtentEncoder.scala similarity index 89% rename from core/src/main/scala/astraea/spark/rasterframes/encoders/ProjectedExtentEncoder.scala rename to core/src/main/scala/org/locationtech/rasterframes/encoders/ProjectedExtentEncoder.scala index 0599f9848..f5b078159 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/encoders/ProjectedExtentEncoder.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/ProjectedExtentEncoder.scala @@ -15,11 +15,13 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.encoders +package org.locationtech.rasterframes.encoders -import astraea.spark.rasterframes._ +import org.locationtech.rasterframes._ import geotrellis.vector.ProjectedExtent import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/SparkBasicEncoders.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/SparkBasicEncoders.scala similarity index 90% rename from core/src/main/scala/astraea/spark/rasterframes/encoders/SparkBasicEncoders.scala rename to core/src/main/scala/org/locationtech/rasterframes/encoders/SparkBasicEncoders.scala index 670d2e217..e2830f7f1 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/encoders/SparkBasicEncoders.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/SparkBasicEncoders.scala @@ -15,9 +15,11 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.encoders +package org.locationtech.rasterframes.encoders import org.apache.spark.sql.{Encoder, Encoders} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder @@ -38,3 +40,4 @@ private[rasterframes] trait SparkBasicEncoders { implicit val boolEnc: Encoder[Boolean] = Encoders.scalaBoolean } +object SparkBasicEncoders extends SparkBasicEncoders \ No newline at end of file diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/StandardEncoders.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/StandardEncoders.scala similarity index 83% rename from core/src/main/scala/astraea/spark/rasterframes/encoders/StandardEncoders.scala rename to core/src/main/scala/org/locationtech/rasterframes/encoders/StandardEncoders.scala index 625eea1cd..256da58d8 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/encoders/StandardEncoders.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/StandardEncoders.scala @@ -15,28 +15,31 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.encoders +package org.locationtech.rasterframes.encoders import java.net.URI import java.sql.Timestamp -import astraea.spark.rasterframes.model._ -import astraea.spark.rasterframes.stats.{CellHistogram, CellStatistics, LocalCellStatistics} -import com.vividsolutions.jts.geom.Envelope +import org.locationtech.rasterframes.stats.{CellHistogram, CellStatistics, LocalCellStatistics} +import org.locationtech.jts.geom.Envelope import geotrellis.proj4.CRS -import geotrellis.raster.{CellSize, CellType, Tile, TileLayout} +import geotrellis.raster.{CellSize, CellType, Raster, Tile, TileLayout} import geotrellis.spark.tiling.LayoutDefinition import geotrellis.spark.{KeyBounds, SpaceTimeKey, SpatialKey, TemporalKey, TemporalProjectedExtent, TileLayerMetadata} import geotrellis.vector.{Extent, ProjectedExtent} +import org.apache.spark.sql.{Encoder, Encoders} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.locationtech.geomesa.spark.jts.encoders.SpatialEncoders +import org.locationtech.rasterframes.model.{CellContext, Cells, TileContext, TileDataContext} import scala.reflect.runtime.universe._ /** - * Implicit encoder definitions for RasterFrame types. + * Implicit encoder definitions for RasterFrameLayer types. 
*/ trait StandardEncoders extends SpatialEncoders { object PrimitiveEncoders extends SparkBasicEncoders @@ -48,6 +51,7 @@ trait StandardEncoders extends SpatialEncoders { implicit def stkBoundsEncoder: ExpressionEncoder[KeyBounds[SpaceTimeKey]] = ExpressionEncoder() implicit def extentEncoder: ExpressionEncoder[Extent] = ExpressionEncoder[Extent]() implicit def singlebandTileEncoder: ExpressionEncoder[Tile] = ExpressionEncoder() + implicit def rasterEncoder: ExpressionEncoder[Raster[Tile]] = ExpressionEncoder() implicit def tileLayerMetadataEncoder[K: TypeTag]: ExpressionEncoder[TileLayerMetadata[K]] = TileLayerMetadataEncoder() implicit def crsEncoder: ExpressionEncoder[CRS] = CRSEncoder() implicit def projectedExtentEncoder: ExpressionEncoder[ProjectedExtent] = ProjectedExtentEncoder() @@ -66,6 +70,7 @@ trait StandardEncoders extends SpatialEncoders { implicit def cellsEncoder: ExpressionEncoder[Cells] = Cells.encoder implicit def tileContextEncoder: ExpressionEncoder[TileContext] = TileContext.encoder implicit def tileDataContextEncoder: ExpressionEncoder[TileDataContext] = TileDataContext.encoder + implicit def extentTilePairEncoder: Encoder[(ProjectedExtent, Tile)] = Encoders.tuple(projectedExtentEncoder, singlebandTileEncoder) } diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/StandardSerializers.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/StandardSerializers.scala similarity index 81% rename from core/src/main/scala/astraea/spark/rasterframes/encoders/StandardSerializers.scala rename to core/src/main/scala/org/locationtech/rasterframes/encoders/StandardSerializers.scala index aaff5c534..affe545b8 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/encoders/StandardSerializers.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/StandardSerializers.scala @@ -19,16 +19,18 @@ * */ -package astraea.spark.rasterframes.encoders -import 
astraea.spark.rasterframes.encoders.CatalystSerializer.CatalystIO -import astraea.spark.rasterframes.util.CRSParser -import com.vividsolutions.jts.geom.Envelope +package org.locationtech.rasterframes.encoders + import geotrellis.proj4.CRS import geotrellis.raster._ import geotrellis.spark._ import geotrellis.spark.tiling.LayoutDefinition import geotrellis.vector._ import org.apache.spark.sql.types._ +import org.locationtech.jts.geom.Envelope +import org.locationtech.rasterframes.TileType +import org.locationtech.rasterframes.encoders.CatalystSerializer.{CatalystIO, _} +import org.locationtech.rasterframes.model.LazyCRS /** Collection of CatalystSerializers for third-party types. */ trait StandardSerializers { @@ -77,7 +79,7 @@ trait StandardSerializers { ) ) override def from[R](row: R, io: CatalystIO[R]): CRS = - CRSParser(io.getString(row, 0)) + LazyCRS(io.getString(row, 0)) } implicit val cellTypeSerializer: CatalystSerializer[CellType] = new CatalystSerializer[CellType] { @@ -93,8 +95,8 @@ trait StandardSerializers { implicit val projectedExtentSerializer: CatalystSerializer[ProjectedExtent] = new CatalystSerializer[ProjectedExtent] { override def schema: StructType = StructType(Seq( - StructField("extent", CatalystSerializer[Extent].schema, false), - StructField("crs", CatalystSerializer[CRS].schema, false) + StructField("extent", schemaOf[Extent], false), + StructField("crs", schemaOf[CRS], false) )) override protected def to[R](t: ProjectedExtent, io: CatalystSerializer.CatalystIO[R]): R = io.create( @@ -187,8 +189,8 @@ trait StandardSerializers { implicit val layoutDefinitionSerializer = new CatalystSerializer[LayoutDefinition] { override def schema: StructType = StructType(Seq( - StructField("extent", CatalystSerializer[Extent].schema, true), - StructField("tileLayout", CatalystSerializer[TileLayout].schema, true) + StructField("extent", schemaOf[Extent], true), + StructField("tileLayout", schemaOf[TileLayout], true) )) override protected def to[R](t: 
LayoutDefinition, io: CatalystIO[R]): R = io.create( @@ -202,10 +204,10 @@ trait StandardSerializers { ) } - implicit def boundsSerializer[T: CatalystSerializer]: CatalystSerializer[KeyBounds[T]] = new CatalystSerializer[KeyBounds[T]] { + implicit def boundsSerializer[T >: Null: CatalystSerializer]: CatalystSerializer[KeyBounds[T]] = new CatalystSerializer[KeyBounds[T]] { override def schema: StructType = StructType(Seq( - StructField("minKey", CatalystSerializer[T].schema, true), - StructField("maxKey", CatalystSerializer[T].schema, true) + StructField("minKey", schemaOf[T], true), + StructField("maxKey", schemaOf[T], true) )) override protected def to[R](t: KeyBounds[T], io: CatalystIO[R]): R = io.create( @@ -219,13 +221,13 @@ trait StandardSerializers { ) } - def tileLayerMetadataSerializer[T: CatalystSerializer]: CatalystSerializer[TileLayerMetadata[T]] = new CatalystSerializer[TileLayerMetadata[T]] { + def tileLayerMetadataSerializer[T >: Null: CatalystSerializer]: CatalystSerializer[TileLayerMetadata[T]] = new CatalystSerializer[TileLayerMetadata[T]] { override def schema: StructType = StructType(Seq( - StructField("cellType", CatalystSerializer[CellType].schema, false), - StructField("layout", CatalystSerializer[LayoutDefinition].schema, false), - StructField("extent", CatalystSerializer[Extent].schema, false), - StructField("crs", CatalystSerializer[CRS].schema, false), - StructField("bounds", CatalystSerializer[KeyBounds[T]].schema, false) + StructField("cellType", schemaOf[CellType], false), + StructField("layout", schemaOf[LayoutDefinition], false), + StructField("extent", schemaOf[Extent], false), + StructField("crs", schemaOf[CRS], false), + StructField("bounds", schemaOf[KeyBounds[T]], false) )) override protected def to[R](t: TileLayerMetadata[T], io: CatalystIO[R]): R = io.create( @@ -245,6 +247,25 @@ trait StandardSerializers { ) } + implicit def rasterSerializer: CatalystSerializer[Raster[Tile]] = new CatalystSerializer[Raster[Tile]] { + import 
org.apache.spark.sql.rf.TileUDT.tileSerializer + + override def schema: StructType = StructType(Seq( + StructField("tile", TileType, false), + StructField("extent", schemaOf[Extent], false) + )) + + override protected def to[R](t: Raster[Tile], io: CatalystIO[R]): R = io.create( + io.to(t.tile), + io.to(t.extent) + ) + + override protected def from[R](t: R, io: CatalystIO[R]): Raster[Tile] = Raster( + io.get[Tile](t, 0), + io.get[Extent](t, 1) + ) + } + implicit val spatialKeyTLMSerializer = tileLayerMetadataSerializer[SpatialKey] implicit val spaceTimeKeyTLMSerializer = tileLayerMetadataSerializer[SpaceTimeKey] diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/StringBackedEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/StringBackedEncoder.scala similarity index 96% rename from core/src/main/scala/astraea/spark/rasterframes/encoders/StringBackedEncoder.scala rename to core/src/main/scala/org/locationtech/rasterframes/encoders/StringBackedEncoder.scala index 8dc950b4b..2ec265ccc 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/encoders/StringBackedEncoder.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/StringBackedEncoder.scala @@ -15,9 +15,11 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.encoders +package org.locationtech.rasterframes.encoders import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.analysis.GetColumnByOrdinal diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/TemporalProjectedExtentEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/TemporalProjectedExtentEncoder.scala similarity index 76% rename from core/src/main/scala/astraea/spark/rasterframes/encoders/TemporalProjectedExtentEncoder.scala rename to core/src/main/scala/org/locationtech/rasterframes/encoders/TemporalProjectedExtentEncoder.scala index 5e44bd7fe..f69f7f160 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/encoders/TemporalProjectedExtentEncoder.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/TemporalProjectedExtentEncoder.scala @@ -15,20 +15,20 @@ * License for the specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.encoders - -import java.time.ZonedDateTime +package org.locationtech.rasterframes.encoders -import astraea.spark.rasterframes._ +import org.locationtech.rasterframes._ import geotrellis.spark.TemporalProjectedExtent -import geotrellis.vector.ProjectedExtent import org.apache.spark.sql.Encoders import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder /** - * Custom encoder for [[ProjectedExtent]]. Necessary because [[geotrellis.proj4.CRS]] within [[ProjectedExtent]] isn't a case class, and [[ZonedDateTime]] doesn't have a natural encoder. + * Custom encoder for `TemporalProjectedExtent`. Necessary because `geotrellis.proj4.CRS` within + * `ProjectedExtent` isn't a case class, and `ZonedDateTime` doesn't have a natural encoder. 
* * @since 8/2/17 */ diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/TileLayerMetadataEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/TileLayerMetadataEncoder.scala similarity index 91% rename from core/src/main/scala/astraea/spark/rasterframes/encoders/TileLayerMetadataEncoder.scala rename to core/src/main/scala/org/locationtech/rasterframes/encoders/TileLayerMetadataEncoder.scala index c2ed1bbd4..2f59ea451 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/encoders/TileLayerMetadataEncoder.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/TileLayerMetadataEncoder.scala @@ -15,9 +15,11 @@ * License for the specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.encoders +package org.locationtech.rasterframes.encoders import geotrellis.spark.{KeyBounds, TileLayerMetadata} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder @@ -31,7 +33,7 @@ import scala.reflect.runtime.universe._ * @since 7/21/17 */ object TileLayerMetadataEncoder { - import astraea.spark.rasterframes._ + import org.locationtech.rasterframes._ private def fieldEncoders = Seq[(String, ExpressionEncoder[_])]( "cellType" -> cellTypeEncoder, diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/URIEncoder.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/URIEncoder.scala similarity index 92% rename from core/src/main/scala/astraea/spark/rasterframes/encoders/URIEncoder.scala rename to core/src/main/scala/org/locationtech/rasterframes/encoders/URIEncoder.scala index d50cd7803..bbbcf25ea 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/encoders/URIEncoder.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/URIEncoder.scala @@ -15,9 +15,11 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.encoders +package org.locationtech.rasterframes.encoders import java.net.URI diff --git a/core/src/main/scala/astraea/spark/rasterframes/encoders/package.scala b/core/src/main/scala/org/locationtech/rasterframes/encoders/package.scala similarity index 81% rename from core/src/main/scala/astraea/spark/rasterframes/encoders/package.scala rename to core/src/main/scala/org/locationtech/rasterframes/encoders/package.scala index 678bbfcd1..8cb5a6f85 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/encoders/package.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/encoders/package.scala @@ -15,10 +15,13 @@ * License for the specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes +package org.locationtech.rasterframes +import org.apache.spark.sql.rf._ import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.Literal @@ -41,7 +44,12 @@ package object encoders { /** Constructs a catalyst literal expression from anything with a serializer. */ def SerializedLiteral[T >: Null: CatalystSerializer](t: T): Literal = { val ser = CatalystSerializer[T] - Literal.create(ser.toInternalRow(t), ser.schema) + val schema = ser.schema match { + case s if s.conformsTo(TileType.sqlType) => TileType + case s if s.conformsTo(RasterSourceType.sqlType) => RasterSourceType + case s => s + } + Literal.create(ser.toInternalRow(t), schema) } /** Constructs a Dataframe literal column from anything with a serializer. 
*/ diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/BinaryLocalRasterOp.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/BinaryLocalRasterOp.scala similarity index 94% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/BinaryLocalRasterOp.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/BinaryLocalRasterOp.scala index 3fac44c65..bd55345fa 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/BinaryLocalRasterOp.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/BinaryLocalRasterOp.scala @@ -19,10 +19,10 @@ * */ -package astraea.spark.rasterframes.expressions +package org.locationtech.rasterframes.expressions -import astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.expressions.DynamicExtractors._ +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.expressions.DynamicExtractors._ import com.typesafe.scalalogging.LazyLogging import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.analysis.TypeCheckResult diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/BinaryRasterOp.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/BinaryRasterOp.scala similarity index 93% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/BinaryRasterOp.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/BinaryRasterOp.scala index 02f8fc29e..690658064 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/BinaryRasterOp.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/BinaryRasterOp.scala @@ -19,9 +19,10 @@ * */ -package astraea.spark.rasterframes.expressions -import astraea.spark.rasterframes.expressions.DynamicExtractors.tileExtractor -import astraea.spark.rasterframes.encoders.CatalystSerializer._ +package 
org.locationtech.rasterframes.expressions + +import org.locationtech.rasterframes.expressions.DynamicExtractors.tileExtractor +import org.locationtech.rasterframes.encoders.CatalystSerializer._ import com.typesafe.scalalogging.LazyLogging import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.analysis.TypeCheckResult diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/DynamicExtractors.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/DynamicExtractors.scala similarity index 68% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/DynamicExtractors.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/DynamicExtractors.scala index 1dabc8201..6a7e6e421 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/DynamicExtractors.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/DynamicExtractors.scala @@ -19,26 +19,28 @@ * */ -package astraea.spark.rasterframes.expressions -import astraea.spark.rasterframes.encoders.CatalystSerializer -import astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.model.TileContext -import astraea.spark.rasterframes.ref.{ProjectedRasterLike, RasterRef, RasterSource} -import astraea.spark.rasterframes.tiles.ProjectedRasterTile +package org.locationtech.rasterframes.expressions + +import geotrellis.proj4.CRS import geotrellis.raster.{CellGrid, Tile} import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.rf.{TileUDT, _} +import org.apache.spark.sql.rf.{RasterSourceUDT, TileUDT} import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.model.{LazyCRS, TileContext} +import org.locationtech.rasterframes.ref.{ProjectedRasterLike, RasterRef, RasterSource} +import 
org.locationtech.rasterframes.tiles.ProjectedRasterTile -private[expressions] +private[rasterframes] object DynamicExtractors { /** Partial function for pulling a tile and its contesxt from an input row. */ lazy val tileExtractor: PartialFunction[DataType, InternalRow => (Tile, Option[TileContext])] = { case _: TileUDT => (row: InternalRow) => (row.to[Tile](TileUDT.tileSerializer), None) - case t if t.conformsTo(CatalystSerializer[ProjectedRasterTile].schema) => + case t if t.conformsTo[ProjectedRasterTile] => (row: InternalRow) => { val prt = row.to[ProjectedRasterTile] (prt, Some(TileContext(prt))) @@ -48,7 +50,7 @@ object DynamicExtractors { lazy val rowTileExtractor: PartialFunction[DataType, Row => (Tile, Option[TileContext])] = { case _: TileUDT => (row: Row) => (row.to[Tile](TileUDT.tileSerializer), None) - case t if t.conformsTo(CatalystSerializer[ProjectedRasterTile].schema) => + case t if t.conformsTo[ProjectedRasterTile] => (row: Row) => { val prt = row.to[ProjectedRasterTile] (prt, Some(TileContext(prt))) @@ -58,21 +60,30 @@ object DynamicExtractors { /** Partial function for pulling a ProjectedRasterLike an input row. */ lazy val projectedRasterLikeExtractor: PartialFunction[DataType, InternalRow ⇒ ProjectedRasterLike] = { case _: RasterSourceUDT ⇒ - (row: InternalRow) ⇒ row.to[RasterSource](RasterSourceUDT.rasterSourceSerializer) - case t if t.conformsTo(CatalystSerializer[ProjectedRasterTile].schema) => + (row: InternalRow) => row.to[RasterSource](RasterSourceUDT.rasterSourceSerializer) + case t if t.conformsTo[ProjectedRasterTile] => (row: InternalRow) => row.to[ProjectedRasterTile] - case t if t.conformsTo(CatalystSerializer[RasterRef].schema) => - (row: InternalRow) ⇒ row.to[RasterRef] + case t if t.conformsTo[RasterRef] => + (row: InternalRow) => row.to[RasterRef] } /** Partial function for pulling a CellGrid from an input row. 
*/ lazy val gridExtractor: PartialFunction[DataType, InternalRow ⇒ CellGrid] = { - case _: TileUDT ⇒ - (row: InternalRow) ⇒ row.to[Tile](TileUDT.tileSerializer) - case _: RasterSourceUDT ⇒ - (row: InternalRow) ⇒ row.to[RasterSource](RasterSourceUDT.rasterSourceSerializer) - case t if t.conformsTo(CatalystSerializer[RasterRef].schema) ⇒ - (row: InternalRow) ⇒ row.to[RasterRef] + case _: TileUDT => + (row: InternalRow) => row.to[Tile](TileUDT.tileSerializer) + case _: RasterSourceUDT => + (row: InternalRow) => row.to[RasterSource](RasterSourceUDT.rasterSourceSerializer) + case t if t.conformsTo[RasterRef] ⇒ + (row: InternalRow) => row.to[RasterRef] + case t if t.conformsTo[ProjectedRasterTile] => + (row: InternalRow) => row.to[ProjectedRasterTile] + } + + lazy val crsExtractor: PartialFunction[DataType, Any => CRS] = { + case _: StringType => + (v: Any) => LazyCRS(v.asInstanceOf[UTF8String].toString) + case t if t.conformsTo[CRS] => + (v: Any) => v.asInstanceOf[InternalRow].to[CRS] } sealed trait TileOrNumberArg @@ -106,9 +117,10 @@ object DynamicExtractors { lazy val intArgExtractor: PartialFunction[DataType, Any => IntegerArg] = { case _: IntegerType | _: ByteType | _: ShortType => { case i: Int => IntegerArg(i) - case b: Byte => IntegerArg(b) + case b: Byte => IntegerArg(b.toInt) case s: Short => IntegerArg(s.toInt) case c: Char => IntegerArg(c.toInt) } } + } diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/NullToValue.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/NullToValue.scala similarity index 95% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/NullToValue.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/NullToValue.scala index edc52fcf7..8bc98c1e2 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/NullToValue.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/NullToValue.scala @@ -19,7 +19,8 @@ * */ -package 
astraea.spark.rasterframes.expressions +package org.locationtech.rasterframes.expressions + import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.UnaryExpression diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/OnCellGridExpression.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/OnCellGridExpression.scala similarity index 93% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/OnCellGridExpression.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/OnCellGridExpression.scala index b856ae2be..05d56f7d1 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/OnCellGridExpression.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/OnCellGridExpression.scala @@ -19,9 +19,9 @@ * */ -package astraea.spark.rasterframes.expressions +package org.locationtech.rasterframes.expressions -import astraea.spark.rasterframes.expressions.DynamicExtractors._ +import org.locationtech.rasterframes.expressions.DynamicExtractors._ import geotrellis.raster.CellGrid import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/OnTileContextExpression.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/OnTileContextExpression.scala similarity index 91% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/OnTileContextExpression.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/OnTileContextExpression.scala index a8797ae49..78ebd1f5b 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/OnTileContextExpression.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/OnTileContextExpression.scala @@ -19,14 +19,14 @@ * */ -package astraea.spark.rasterframes.expressions +package 
org.locationtech.rasterframes.expressions -import astraea.spark.rasterframes.expressions.DynamicExtractors._ -import astraea.spark.rasterframes.model.TileContext +import org.locationtech.rasterframes.expressions.DynamicExtractors._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess} import org.apache.spark.sql.catalyst.expressions.UnaryExpression +import org.locationtech.rasterframes.model.TileContext /** * Implements boilerplate for subtype expressions processing TileUDT (when ProjectedRasterTile), RasterSourceUDT, and diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/SpatialRelation.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/SpatialRelation.scala similarity index 90% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/SpatialRelation.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/SpatialRelation.scala index e994c8a64..1d6697048 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/SpatialRelation.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/SpatialRelation.scala @@ -15,12 +15,16 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.expressions +package org.locationtech.rasterframes.expressions -import astraea.spark.rasterframes.expressions.SpatialRelation.RelationPredicate -import com.vividsolutions.jts.geom._ +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.expressions.SpatialRelation.RelationPredicate +import geotrellis.vector.Extent +import org.locationtech.jts.geom._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -43,6 +47,9 @@ abstract class SpatialRelation extends BinaryExpression case r: InternalRow ⇒ expr.dataType match { case udt: AbstractGeometryUDT[_] ⇒ udt.deserialize(r) + case dt if dt.conformsTo[Extent] => + val extent = r.to[Extent] + extent.jtsGeom } } } diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/TileAssembler.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/TileAssembler.scala similarity index 77% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/TileAssembler.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/TileAssembler.scala index c3a32267f..c3fe0e17b 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/TileAssembler.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/TileAssembler.scala @@ -19,26 +19,44 @@ * */ -package astraea.spark.rasterframes.expressions +package org.locationtech.rasterframes.expressions import java.nio.ByteBuffer -import astraea.spark.rasterframes.expressions.TileAssembler.TileBuffer -import astraea.spark.rasterframes.util._ +import org.locationtech.rasterframes.expressions.TileAssembler.TileBuffer +import org.locationtech.rasterframes.util._ import geotrellis.raster.{DataType => _, _} import org.apache.spark.sql.catalyst.InternalRow 
import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate} -import org.apache.spark.sql.catalyst.expressions.{Expression, ImplicitCastInputTypes} -import org.apache.spark.sql.rf.TileUDT +import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription, ImplicitCastInputTypes} import org.apache.spark.sql.types._ import org.apache.spark.sql.{Column, TypedColumn} import spire.syntax.cfor._ +import org.locationtech.rasterframes.TileType /** * Aggregator for reassembling tiles from from exploded form * * @since 9/24/17 */ +@ExpressionDescription( + usage = "_FUNC_(colIndex, rowIndex, cellValue, tileCols, tileRows) - Assemble tiles from set of column and row indices and cell values.", + arguments = """ + Arguments: + * colIndex - column to place the cellValue in the generated tile + * rowIndex - row to place the cellValue in the generated tile + * cellValue - numeric value to place in the generated tile at colIndex and rowIndex + * tileCols - number of columns in the generated tile + * tileRows - number of rows in the generated tile""", + examples = """ + Examples: + > SELECT _FUNC_(column_index, row_index, cell0, 10, 10) as tile; + ... + > SELECT _FUNC_(column_index, row_index, tile, 10, 10) as tile2 + FROM (SELECT rf_explode_tiles(rf_make_constant_tile(4, 10, 10, 'int8raw')) as tile) + ... 
+ """ +) case class TileAssembler( colIndex: Expression, rowIndex: Expression, @@ -49,13 +67,17 @@ case class TileAssembler( inputAggBufferOffset: Int = 0) extends TypedImperativeAggregate[TileBuffer] with ImplicitCastInputTypes { + def this(colIndex: Expression, + rowIndex: Expression, + cellValue: Expression, + tileCols: Expression, + tileRows: Expression) = this(colIndex, rowIndex, cellValue, tileCols, tileRows, 0, 0) + override def children: Seq[Expression] = Seq(colIndex, rowIndex, cellValue, tileCols, tileRows) override def inputTypes = Seq(ShortType, ShortType, DoubleType, ShortType, ShortType) - private val TileType = new TileUDT() - - override def prettyName: String = "assemble_tiles" + override def prettyName: String = "rf_assemble_tiles" override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate = copy(mutableAggBufferOffset = newMutableAggBufferOffset) @@ -118,7 +140,7 @@ case class TileAssembler( val cells = Array.ofDim[Double](length) result.get(cells) val (tileCols, tileRows) = buffer.tileSize - val tile = ArrayTile(cells, tileCols, tileRows) + val tile = ArrayTile(cells, tileCols.toInt, tileRows.toInt) TileType.serialize(tile) } @@ -127,7 +149,7 @@ case class TileAssembler( } object TileAssembler { - import astraea.spark.rasterframes.encoders.StandardEncoders._ + import org.locationtech.rasterframes.encoders.StandardEncoders._ def apply( columnIndex: Column, diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryLocalRasterOp.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryLocalRasterOp.scala similarity index 91% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryLocalRasterOp.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryLocalRasterOp.scala index 049e6d9a1..46969c226 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryLocalRasterOp.scala +++ 
b/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryLocalRasterOp.scala @@ -19,10 +19,10 @@ * */ -package astraea.spark.rasterframes.expressions +package org.locationtech.rasterframes.expressions -import astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.expressions.DynamicExtractors._ +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.expressions.DynamicExtractors._ import com.typesafe.scalalogging.LazyLogging import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.analysis.TypeCheckResult diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryRasterAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryRasterAggregate.scala similarity index 91% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryRasterAggregate.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryRasterAggregate.scala index a28ae6753..cfea46ebe 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryRasterAggregate.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryRasterAggregate.scala @@ -19,8 +19,9 @@ * */ -package astraea.spark.rasterframes.expressions -import astraea.spark.rasterframes.expressions.DynamicExtractors.rowTileExtractor +package org.locationtech.rasterframes.expressions + +import org.locationtech.rasterframes.expressions.DynamicExtractors.rowTileExtractor import geotrellis.raster.Tile import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.{Expression, ScalaUDF} diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryRasterOp.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryRasterOp.scala similarity index 90% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryRasterOp.scala rename to 
core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryRasterOp.scala index f21dc4bb5..8d2b532c8 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/UnaryRasterOp.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/UnaryRasterOp.scala @@ -19,13 +19,14 @@ * */ -package astraea.spark.rasterframes.expressions -import astraea.spark.rasterframes.expressions.DynamicExtractors._ -import astraea.spark.rasterframes.model.TileContext +package org.locationtech.rasterframes.expressions + +import org.locationtech.rasterframes.expressions.DynamicExtractors._ import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess} import org.apache.spark.sql.catalyst.expressions.UnaryExpression +import org.locationtech.rasterframes.model.TileContext /** Boilerplate for expressions operating on a single Tile-like . */ trait UnaryRasterOp extends UnaryExpression { diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/ExtractTile.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/ExtractTile.scala similarity index 72% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/ExtractTile.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/ExtractTile.scala index 7cb7ba3b1..4fc0a0374 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/ExtractTile.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/ExtractTile.scala @@ -19,25 +19,26 @@ * */ -package astraea.spark.rasterframes.expressions.accessors +package org.locationtech.rasterframes.expressions.accessors -import astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.expressions.UnaryRasterOp -import astraea.spark.rasterframes.model.TileContext -import 
astraea.spark.rasterframes.tiles.InternalRowTile -import astraea.spark.rasterframes.tiles.ProjectedRasterTile.ConcreteProjectedRasterTile +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.expressions.UnaryRasterOp +import org.locationtech.rasterframes.tiles.ProjectedRasterTile.ConcreteProjectedRasterTile import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.rf.TileUDT import org.apache.spark.sql.types.DataType import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes.model.TileContext +import org.locationtech.rasterframes.tiles.InternalRowTile +import org.locationtech.rasterframes._ /** Expression to extract at tile from several types that contain tiles.*/ case class ExtractTile(child: Expression) extends UnaryRasterOp with CodegenFallback { - override def dataType: DataType = new TileUDT() + override def dataType: DataType = TileType - override def nodeName: String = "extract_tile" + override def nodeName: String = "rf_extract_tile" implicit val tileSer = TileUDT.tileSerializer override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = tile match { case irt: InternalRowTile => irt.mem @@ -47,7 +48,7 @@ case class ExtractTile(child: Expression) extends UnaryRasterOp with CodegenFall } object ExtractTile { - import astraea.spark.rasterframes.encoders.StandardEncoders.singlebandTileEncoder + import org.locationtech.rasterframes.encoders.StandardEncoders.singlebandTileEncoder def apply(input: Column): TypedColumn[Any, Tile] = new Column(new ExtractTile(input.expr)).as[Tile] } diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetCRS.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetCRS.scala similarity index 69% rename from 
core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetCRS.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetCRS.scala index 1a6d29df0..10efc40b7 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetCRS.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetCRS.scala @@ -19,28 +19,34 @@ * */ -package astraea.spark.rasterframes.expressions.accessors +package org.locationtech.rasterframes.expressions.accessors -import astraea.spark.rasterframes.encoders.CatalystSerializer -import astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.encoders.StandardEncoders.crsEncoder -import astraea.spark.rasterframes.expressions.OnTileContextExpression -import astraea.spark.rasterframes.model.TileContext +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.encoders.StandardEncoders.crsEncoder +import org.locationtech.rasterframes.expressions.OnTileContextExpression import geotrellis.proj4.CRS import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.types.DataType import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes.model.TileContext /** * Expression to extract the CRS out of a RasterRef or ProjectedRasterTile column. * * @since 9/9/18 */ +@ExpressionDescription( + usage = "_FUNC_(raster) - Fetches the CRS of a ProjectedRasterTile or RasterSource.", + examples = """ + Examples: + > SELECT _FUNC_(raster); + .... 
+ """) case class GetCRS(child: Expression) extends OnTileContextExpression with CodegenFallback { - override def dataType: DataType = CatalystSerializer[CRS].schema - override def nodeName: String = "crs" + override def dataType: DataType = schemaOf[CRS] + override def nodeName: String = "rf_crs" override def eval(ctx: TileContext): InternalRow = ctx.crs.toInternalRow } diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetCellType.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetCellType.scala similarity index 77% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetCellType.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetCellType.scala index eeb521e4b..869835c5f 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetCellType.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetCellType.scala @@ -19,11 +19,10 @@ * */ -package astraea.spark.rasterframes.expressions.accessors +package org.locationtech.rasterframes.expressions.accessors -import astraea.spark.rasterframes.encoders.CatalystSerializer -import astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.expressions.OnCellGridExpression +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.expressions.OnCellGridExpression import geotrellis.raster.{CellGrid, CellType} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -36,15 +35,15 @@ import org.apache.spark.sql.{Column, TypedColumn} */ case class GetCellType(child: Expression) extends OnCellGridExpression with CodegenFallback { - override def nodeName: String = "cell_type" + override def nodeName: String = "rf_cell_type" - def dataType: DataType = CatalystSerializer[CellType].schema + def 
dataType: DataType = schemaOf[CellType] /** Implemented by subtypes to process incoming ProjectedRasterLike entity. */ override def eval(cg: CellGrid): Any = cg.cellType.toInternalRow } object GetCellType { - import astraea.spark.rasterframes.encoders.StandardEncoders._ + import org.locationtech.rasterframes.encoders.StandardEncoders._ def apply(col: Column): TypedColumn[Any, CellType] = new Column(new GetCellType(col.expr)).as[CellType] } diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetDimensions.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetDimensions.scala similarity index 59% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetDimensions.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetDimensions.scala index 3589dbc1b..dffdfdecb 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetDimensions.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetDimensions.scala @@ -19,31 +19,36 @@ * */ -package astraea.spark.rasterframes.expressions.accessors +package org.locationtech.rasterframes.expressions.accessors -import astraea.spark.rasterframes.encoders.CatalystSerializer -import astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.expressions.OnCellGridExpression -import astraea.spark.rasterframes.model.TileDimensions +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.expressions.OnCellGridExpression import geotrellis.raster.CellGrid import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.locationtech.rasterframes.model.TileDimensions /** - * Extract a Tile's 
dimensions + * Extract a raster's dimensions * @since 12/21/17 */ -case class GetDimensions(child: Expression) extends OnCellGridExpression - with CodegenFallback { - override def nodeName: String = "tile_dimensions" +@ExpressionDescription( + usage = "_FUNC_(raster) - Fetches the dimensions (columns & rows) of a Tile, ProjectedRasterTile or RasterSource.", + examples = """ + Examples: + > SELECT _FUNC_(raster); + .... + """) +case class GetDimensions(child: Expression) extends OnCellGridExpression with CodegenFallback { + override def nodeName: String = "rf_dimensions" - def dataType = CatalystSerializer[TileDimensions].schema + def dataType = schemaOf[TileDimensions] override def eval(grid: CellGrid): Any = TileDimensions(grid.cols, grid.rows).toInternalRow } object GetDimensions { - def apply(col: Column): Column = + def apply(col: Column): TypedColumn[Any, TileDimensions] = new Column(new GetDimensions(col.expr)).as[TileDimensions] } diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetEnvelope.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetEnvelope.scala similarity index 86% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetEnvelope.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetEnvelope.scala index 551f64eb0..d0c14491b 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetEnvelope.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetEnvelope.scala @@ -19,10 +19,9 @@ * */ -package astraea.spark.rasterframes.expressions.accessors +package org.locationtech.rasterframes.expressions.accessors -import astraea.spark.rasterframes.encoders.EnvelopeEncoder -import com.vividsolutions.jts.geom.{Envelope, Geometry} +import org.locationtech.jts.geom.{Envelope, Geometry} import org.apache.spark.sql.catalyst.InternalRow import 
org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, UnaryExpression} @@ -30,13 +29,14 @@ import org.apache.spark.sql.jts.AbstractGeometryUDT import org.apache.spark.sql.rf._ import org.apache.spark.sql.types._ import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes.encoders.EnvelopeEncoder /** * Extracts the bounding box (envelope) of arbitrary JTS Geometry. * * @since 2/22/18 */ -@deprecated("Replace usages of this with GeometryToBounds", "11/4/2018") +@deprecated("Replace usages of this with GeometryToExtent", "11/4/2018") case class GetEnvelope(child: Expression) extends UnaryExpression with CodegenFallback { override def nodeName: String = "envelope" @@ -60,7 +60,7 @@ case class GetEnvelope(child: Expression) extends UnaryExpression with CodegenFa } object GetEnvelope { - import astraea.spark.rasterframes.encoders.StandardEncoders._ + import org.locationtech.rasterframes.encoders.StandardEncoders._ def apply(col: Column): TypedColumn[Any, Envelope] = new GetEnvelope(col.expr).asColumn.as[Envelope] } diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetExtent.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetExtent.scala similarity index 64% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetExtent.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetExtent.scala index c3e664887..2266c69b5 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetExtent.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetExtent.scala @@ -19,28 +19,34 @@ * */ -package astraea.spark.rasterframes.expressions.accessors +package org.locationtech.rasterframes.expressions.accessors -import astraea.spark.rasterframes.encoders.CatalystSerializer -import 
astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.encoders.StandardEncoders.extentEncoder -import astraea.spark.rasterframes.expressions.OnTileContextExpression -import astraea.spark.rasterframes.model.TileContext +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.encoders.StandardEncoders.extentEncoder +import org.locationtech.rasterframes.expressions.OnTileContextExpression import geotrellis.vector.Extent import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.types._ import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes.model.TileContext /** - * Expression to extract the Extent out of a RasterRef or ProjectedRasterTile column. + * Expression to extract the Extent out of a RasterSource or ProjectedRasterTile column. * * @since 9/10/18 */ +@ExpressionDescription( + usage = "_FUNC_(raster) - Fetches the extent (bounding box or envelope) of a ProjectedRasterTile or RasterSource.", + examples = """ + Examples: + > SELECT _FUNC_(raster); + .... 
+ """) case class GetExtent(child: Expression) extends OnTileContextExpression with CodegenFallback { - override def dataType: DataType = CatalystSerializer[Extent].schema - override def nodeName: String = "extent" + override def dataType: DataType = schemaOf[Extent] + override def nodeName: String = "rf_extent" override def eval(ctx: TileContext): InternalRow = ctx.extent.toInternalRow } diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetGeometry.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetGeometry.scala new file mode 100644 index 000000000..7ff3bcfc7 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetGeometry.scala @@ -0,0 +1,57 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.expressions.accessors + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.jts.JTSTypes +import org.apache.spark.sql.types._ +import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.jts.geom.Geometry +import org.locationtech.rasterframes.encoders.StandardEncoders.jtsGeometryEncoder +import org.locationtech.rasterframes.expressions.OnTileContextExpression +import org.locationtech.rasterframes.model.TileContext + +/** + * Expression to extract the Extent out of a RasterSource or ProjectedRasterTile column. + * + * @since 9/10/18 + */ +@ExpressionDescription( + usage = "_FUNC_(raster) - Fetches the extent (bounding box or envelope) of a ProjectedRasterTile or RasterSource.", + examples = """ + Examples: + > SELECT _FUNC_(raster); + .... 
+ """) +case class GetGeometry(child: Expression) extends OnTileContextExpression with CodegenFallback { + override def dataType: DataType = JTSTypes.GeometryTypeInstance + override def nodeName: String = "rf_geometry" + override def eval(ctx: TileContext): InternalRow = + JTSTypes.GeometryTypeInstance.serialize(ctx.extent.jtsGeom) +} + +object GetGeometry { + def apply(col: Column): TypedColumn[Any, Geometry] = + new Column(GetGeometry(col.expr)).as[Geometry] +} \ No newline at end of file diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetTileContext.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetTileContext.scala similarity index 75% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetTileContext.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetTileContext.scala index 98b7eb401..6c9a3538a 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/accessors/GetTileContext.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/GetTileContext.scala @@ -19,19 +19,20 @@ * */ -package astraea.spark.rasterframes.expressions.accessors -import astraea.spark.rasterframes.encoders.CatalystSerializer -import astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.expressions.UnaryRasterOp -import astraea.spark.rasterframes.model.TileContext +package org.locationtech.rasterframes.expressions.accessors + +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.expressions.UnaryRasterOp import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.types.DataType import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes.expressions.UnaryRasterOp +import 
org.locationtech.rasterframes.model.TileContext case class GetTileContext(child: Expression) extends UnaryRasterOp with CodegenFallback { - override def dataType: DataType = CatalystSerializer[TileContext].schema + override def dataType: DataType = schemaOf[TileContext] override def nodeName: String = "get_tile_context" override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = @@ -39,7 +40,7 @@ case class GetTileContext(child: Expression) extends UnaryRasterOp with CodegenF } object GetTileContext { - import astraea.spark.rasterframes.encoders.StandardEncoders.tileContextEncoder + import org.locationtech.rasterframes.encoders.StandardEncoders.tileContextEncoder def apply(input: Column): TypedColumn[Any, TileContext] = new Column(new GetTileContext(input.expr)).as[TileContext] diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/RealizeTile.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/RealizeTile.scala new file mode 100644 index 000000000..d8c9f0ba6 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/accessors/RealizeTile.scala @@ -0,0 +1,55 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.expressions.accessors + +import geotrellis.raster.Tile +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} +import org.apache.spark.sql.rf.TileUDT +import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.expressions.UnaryRasterOp +import org.locationtech.rasterframes.model.TileContext + +@ExpressionDescription( + usage = "_FUNC_(raster) - Extracts the Tile component of a RasterSource, ProjectedRasterTile (or Tile) and ensures the cells are fully fetched.", + examples = """ + Examples: + > SELECT _FUNC_(raster); + .... + """) +case class RealizeTile(child: Expression) extends UnaryRasterOp with CodegenFallback { + override def dataType: DataType = TileType + + override def nodeName: String = "rf_tile" + implicit val tileSer = TileUDT.tileSerializer + + override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = + (tile.toArrayTile(): Tile).toInternalRow +} + +object RealizeTile { + def apply(col: Column): TypedColumn[Any, Tile] = + new Column(new RealizeTile(col.expr)).as[Tile] +} diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellCountAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/CellCountAggregate.scala similarity index 83% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellCountAggregate.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/CellCountAggregate.scala index 0a4424665..82c2d3f93 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellCountAggregate.scala +++ 
b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/CellCountAggregate.scala @@ -19,10 +19,10 @@ * */ -package astraea.spark.rasterframes.expressions.aggstats +package org.locationtech.rasterframes.expressions.aggregates -import astraea.spark.rasterframes.expressions.UnaryRasterAggregate -import astraea.spark.rasterframes.expressions.tilestats.{DataCells, NoDataCells} +import org.locationtech.rasterframes.expressions.UnaryRasterAggregate +import org.locationtech.rasterframes.expressions.tilestats.{DataCells, NoDataCells} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, _} import org.apache.spark.sql.types.{LongType, Metadata} @@ -47,8 +47,8 @@ abstract class CellCountAggregate(isData: Boolean) extends UnaryRasterAggregate ) private def CellTest = - if (isData) tileOpAsExpression("data_cells", DataCells.op) - else tileOpAsExpression("no_data_cells", NoDataCells.op) + if (isData) tileOpAsExpression("rf_data_cells", DataCells.op) + else tileOpAsExpression("rf_no_data_cells", NoDataCells.op) val updateExpressions = Seq( If(IsNull(child), count, Add(count, CellTest(child))) @@ -64,7 +64,7 @@ abstract class CellCountAggregate(isData: Boolean) extends UnaryRasterAggregate } object CellCountAggregate { - import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.longEnc + import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.longEnc @ExpressionDescription( usage = "_FUNC_(tile) - Count the total data (non-no-data) cells in a tile column.", @@ -77,7 +77,7 @@ object CellCountAggregate { 92384753""" ) case class DataCells(child: Expression) extends CellCountAggregate(true) { - override def nodeName: String = "agg_data_cells" + override def nodeName: String = "rf_agg_data_cells" } object DataCells { def apply(tile: Column): TypedColumn[Any, Long] = @@ -94,7 +94,7 @@ object CellCountAggregate { 23584""" ) case class 
NoDataCells(child: Expression) extends CellCountAggregate(false) { - override def nodeName: String = "agg_no_data_cells" + override def nodeName: String = "rf_agg_no_data_cells" } object NoDataCells { def apply(tile: Column): TypedColumn[Any, Long] = diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellMeanAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/CellMeanAggregate.scala similarity index 85% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellMeanAggregate.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/CellMeanAggregate.scala index 846f169cb..009a46cf3 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellMeanAggregate.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/CellMeanAggregate.scala @@ -19,10 +19,10 @@ * */ -package astraea.spark.rasterframes.expressions.aggstats +package org.locationtech.rasterframes.expressions.aggregates -import astraea.spark.rasterframes.expressions.UnaryRasterAggregate -import astraea.spark.rasterframes.expressions.tilestats.{DataCells, Sum} +import org.locationtech.rasterframes.expressions.UnaryRasterAggregate +import org.locationtech.rasterframes.expressions.tilestats.{DataCells, Sum} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, _} import org.apache.spark.sql.types.{DoubleType, LongType, Metadata} @@ -41,7 +41,7 @@ import org.apache.spark.sql.{Column, TypedColumn} .... 
""") case class CellMeanAggregate(child: Expression) extends UnaryRasterAggregate { - override def nodeName: String = "agg_mean" + override def nodeName: String = "rf_agg_mean" private lazy val sum = AttributeReference("sum", DoubleType, false, Metadata.empty)() @@ -58,7 +58,7 @@ case class CellMeanAggregate(child: Expression) extends UnaryRasterAggregate { // Cant' figure out why we can't just use the Expression directly // this is necessary to properly handle null rows. For example, // if we use `tilestats.Sum` directly, we get an NPE when the stage is executed. - private val DataCellCounts = tileOpAsExpression("data_cells", DataCells.op) + private val DataCellCounts = tileOpAsExpression("rf_data_cells", DataCells.op) private val SumCells = tileOpAsExpression("sum_cells", Sum.op) override val updateExpressions = Seq( @@ -79,7 +79,7 @@ case class CellMeanAggregate(child: Expression) extends UnaryRasterAggregate { } object CellMeanAggregate { - import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc + import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc /** Computes the column aggregate mean. 
*/ def apply(tile: Column): TypedColumn[Any, Double] = new Column(new CellMeanAggregate(tile.expr).toAggregateExpression()).as[Double] diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellStatsAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/CellStatsAggregate.scala similarity index 92% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellStatsAggregate.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/CellStatsAggregate.scala index cfcde38a5..95c0bd837 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/CellStatsAggregate.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/CellStatsAggregate.scala @@ -19,16 +19,16 @@ * */ -package astraea.spark.rasterframes.expressions.aggstats +package org.locationtech.rasterframes.expressions.aggregates -import astraea.spark.rasterframes.expressions.accessors.ExtractTile -import astraea.spark.rasterframes.stats.CellStatistics +import org.locationtech.rasterframes.expressions.accessors.ExtractTile +import org.locationtech.rasterframes.stats.CellStatistics +import org.locationtech.rasterframes.TileType import geotrellis.raster.{Tile, _} import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, AggregateMode, Complete} import org.apache.spark.sql.catalyst.expressions.{ExprId, Expression, ExpressionDescription, NamedExpression} import org.apache.spark.sql.execution.aggregate.ScalaUDAF import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction} -import org.apache.spark.sql.rf.TileUDT import org.apache.spark.sql.types.{DataType, _} import org.apache.spark.sql.{Column, Row, TypedColumn} @@ -40,8 +40,6 @@ import org.apache.spark.sql.{Column, Row, TypedColumn} case class CellStatsAggregate() extends UserDefinedAggregateFunction { import CellStatsAggregate.C 
// TODO: rewrite as a DeclarativeAggregate - private val TileType = new TileUDT() - override def inputSchema: StructType = StructType(StructField("value", TileType) :: Nil) override def dataType: DataType = StructType(Seq( @@ -122,11 +120,11 @@ case class CellStatsAggregate() extends UserDefinedAggregateFunction { } object CellStatsAggregate { - import astraea.spark.rasterframes.encoders.StandardEncoders.cellStatsEncoder + import org.locationtech.rasterframes.encoders.StandardEncoders.cellStatsEncoder def apply(col: Column): TypedColumn[Any, CellStatistics] = new Column(new CellStatsAggregateUDAF(col.expr)) - .as(s"agg_stats($col)") // node renaming in class doesn't seem to propogate + .as(s"rf_agg_stats($col)") // node renaming in class doesn't seem to propogate .as[CellStatistics] /** Adapter hack to allow UserDefinedAggregateFunction to be referenced as an expression. */ @@ -147,7 +145,7 @@ object CellStatsAggregate { class CellStatsAggregateUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId) extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) { def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new CellStatsAggregate()), Complete, false, NamedExpression.newExprId) - override def nodeName: String = "agg_stats" + override def nodeName: String = "rf_agg_stats" } object CellStatsAggregateUDAF { def apply(child: Expression): CellStatsAggregateUDAF = new CellStatsAggregateUDAF(child) diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/HistogramAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/HistogramAggregate.scala similarity index 88% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/HistogramAggregate.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/HistogramAggregate.scala index 7920415da..44cc1324b 100644 --- 
a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/HistogramAggregate.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/HistogramAggregate.scala @@ -19,13 +19,13 @@ * */ -package astraea.spark.rasterframes.expressions.aggstats +package org.locationtech.rasterframes.expressions.aggregates import java.nio.ByteBuffer -import astraea.spark.rasterframes.expressions.accessors.ExtractTile -import astraea.spark.rasterframes.functions.safeEval -import astraea.spark.rasterframes.stats.CellHistogram +import org.locationtech.rasterframes.expressions.accessors.ExtractTile +import org.locationtech.rasterframes.functions.safeEval +import org.locationtech.rasterframes.stats.CellHistogram import geotrellis.raster.Tile import geotrellis.raster.histogram.{Histogram, StreamingHistogram} import geotrellis.spark.util.KryoSerializer @@ -33,9 +33,9 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, import org.apache.spark.sql.catalyst.expressions.{ExprId, Expression, ExpressionDescription, NamedExpression} import org.apache.spark.sql.execution.aggregate.ScalaUDAF import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction} -import org.apache.spark.sql.rf.TileUDT import org.apache.spark.sql.types._ import org.apache.spark.sql.{Column, Row, TypedColumn} +import org.locationtech.rasterframes.TileType /** * Histogram aggregation function for a full column of tiles. @@ -45,7 +45,6 @@ import org.apache.spark.sql.{Column, Row, TypedColumn} case class HistogramAggregate(numBuckets: Int) extends UserDefinedAggregateFunction { def this() = this(StreamingHistogram.DEFAULT_NUM_BUCKETS) // TODO: rewrite as TypedAggregateExpression or similar. 
- private val TileType = new TileUDT() override def inputSchema: StructType = StructType(StructField("value", TileType) :: Nil) @@ -96,11 +95,11 @@ case class HistogramAggregate(numBuckets: Int) extends UserDefinedAggregateFunct } object HistogramAggregate { - import astraea.spark.rasterframes.encoders.StandardEncoders.cellHistEncoder + import org.locationtech.rasterframes.encoders.StandardEncoders.cellHistEncoder def apply(col: Column): TypedColumn[Any, CellHistogram] = new Column(new HistogramAggregateUDAF(col.expr)) - .as(s"agg_approx_histogram($col)") // node renaming in class doesn't seem to propogate + .as(s"rf_agg_approx_histogram($col)") // node renaming in class doesn't seem to propogate .as[CellHistogram] /** Adapter hack to allow UserDefinedAggregateFunction to be referenced as an expression. */ @@ -117,7 +116,7 @@ object HistogramAggregate { class HistogramAggregateUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId) extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) { def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new HistogramAggregate()), Complete, false, NamedExpression.newExprId) - override def nodeName: String = "agg_approx_histogram" + override def nodeName: String = "rf_agg_approx_histogram" } object HistogramAggregateUDAF { def apply(child: Expression): HistogramAggregateUDAF = new HistogramAggregateUDAF(child) diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalCountAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalCountAggregate.scala similarity index 88% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalCountAggregate.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalCountAggregate.scala index f427d9ee3..256cd63dd 100644 --- 
a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalCountAggregate.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalCountAggregate.scala @@ -19,19 +19,19 @@ * */ -package astraea.spark.rasterframes.expressions.aggstats +package org.locationtech.rasterframes.expressions.aggregates -import astraea.spark.rasterframes.expressions.accessors.ExtractTile -import astraea.spark.rasterframes.functions.safeBinaryOp +import org.locationtech.rasterframes.expressions.accessors.ExtractTile +import org.locationtech.rasterframes.functions.safeBinaryOp import geotrellis.raster.mapalgebra.local.{Add, Defined, Undefined} import geotrellis.raster.{IntConstantNoDataCellType, Tile} import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, AggregateMode, Complete} import org.apache.spark.sql.catalyst.expressions.{ExprId, Expression, ExpressionDescription, NamedExpression} import org.apache.spark.sql.execution.aggregate.ScalaUDAF import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction} -import org.apache.spark.sql.rf.TileUDT import org.apache.spark.sql.types.{DataType, StructField, StructType} import org.apache.spark.sql.{Column, Row, TypedColumn} +import org.locationtech.rasterframes.TileType /** * Catalyst aggregate function that counts `NoData` values in a cell-wise fashion. 
@@ -47,8 +47,6 @@ class LocalCountAggregate(isData: Boolean) extends UserDefinedAggregateFunction private val add = safeBinaryOp(Add.apply(_: Tile, _: Tile)) - private val TileType = new TileUDT() - override def dataType: DataType = TileType override def inputSchema: StructType = StructType(Seq( @@ -83,19 +81,19 @@ class LocalCountAggregate(isData: Boolean) extends UserDefinedAggregateFunction override def evaluate(buffer: Row): Tile = buffer.getAs[Tile](0) } object LocalCountAggregate { - import astraea.spark.rasterframes.encoders.StandardEncoders.singlebandTileEncoder + import org.locationtech.rasterframes.encoders.StandardEncoders.singlebandTileEncoder @ExpressionDescription( usage = "_FUNC_(tile) - Compute cell-wise count of non-no-data values." ) class LocalDataCellsUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId) extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) { def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new LocalCountAggregate(true)), Complete, false, NamedExpression.newExprId) - override def nodeName: String = "agg_local_data_cells" + override def nodeName: String = "rf_agg_local_data_cells" } object LocalDataCellsUDAF { def apply(child: Expression): LocalDataCellsUDAF = new LocalDataCellsUDAF(child) def apply(tile: Column): TypedColumn[Any, Tile] = new Column(new LocalDataCellsUDAF(tile.expr)) - .as(s"agg_local_data_cells($tile)") + .as(s"rf_agg_local_data_cells($tile)") .as[Tile] } @@ -104,13 +102,13 @@ object LocalCountAggregate { ) class LocalNoDataCellsUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId) extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) { def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new LocalCountAggregate(false)), Complete, false, NamedExpression.newExprId) - override def nodeName: String = "agg_local_no_data_cells" + override def 
nodeName: String = "rf_agg_local_no_data_cells" } object LocalNoDataCellsUDAF { def apply(child: Expression): LocalNoDataCellsUDAF = new LocalNoDataCellsUDAF(child) def apply(tile: Column): TypedColumn[Any, Tile] = new Column(new LocalNoDataCellsUDAF(tile.expr)) - .as(s"agg_local_no_data_cells($tile)") + .as(s"rf_agg_local_no_data_cells($tile)") .as[Tile] } diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalMeanAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalMeanAggregate.scala similarity index 77% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalMeanAggregate.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalMeanAggregate.scala index bab1eba20..06741a98c 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalMeanAggregate.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalMeanAggregate.scala @@ -19,27 +19,26 @@ * */ -package astraea.spark.rasterframes.expressions.aggstats +package org.locationtech.rasterframes.expressions.aggregates -import astraea.spark.rasterframes.expressions.UnaryRasterAggregate -import astraea.spark.rasterframes.expressions.localops.{Add => AddTiles, Divide => DivideTiles} -import astraea.spark.rasterframes.expressions.transformers.SetCellType +import org.locationtech.rasterframes.expressions.UnaryRasterAggregate +import org.locationtech.rasterframes.expressions.localops.{BiasedAdd, Divide => DivideTiles} +import org.locationtech.rasterframes.expressions.transformers.SetCellType import geotrellis.raster.Tile import geotrellis.raster.mapalgebra.local import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, ExpressionDescription, If, IsNull, Literal} -import org.apache.spark.sql.rf.TileUDT import org.apache.spark.sql.types.DataType import org.apache.spark.sql.{Column, TypedColumn} +import 
org.locationtech.rasterframes.TileType @ExpressionDescription( usage = "_FUNC_(tile) - Computes a new tile contining the mean cell values across all tiles in column.", note = "All tiles in the column must be the same size." ) case class LocalMeanAggregate(child: Expression) extends UnaryRasterAggregate { - private val TileType = new TileUDT() override def dataType: DataType = TileType - override def nodeName: String = "agg_local_mean" + override def nodeName: String = "rf_agg_local_mean" private lazy val count = AttributeReference("count", TileType, true)() @@ -60,21 +59,21 @@ case class LocalMeanAggregate(child: Expression) extends UnaryRasterAggregate { override lazy val updateExpressions: Seq[Expression] = Seq( If(IsNull(count), SetCellType(Defined(child), Literal("int32")), - If(IsNull(child), count, AddTiles(count, Defined(child))) + If(IsNull(child), count, BiasedAdd(count, Defined(child))) ), If(IsNull(sum), SetCellType(child, Literal("float64")), - If(IsNull(child), sum, AddTiles(sum, child)) + If(IsNull(child), sum, BiasedAdd(sum, child)) ) ) override val mergeExpressions: Seq[Expression] = Seq( - AddTiles(count.left, count.right), - AddTiles(sum.left, sum.right) + BiasedAdd(count.left, count.right), + BiasedAdd(sum.left, sum.right) ) override lazy val evaluateExpression: Expression = DivideTiles(sum, count) } object LocalMeanAggregate { - import astraea.spark.rasterframes.encoders.StandardEncoders.singlebandTileEncoder + import org.locationtech.rasterframes.encoders.StandardEncoders.singlebandTileEncoder def apply(tile: Column): TypedColumn[Any, Tile] = new Column(new LocalMeanAggregate(tile.expr).toAggregateExpression()).as[Tile] diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalStatsAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalStatsAggregate.scala similarity index 92% rename from 
core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalStatsAggregate.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalStatsAggregate.scala index 8df684a25..86b360dea 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalStatsAggregate.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalStatsAggregate.scala @@ -19,21 +19,21 @@ * */ -package astraea.spark.rasterframes.expressions.aggstats +package org.locationtech.rasterframes.expressions.aggregates -import astraea.spark.rasterframes.expressions.accessors.ExtractTile -import astraea.spark.rasterframes.functions.safeBinaryOp -import astraea.spark.rasterframes.stats.LocalCellStatistics -import astraea.spark.rasterframes.util.DataBiasedOp.{BiasedAdd, BiasedMax, BiasedMin} +import org.locationtech.rasterframes.expressions.accessors.ExtractTile +import org.locationtech.rasterframes.functions.safeBinaryOp +import org.locationtech.rasterframes.stats.LocalCellStatistics +import org.locationtech.rasterframes.util.DataBiasedOp.{BiasedAdd, BiasedMax, BiasedMin} import geotrellis.raster.mapalgebra.local._ import geotrellis.raster.{DoubleConstantNoDataCellType, IntConstantNoDataCellType, IntUserDefinedNoDataCellType, Tile} import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, AggregateMode, Complete} import org.apache.spark.sql.catalyst.expressions.{ExprId, Expression, ExpressionDescription, NamedExpression} import org.apache.spark.sql.execution.aggregate.ScalaUDAF import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction} -import org.apache.spark.sql.rf.TileUDT import org.apache.spark.sql.types._ import org.apache.spark.sql.{Column, Row, TypedColumn} +import org.locationtech.rasterframes.TileType /** @@ -44,8 +44,6 @@ import org.apache.spark.sql.{Column, Row, TypedColumn} class LocalStatsAggregate() extends 
UserDefinedAggregateFunction { import LocalStatsAggregate.C - private val TileType = new TileUDT() - override def inputSchema: StructType = StructType(Seq( StructField("value", TileType, true) )) @@ -149,7 +147,7 @@ object LocalStatsAggregate { def apply(col: Column): TypedColumn[Any, LocalCellStatistics] = new Column(LocalStatsAggregateUDAF(col.expr)) - .as(s"agg_local_stats($col)") + .as(s"rf_agg_local_stats($col)") .as[LocalCellStatistics] /** Adapter hack to allow UserDefinedAggregateFunction to be referenced as an expression. */ @@ -166,7 +164,7 @@ object LocalStatsAggregate { class LocalStatsAggregateUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId) extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) { def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new LocalStatsAggregate()), Complete, false, NamedExpression.newExprId) - override def nodeName: String = "agg_local_stats" + override def nodeName: String = "rf_agg_local_stats" } object LocalStatsAggregateUDAF { def apply(child: Expression): LocalStatsAggregateUDAF = new LocalStatsAggregateUDAF(child) diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalTileOpAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalTileOpAggregate.scala similarity index 86% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalTileOpAggregate.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalTileOpAggregate.scala index 7a5032176..b739961c1 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/aggstats/LocalTileOpAggregate.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/LocalTileOpAggregate.scala @@ -19,21 +19,20 @@ * */ -package astraea.spark.rasterframes.expressions.aggstats +package 
org.locationtech.rasterframes.expressions.aggregates -import astraea.spark.rasterframes.expressions.accessors.ExtractTile -import astraea.spark.rasterframes.functions.safeBinaryOp -import astraea.spark.rasterframes.util.DataBiasedOp.{BiasedMax, BiasedMin} +import org.locationtech.rasterframes.TileType +import org.locationtech.rasterframes.expressions.accessors.ExtractTile +import org.locationtech.rasterframes.functions.safeBinaryOp +import org.locationtech.rasterframes.util.DataBiasedOp.{BiasedMax, BiasedMin} import geotrellis.raster.Tile -import geotrellis.raster.mapalgebra.local import geotrellis.raster.mapalgebra.local.LocalTileBinaryOp -import org.apache.spark.sql.{Column, Row, TypedColumn} -import org.apache.spark.sql.catalyst.expressions.{ExprId, Expression, ExpressionDescription, NamedExpression} import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, AggregateMode, Complete} +import org.apache.spark.sql.catalyst.expressions.{ExprId, Expression, ExpressionDescription, NamedExpression} import org.apache.spark.sql.execution.aggregate.ScalaUDAF import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction} -import org.apache.spark.sql.rf.TileUDT import org.apache.spark.sql.types._ +import org.apache.spark.sql.{Column, Row, TypedColumn} /** * Aggregation function for applying a [[LocalTileBinaryOp]] pairwise across all tiles. Assumes Monoid algebra. 
@@ -44,8 +43,6 @@ class LocalTileOpAggregate(op: LocalTileBinaryOp) extends UserDefinedAggregateFu private val safeOp = safeBinaryOp(op.apply(_: Tile, _: Tile)) - private val TileType = new TileUDT() - override def inputSchema: StructType = StructType(Seq( StructField("value", TileType, true) )) @@ -75,14 +72,14 @@ class LocalTileOpAggregate(op: LocalTileBinaryOp) extends UserDefinedAggregateFu } object LocalTileOpAggregate { - import astraea.spark.rasterframes.encoders.StandardEncoders.singlebandTileEncoder + import org.locationtech.rasterframes.encoders.StandardEncoders.singlebandTileEncoder @ExpressionDescription( usage = "_FUNC_(tile) - Compute cell-wise minimum value from a tile column." ) class LocalMinUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId) extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) { def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new LocalTileOpAggregate(BiasedMin)), Complete, false, NamedExpression.newExprId) - override def nodeName: String = "agg_local_min" + override def nodeName: String = "rf_agg_local_min" } object LocalMinUDAF { def apply(child: Expression): LocalMinUDAF = new LocalMinUDAF(child) @@ -94,7 +91,7 @@ object LocalTileOpAggregate { ) class LocalMaxUDAF(aggregateFunction: AggregateFunction, mode: AggregateMode, isDistinct: Boolean, resultId: ExprId) extends AggregateExpression(aggregateFunction, mode, isDistinct, resultId) { def this(child: Expression) = this(ScalaUDAF(Seq(ExtractTile(child)), new LocalTileOpAggregate(BiasedMax)), Complete, false, NamedExpression.newExprId) - override def nodeName: String = "agg_local_max" + override def nodeName: String = "rf_agg_local_max" } object LocalMaxUDAF { def apply(child: Expression): LocalMaxUDAF = new LocalMaxUDAF(child) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/ProjectedLayerMetadataAggregate.scala 
b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/ProjectedLayerMetadataAggregate.scala new file mode 100644 index 000000000..0f1b4727a --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/ProjectedLayerMetadataAggregate.scala @@ -0,0 +1,179 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.expressions.aggregates + +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.encoders.CatalystSerializer +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.model.TileDimensions +import geotrellis.proj4.{CRS, Transform} +import geotrellis.raster._ +import geotrellis.raster.reproject.{Reproject, ReprojectRasterExtent} +import geotrellis.spark.tiling.LayoutDefinition +import geotrellis.spark.{KeyBounds, SpatialKey, TileLayerMetadata} +import geotrellis.vector.Extent +import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction} +import org.apache.spark.sql.types.{DataType, StructField, StructType} +import org.apache.spark.sql.{Column, Row, TypedColumn} + +class ProjectedLayerMetadataAggregate(destCRS: CRS, destDims: TileDimensions) extends UserDefinedAggregateFunction { + import 
ProjectedLayerMetadataAggregate._ + + override def inputSchema: StructType = CatalystSerializer[InputRecord].schema + + override def bufferSchema: StructType = CatalystSerializer[BufferRecord].schema + + override def dataType: DataType = CatalystSerializer[TileLayerMetadata[SpatialKey]].schema + + override def deterministic: Boolean = true + + override def initialize(buffer: MutableAggregationBuffer): Unit = () + + override def update(buffer: MutableAggregationBuffer, input: Row): Unit = { + if(!input.isNullAt(0)) { + val in = input.to[InputRecord] + + if(buffer.isNullAt(0)) { + in.toBufferRecord(destCRS).write(buffer) + } + else { + val br = buffer.to[BufferRecord] + br.merge(in.toBufferRecord(destCRS)).write(buffer) + } + } + } + + override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = { + (buffer1.isNullAt(0), buffer2.isNullAt(0)) match { + case (false, false) ⇒ + val left = buffer1.to[BufferRecord] + val right = buffer2.to[BufferRecord] + left.merge(right).write(buffer1) + case (true, false) ⇒ buffer2.to[BufferRecord].write(buffer1) + case _ ⇒ () + } + } + + override def evaluate(buffer: Row): Any = { + import org.locationtech.rasterframes.encoders.CatalystSerializer._ + val buf = buffer.to[BufferRecord] + + val re = RasterExtent(buf.extent, buf.cellSize) + val layout = LayoutDefinition(re, destDims.cols, destDims.rows) + + val kb = KeyBounds(layout.mapTransform(buf.extent)) + TileLayerMetadata(buf.cellType, layout, buf.extent, destCRS, kb).toRow + } +} + +object ProjectedLayerMetadataAggregate { + import org.locationtech.rasterframes.encoders.StandardEncoders._ + + /** Primary user facing constructor */ + def apply(destCRS: CRS, extent: Column, crs: Column, cellType: Column, tileSize: Column): TypedColumn[Any, TileLayerMetadata[SpatialKey]] = + // Ordering must match InputRecord schema + new ProjectedLayerMetadataAggregate(destCRS, TileDimensions(NOMINAL_TILE_SIZE, NOMINAL_TILE_SIZE))(extent, crs, cellType, 
tileSize).as[TileLayerMetadata[SpatialKey]] + + def apply(destCRS: CRS, destDims: TileDimensions, extent: Column, crs: Column, cellType: Column, tileSize: Column): TypedColumn[Any, TileLayerMetadata[SpatialKey]] = + // Ordering must match InputRecord schema + new ProjectedLayerMetadataAggregate(destCRS, destDims)(extent, crs, cellType, tileSize).as[TileLayerMetadata[SpatialKey]] + + private[expressions] + case class InputRecord(extent: Extent, crs: CRS, cellType: CellType, tileSize: TileDimensions) { + def toBufferRecord(destCRS: CRS): BufferRecord = { + val transform = Transform(crs, destCRS) + + val re = ReprojectRasterExtent( + RasterExtent(extent, tileSize.cols, tileSize.rows), + transform, Reproject.Options.DEFAULT + ) + + BufferRecord( + re.extent, + cellType, + re.cellSize + ) + } + } + + private[expressions] + object InputRecord { + implicit val serializer: CatalystSerializer[InputRecord] = new CatalystSerializer[InputRecord]{ + override def schema: StructType = StructType(Seq( + StructField("extent", CatalystSerializer[Extent].schema, false), + StructField("crs", CatalystSerializer[CRS].schema, false), + StructField("cellType", CatalystSerializer[CellType].schema, false), + StructField("tileSize", CatalystSerializer[TileDimensions].schema, false) + )) + + override protected def to[R](t: InputRecord, io: CatalystIO[R]): R = + throw new IllegalStateException("InputRecord is input only.") + + override protected def from[R](t: R, io: CatalystIO[R]): InputRecord = InputRecord( + io.get[Extent](t, 0), + io.get[CRS](t, 1), + io.get[CellType](t, 2), + io.get[TileDimensions](t, 3) + ) + } + } + + private[expressions] + case class BufferRecord(extent: Extent, cellType: CellType, cellSize: CellSize) { + def merge(that: BufferRecord): BufferRecord = { + val ext = this.extent.combine(that.extent) + val ct = this.cellType.union(that.cellType) + val cs = if (this.cellSize.resolution < that.cellSize.resolution) this.cellSize else that.cellSize + BufferRecord(ext, ct, cs) 
+ } + + def write(buffer: MutableAggregationBuffer): Unit = { + val encoded = (this).toRow + for(i <- 0 until encoded.size) { + buffer(i) = encoded(i) + } + } + } + + private[expressions] + object BufferRecord { + implicit val serializer: CatalystSerializer[BufferRecord] = new CatalystSerializer[BufferRecord] { + override def schema: StructType = StructType(Seq( + StructField("extent", CatalystSerializer[Extent].schema, true), + StructField("cellType", CatalystSerializer[CellType].schema, true), + StructField("cellSize", CatalystSerializer[CellSize].schema, true) + )) + + override protected def to[R](t: BufferRecord, io: CatalystIO[R]): R = io.create( + io.to(t.extent), + io.to(t.cellType), + io.to(t.cellSize) + ) + + override protected def from[R](t: R, io: CatalystIO[R]): BufferRecord = BufferRecord( + io.get[Extent](t, 0), + io.get[CellType](t, 1), + io.get[CellSize](t, 2) + ) + } + } +} \ No newline at end of file diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/TileRasterizerAggregate.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/TileRasterizerAggregate.scala new file mode 100644 index 000000000..e1b11ae3b --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/aggregates/TileRasterizerAggregate.scala @@ -0,0 +1,109 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.expressions.aggregates + +import geotrellis.proj4.CRS +import geotrellis.raster.reproject.Reproject +import geotrellis.raster.resample.ResampleMethod +import geotrellis.raster.{ArrayTile, CellType, Raster, Tile} +import geotrellis.spark.TileLayerMetadata +import geotrellis.vector.Extent +import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction} +import org.apache.spark.sql.types.{DataType, StructField, StructType} +import org.apache.spark.sql.{Column, Row, TypedColumn} +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.expressions.aggregates.TileRasterizerAggregate.ProjectedRasterDefinition + +/** + * Aggregation function for creating a single `geotrellis.raster.Raster[Tile]` from + * `Tile`, `CRS` and `Extent` columns. 
+ * @param prd aggregation settings + */ +class TileRasterizerAggregate(prd: ProjectedRasterDefinition) extends UserDefinedAggregateFunction { + + val projOpts = Reproject.Options.DEFAULT.copy(method = prd.sampler) + + override def deterministic: Boolean = true + + override def inputSchema: StructType = StructType(Seq( + StructField("crs", schemaOf[CRS], false), + StructField("extent", schemaOf[Extent], false), + StructField("tile", TileType) + )) + + override def bufferSchema: StructType = StructType(Seq( + StructField("tile_buffer", TileType) + )) + + override def dataType: DataType = schemaOf[Raster[Tile]] + + override def initialize(buffer: MutableAggregationBuffer): Unit = { + buffer(0) = ArrayTile.empty(prd.cellType, prd.totalCols, prd.totalRows) + } + + override def update(buffer: MutableAggregationBuffer, input: Row): Unit = { + val crs = input.getAs[Row](0).to[CRS] + val extent = input.getAs[Row](1).to[Extent] + + val localExtent = extent.reproject(crs, prd.crs) + + if (prd.extent.intersects(localExtent)) { + val localTile = input.getAs[Tile](2).reproject(extent, crs, prd.crs, projOpts) + val bt = buffer.getAs[Tile](0) + val merged = bt.merge(prd.extent, localExtent, localTile.tile, prd.sampler) + buffer(0) = merged + } + } + + override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = { + val leftTile = buffer1.getAs[Tile](0) + val rightTile = buffer2.getAs[Tile](0) + buffer1(0) = leftTile.merge(rightTile) + } + + override def evaluate(buffer: Row): Raster[Tile] = { + val t = buffer.getAs[Tile](0) + Raster(t, prd.extent) + } +} + +object TileRasterizerAggregate { + val nodeName = "rf_tile_rasterizer_aggregate" + /** Convenience grouping of parameters needed for running aggregate. 
*/ + case class ProjectedRasterDefinition(totalCols: Int, totalRows: Int, cellType: CellType, crs: CRS, extent: Extent, sampler: ResampleMethod = ResampleMethod.DEFAULT) + + object ProjectedRasterDefinition { + def apply(tlm: TileLayerMetadata[_]): ProjectedRasterDefinition = apply(tlm, ResampleMethod.DEFAULT) + + def apply(tlm: TileLayerMetadata[_], sampler: ResampleMethod): ProjectedRasterDefinition = { + // Try to determine the actual dimensions of our data coverage + val actualSize = tlm.layout.toRasterExtent().gridBoundsFor(tlm.extent) // <--- Do we have the math right here? + val cols = actualSize.width + val rows = actualSize.height + new ProjectedRasterDefinition(cols, rows, tlm.cellType, tlm.crs, tlm.extent, sampler) + } +} + + def apply(prd: ProjectedRasterDefinition, crsCol: Column, extentCol: Column, tileCol: Column): TypedColumn[Any, Raster[Tile]] = + new TileRasterizerAggregate(prd)(crsCol, extentCol, tileCol).as(nodeName).as[Raster[Tile]] +} \ No newline at end of file diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/generators/ExplodeTiles.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/ExplodeTiles.scala similarity index 93% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/generators/ExplodeTiles.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/generators/ExplodeTiles.scala index e39ca1814..bd2a4689a 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/generators/ExplodeTiles.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/ExplodeTiles.scala @@ -19,11 +19,11 @@ * */ -package astraea.spark.rasterframes.expressions.generators +package org.locationtech.rasterframes.expressions.generators -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.util._ +import org.locationtech.rasterframes._ +import 
org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.util._ import geotrellis.raster._ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.InternalRow @@ -43,7 +43,7 @@ case class ExplodeTiles( extends Expression with Generator with CodegenFallback { def this(children: Seq[Expression]) = this(1.0, None, children) - override def nodeName: String = "explode_tiles" + override def nodeName: String = "rf_explode_tiles" override def elementSchema: StructType = { val names = diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToRasterRefs.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToRasterRefs.scala new file mode 100644 index 000000000..68c7209e5 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToRasterRefs.scala @@ -0,0 +1,93 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.expressions.generators + +import com.typesafe.scalalogging.LazyLogging +import geotrellis.vector.Extent +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.types.{DataType, StructField, StructType} +import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs.bandNames +import org.locationtech.rasterframes.model.TileDimensions +import org.locationtech.rasterframes.ref.{RasterRef, RasterSource} +import org.locationtech.rasterframes.util._ +import org.locationtech.rasterframes.RasterSourceType + +import scala.util.Try +import scala.util.control.NonFatal + +/** + * Accepts RasterSource and generates one or more RasterRef instances representing + * + * @since 9/6/18 + */ +case class RasterSourceToRasterRefs(children: Seq[Expression], bandIndexes: Seq[Int], subtileDims: Option[TileDimensions] = None) extends Expression + with Generator with CodegenFallback with ExpectsInputTypes with LazyLogging { + + override def inputTypes: Seq[DataType] = Seq.fill(children.size)(RasterSourceType) + override def nodeName: String = "rf_raster_source_to_raster_ref" + + override def elementSchema: StructType = StructType(for { + child <- children + basename = child.name + "_ref" + name <- bandNames(basename, bandIndexes) + } yield StructField(name, schemaOf[RasterRef], true)) + + private def band2ref(src: RasterSource, e: Option[Extent])(b: Int): RasterRef = + if (b < src.bandCount) RasterRef(src, b, e) else null + + override def eval(input: InternalRow): TraversableOnce[InternalRow] = { + try { + val refs = children.map { child ⇒ + val src = RasterSourceType.deserialize(child.eval(input)) + 
subtileDims.map(dims => + src + .layoutExtents(dims) + .map(e ⇒ bandIndexes.map(band2ref(src, Some(e)))) + ) + .getOrElse(Seq(bandIndexes.map(band2ref(src, None)))) + } + refs.transpose.map(ts ⇒ InternalRow(ts.flatMap(_.map(_.toInternalRow)): _*)) + } + catch { + case NonFatal(ex) ⇒ + val payload = Try(children.map(c => RasterSourceType.deserialize(c.eval(input)))).toOption.toSeq.flatten + logger.error("Error fetching data for one of: " + payload.mkString(", "), ex) + Traversable.empty + } + } +} + +object RasterSourceToRasterRefs { + def apply(rrs: Column*): TypedColumn[Any, RasterRef] = apply(None, Seq(0), rrs: _*) + def apply(subtileDims: Option[TileDimensions], bandIndexes: Seq[Int], rrs: Column*): TypedColumn[Any, RasterRef] = + new Column(new RasterSourceToRasterRefs(rrs.map(_.expr), bandIndexes, subtileDims)).as[RasterRef] + + private[rasterframes] def bandNames(basename: String, bandIndexes: Seq[Int]): Seq[String] = bandIndexes match { + case Seq() => Seq.empty + case Seq(0) => Seq(basename) + case s => s.map(n => basename + "_b" + n) + } +} diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToTiles.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToTiles.scala new file mode 100644 index 000000000..32b3f4b11 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/generators/RasterSourceToTiles.scala @@ -0,0 +1,88 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.expressions.generators + +import com.typesafe.scalalogging.LazyLogging +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.types.{DataType, StructField, StructType} +import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs.bandNames +import org.locationtech.rasterframes.model.TileDimensions +import org.locationtech.rasterframes.tiles.ProjectedRasterTile +import org.locationtech.rasterframes.util._ +import org.locationtech.rasterframes.RasterSourceType + +import scala.util.Try +import scala.util.control.NonFatal + +/** + * Accepts RasterRef and generates one or more RasterRef instances representing the + * native internal sub-tiling, if any (and requested). 
+ * + * @since 9/6/18 + */ +case class RasterSourceToTiles(children: Seq[Expression], bandIndexes: Seq[Int], subtileDims: Option[TileDimensions] = None) extends Expression + with Generator with CodegenFallback with ExpectsInputTypes with LazyLogging { + + override def inputTypes: Seq[DataType] = Seq.fill(children.size)(RasterSourceType) + override def nodeName: String = "rf_raster_source_to_tiles" + + override def elementSchema: StructType = StructType(for { + child <- children + basename = child.name + name <- bandNames(basename, bandIndexes) + } yield StructField(name, schemaOf[ProjectedRasterTile], true)) + + override def eval(input: InternalRow): TraversableOnce[InternalRow] = { + try { + val tiles = children.map { child ⇒ + val src = RasterSourceType.deserialize(child.eval(input)) + val maxBands = src.bandCount + val allowedBands = bandIndexes.filter(_ < maxBands) + src.readAll(subtileDims.getOrElse(rasterframes.NOMINAL_TILE_DIMS), allowedBands) + .map(r => bandIndexes.map { + case i if i < maxBands => ProjectedRasterTile(r.tile.band(i), r.extent, src.crs) + case _ => null + }) + } + tiles.transpose.map(ts ⇒ InternalRow(ts.flatMap(_.map(_.toInternalRow)): _*)) + } + catch { + case NonFatal(ex) ⇒ + val payload = Try(children.map(c => RasterSourceType.deserialize(c.eval(input)))).toOption.toSeq.flatten + logger.error("Error fetching data for one of: " + payload.mkString(", "), ex) + Traversable.empty + } + } +} + +object RasterSourceToTiles { + def apply(rrs: Column*): TypedColumn[Any, ProjectedRasterTile] = apply(None, Seq(0), rrs: _*) + def apply(subtileDims: Option[TileDimensions], bandIndexes: Seq[Int], rrs: Column*): TypedColumn[Any, ProjectedRasterTile] = + new Column(new RasterSourceToTiles(rrs.map(_.expr), bandIndexes, subtileDims)).as[ProjectedRasterTile] +} + + diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Abs.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Abs.scala new file mode 
100644 index 000000000..0fe6cac87 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Abs.scala @@ -0,0 +1,50 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.expressions.localops + +import geotrellis.raster.Tile +import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.{NullToValue, UnaryLocalRasterOp} + +@ExpressionDescription( + usage = "_FUNC_(tile) - Compute the absolute value of each cell.", + arguments = """ + Arguments: + * tile - tile column to apply abs""", + examples = """ + Examples: + > SELECT _FUNC_(tile); + ...""" +) +case class Abs(child: Expression) extends UnaryLocalRasterOp with NullToValue with CodegenFallback { + override def nodeName: String = "rf_abs" + override def na: Any = null + override protected def op(t: Tile): Tile = t.localAbs() +} + +object Abs { + def apply(tile: Column): TypedColumn[Any, Tile] = + new Column(Abs(tile.expr)).as[Tile] +} diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Add.scala 
b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Add.scala similarity index 76% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Add.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Add.scala index d7f1a7867..b7a3c8946 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Add.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Add.scala @@ -19,19 +19,17 @@ * */ -package astraea.spark.rasterframes.expressions.localops +package org.locationtech.rasterframes.expressions.localops -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.expressions.DynamicExtractors.tileExtractor -import astraea.spark.rasterframes.expressions.{BinaryLocalRasterOp, DynamicExtractors} -import astraea.spark.rasterframes.util.DataBiasedOp.BiasedAdd import geotrellis.raster.Tile -import org.apache.spark.sql.rf._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.functions.lit import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes.expressions.DynamicExtractors.tileExtractor @ExpressionDescription( usage = "_FUNC_(tile, rhs) - Performs cell-wise addition between two tiles or a tile and a scalar.", @@ -48,10 +46,10 @@ import org.apache.spark.sql.{Column, TypedColumn} ) case class Add(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { - override val nodeName: String = "local_add" - override protected def op(left: Tile, right: Tile): Tile = BiasedAdd(left, right) - override protected def op(left: Tile, right: Double): Tile = BiasedAdd(left, right) - override protected def 
op(left: Tile, right: Int): Tile = BiasedAdd(left, right) + override val nodeName: String = "rf_local_add" + override protected def op(left: Tile, right: Tile): Tile = left.localAdd(right) + override protected def op(left: Tile, right: Double): Tile = left.localAdd(right) + override protected def op(left: Tile, right: Int): Tile = left.localAdd(right) override def eval(input: InternalRow): Any = { if(input == null) null diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/BiasedAdd.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/BiasedAdd.scala new file mode 100644 index 000000000..10bd82e62 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/BiasedAdd.scala @@ -0,0 +1,74 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.expressions.localops +import geotrellis.raster.Tile +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} +import org.apache.spark.sql.functions.lit +import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes.expressions.DynamicExtractors.tileExtractor +import org.locationtech.rasterframes.util.DataBiasedOp + +@ExpressionDescription( + usage = "_FUNC_(tile, rhs) - Performs cell-wise addition between two tiles or a tile and a scalar. " + + "Unlike a regular 'add', this considers ` + = .", + arguments = """ + Arguments: + * tile - left-hand-side tile + * rhs - a tile or scalar value to add to each cell""", + examples = """ + Examples: + > SELECT _FUNC_(tile, 1.5); + ... 
+ > SELECT _FUNC_(tile1, tile2); + ...""" +) +case class BiasedAdd(left: Expression, right: Expression) extends BinaryLocalRasterOp + with CodegenFallback { + override val nodeName: String = "rf_local_biased_add" + override protected def op(left: Tile, right: Tile): Tile = DataBiasedOp.BiasedAdd(left, right) + override protected def op(left: Tile, right: Double): Tile = DataBiasedOp.BiasedAdd(left, right) + override protected def op(left: Tile, right: Int): Tile = DataBiasedOp.BiasedAdd(left, right) + + override def eval(input: InternalRow): Any = { + if(input == null) null + else { + val l = left.eval(input) + val r = right.eval(input) + if (l == null && r == null) null + else if (l == null) r + else if (r == null && tileExtractor.isDefinedAt(right.dataType)) l + else if (r == null) null + else nullSafeEval(l, r) + } + } +} +object BiasedAdd { + def apply(left: Column, right: Column): TypedColumn[Any, Tile] = + new Column(BiasedAdd(left.expr, right.expr)).as[Tile] + + def apply[N: Numeric](tile: Column, value: N): TypedColumn[Any, Tile] = + new Column(BiasedAdd(tile.expr, lit(value).expr)).as[Tile] +} diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Divide.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Divide.scala similarity index 90% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Divide.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Divide.scala index 37aa4ab6c..2b5f7d112 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Divide.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Divide.scala @@ -19,10 +19,10 @@ * */ -package astraea.spark.rasterframes.expressions.localops +package org.locationtech.rasterframes.expressions.localops -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp +import 
org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} @@ -43,7 +43,7 @@ import org.apache.spark.sql.{Column, TypedColumn} ...""" ) case class Divide(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { - override val nodeName: String = "local_divide" + override val nodeName: String = "rf_local_divide" override protected def op(left: Tile, right: Tile): Tile = left.localDivide(right) override protected def op(left: Tile, right: Double): Tile = left.localDivide(right) override protected def op(left: Tile, right: Int): Tile = left.localDivide(right) diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Equal.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Equal.scala similarity index 89% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Equal.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Equal.scala index 610b8beff..a9e809b47 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Equal.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Equal.scala @@ -19,10 +19,10 @@ * */ -package astraea.spark.rasterframes.expressions.localops +package org.locationtech.rasterframes.expressions.localops -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -41,7 +41,7 @@ import 
org.apache.spark.sql.{Column, TypedColumn} ...""" ) case class Equal(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { - override val nodeName: String = "local_equal" + override val nodeName: String = "rf_local_equal" override protected def op(left: Tile, right: Tile): Tile = left.localEqual(right) override protected def op(left: Tile, right: Double): Tile = left.localEqual(right) override protected def op(left: Tile, right: Int): Tile = left.localEqual(right) diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Exp.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Exp.scala similarity index 90% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Exp.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Exp.scala index 40d34ee06..ca9905e29 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Exp.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Exp.scala @@ -19,10 +19,10 @@ * */ -package astraea.spark.rasterframes.expressions.localops +package org.locationtech.rasterframes.expressions.localops -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.expressions.{UnaryLocalRasterOp, fpTile} +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.{UnaryLocalRasterOp, fpTile} import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -41,7 +41,7 @@ import org.apache.spark.sql.{Column, TypedColumn} ...""" ) case class Exp(child: Expression) extends UnaryLocalRasterOp with CodegenFallback { - override val nodeName: String = "exp" + override val nodeName: String = "rf_exp" override protected def op(tile: Tile): Tile = fpTile(tile).localPowValue(math.E) @@ -63,7 +63,7 @@ object Exp 
{ ...""" ) case class Exp10(child: Expression) extends UnaryLocalRasterOp with CodegenFallback { - override val nodeName: String = "log10" + override val nodeName: String = "rf_log10" override protected def op(tile: Tile): Tile = fpTile(tile).localPowValue(10.0) @@ -84,7 +84,7 @@ object Exp10 { ...""" ) case class Exp2(child: Expression) extends UnaryLocalRasterOp with CodegenFallback { - override val nodeName: String = "exp2" + override val nodeName: String = "rf_exp2" override protected def op(tile: Tile): Tile = fpTile(tile).localPowValue(2.0) @@ -105,7 +105,7 @@ object Exp2{ ...""" ) case class ExpM1(child: Expression) extends UnaryLocalRasterOp with CodegenFallback { - override val nodeName: String = "expm1" + override val nodeName: String = "rf_expm1" override protected def op(tile: Tile): Tile = fpTile(tile).localPowValue(math.E).localSubtract(1.0) diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Greater.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Greater.scala similarity index 89% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Greater.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Greater.scala index f78022972..ad9b8bf03 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Greater.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Greater.scala @@ -18,10 +18,10 @@ * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes.expressions.localops +package org.locationtech.rasterframes.expressions.localops -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import 
org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -40,7 +40,7 @@ import org.apache.spark.sql.{Column, TypedColumn} ...""" ) case class Greater(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { - override val nodeName: String = "local_greater" + override val nodeName: String = "rf_local_greater" override protected def op(left: Tile, right: Tile): Tile = left.localGreater(right) override protected def op(left: Tile, right: Double): Tile = left.localGreater(right) override protected def op(left: Tile, right: Int): Tile = left.localGreater(right) diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/GreaterEqual.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/GreaterEqual.scala similarity index 89% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/GreaterEqual.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/GreaterEqual.scala index bf43ceca5..725898ca5 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/GreaterEqual.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/GreaterEqual.scala @@ -19,10 +19,10 @@ * */ -package astraea.spark.rasterframes.expressions.localops +package org.locationtech.rasterframes.expressions.localops -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -41,7 +41,7 @@ import org.apache.spark.sql.{Column, TypedColumn} ...""" ) case class GreaterEqual(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { - override val nodeName: 
String = "local_greater_equal" + override val nodeName: String = "rf_local_greater_equal" override protected def op(left: Tile, right: Tile): Tile = left.localGreaterOrEqual(right) override protected def op(left: Tile, right: Double): Tile = left.localGreaterOrEqual(right) override protected def op(left: Tile, right: Int): Tile = left.localGreaterOrEqual(right) diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Identity.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Identity.scala new file mode 100644 index 000000000..60e607f8b --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Identity.scala @@ -0,0 +1,50 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.expressions.localops + +import geotrellis.raster.Tile +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} +import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.{NullToValue, UnaryLocalRasterOp} + +@ExpressionDescription( + usage = "_FUNC_(tile) - Return the given tile or projected raster unchanged. 
Useful in debugging round-trip serialization across various language and memory boundaries.", + arguments = """ + Arguments: + * tile - tile column to pass through""", + examples = """ + Examples: + > SELECT _FUNC_(tile); + ...""" +) +case class Identity(child: Expression) extends UnaryLocalRasterOp with NullToValue with CodegenFallback { + override def nodeName: String = "rf_identity" + override def na: Any = null + override protected def op(t: Tile): Tile = t +} + +object Identity { + def apply(tile: Column): TypedColumn[Any, Tile] = + new Column(Identity(tile.expr)).as[Tile] +} diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Less.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Less.scala similarity index 89% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Less.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Less.scala index 4f8d4ad7b..a80d628f7 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Less.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Less.scala @@ -18,10 +18,10 @@ * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes.expressions.localops +package org.locationtech.rasterframes.expressions.localops -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -40,7 +40,7 @@ import org.apache.spark.sql.{Column, TypedColumn} ...""" ) case class Less(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { - override val nodeName: String = "local_less" + override val 
nodeName: String = "rf_local_less" override protected def op(left: Tile, right: Tile): Tile = left.localLess(right) override protected def op(left: Tile, right: Double): Tile = left.localLess(right) override protected def op(left: Tile, right: Int): Tile = left.localLess(right) diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/LessEqual.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/LessEqual.scala similarity index 89% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/LessEqual.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/LessEqual.scala index 983ac7c0d..b9361610b 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/LessEqual.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/LessEqual.scala @@ -19,10 +19,10 @@ * */ -package astraea.spark.rasterframes.expressions.localops +package org.locationtech.rasterframes.expressions.localops -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -41,7 +41,7 @@ import org.apache.spark.sql.{Column, TypedColumn} ...""" ) case class LessEqual(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { - override val nodeName: String = "local_less_equal" + override val nodeName: String = "rf_local_less_equal" override protected def op(left: Tile, right: Tile): Tile = left.localLessOrEqual(right) override protected def op(left: Tile, right: Double): Tile = left.localLessOrEqual(right) override protected def op(left: Tile, right: Int): Tile = left.localLessOrEqual(right) 
diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Log.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Log.scala similarity index 91% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Log.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Log.scala index e2da78ce1..8e8d8a011 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Log.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Log.scala @@ -19,10 +19,10 @@ * */ -package astraea.spark.rasterframes.expressions.localops +package org.locationtech.rasterframes.expressions.localops -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.expressions.{UnaryLocalRasterOp, fpTile} +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.{UnaryLocalRasterOp, fpTile} import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -63,7 +63,7 @@ object Log { ...""" ) case class Log10(child: Expression) extends UnaryLocalRasterOp with CodegenFallback { - override val nodeName: String = "log10" + override val nodeName: String = "rf_log10" override protected def op(tile: Tile): Tile = fpTile(tile).localLog10() @@ -84,7 +84,7 @@ object Log10 { ...""" ) case class Log2(child: Expression) extends UnaryLocalRasterOp with CodegenFallback { - override val nodeName: String = "log2" + override val nodeName: String = "rf_log2" override protected def op(tile: Tile): Tile = fpTile(tile).localLog() / math.log(2.0) @@ -105,7 +105,7 @@ object Log2{ ...""" ) case class Log1p(child: Expression) extends UnaryLocalRasterOp with CodegenFallback { - override val nodeName: String = "log1p" + override val nodeName: String = "rf_log1p" override protected def op(tile: Tile): Tile = 
fpTile(tile).localAdd(1.0).localLog() diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Multiply.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Multiply.scala similarity index 90% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Multiply.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Multiply.scala index 7ed7c76b8..784771906 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Multiply.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Multiply.scala @@ -19,10 +19,10 @@ * */ -package astraea.spark.rasterframes.expressions.localops +package org.locationtech.rasterframes.expressions.localops -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -43,7 +43,7 @@ import org.apache.spark.sql.{Column, TypedColumn} ...""" ) case class Multiply(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { - override val nodeName: String = "local_multiply" + override val nodeName: String = "rf_local_multiply" override protected def op(left: Tile, right: Tile): Tile = left.localMultiply(right) override protected def op(left: Tile, right: Double): Tile = left.localMultiply(right) override protected def op(left: Tile, right: Int): Tile = left.localMultiply(right) diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/NormalizedDifference.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/NormalizedDifference.scala similarity index 84% rename from 
core/src/main/scala/astraea/spark/rasterframes/expressions/localops/NormalizedDifference.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/NormalizedDifference.scala index 5760582d6..e62ccfc37 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/NormalizedDifference.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/NormalizedDifference.scala @@ -19,10 +19,11 @@ * */ -package astraea.spark.rasterframes.expressions.localops -import astraea.spark.rasterframes.expressions.fpTile -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.expressions.BinaryRasterOp +package org.locationtech.rasterframes.expressions.localops + +import org.locationtech.rasterframes.expressions.fpTile +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.BinaryRasterOp import geotrellis.raster.Tile import org.apache.spark.sql.{Column, TypedColumn} import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} @@ -37,11 +38,11 @@ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback * right - second tile argument""", examples = """ Examples: - > SELECT _FUNC_(nir, red); + > SELECT _FUNC_(nir, red) as ndvi; ...""" ) case class NormalizedDifference(left: Expression, right: Expression) extends BinaryRasterOp with CodegenFallback { - override val nodeName: String = "normalized_difference" + override val nodeName: String = "rf_normalized_difference" override protected def op(left: Tile, right: Tile): Tile = { val diff = fpTile(left.localSubtract(right)) val sum = fpTile(left.localAdd(right)) diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Resample.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Resample.scala similarity index 89% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Resample.scala rename to 
core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Resample.scala index fd2ae2f29..6e752dfbc 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Resample.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Resample.scala @@ -19,14 +19,13 @@ * */ -package astraea.spark.rasterframes.expressions.localops +package org.locationtech.rasterframes.expressions.localops -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.expressions.DynamicExtractors.tileExtractor -import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes.expressions.DynamicExtractors.tileExtractor import geotrellis.raster.Tile import geotrellis.raster.resample.NearestNeighbor -import org.apache.spark.sql.rf._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} @@ -48,7 +47,7 @@ import org.apache.spark.sql.{Column, TypedColumn} ) case class Resample(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { - override val nodeName: String = "resample" + override val nodeName: String = "rf_resample" override protected def op(left: Tile, right: Tile): Tile = left.resample(right.cols, right.rows, NearestNeighbor) override protected def op(left: Tile, right: Double): Tile = left.resample((left.cols * right).toInt, (left.rows * right).toInt, NearestNeighbor) diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Round.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Round.scala similarity index 87% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Round.scala rename to 
core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Round.scala index 010666e17..92f1c2f89 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Round.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Round.scala @@ -19,10 +19,10 @@ * */ -package astraea.spark.rasterframes.expressions.localops +package org.locationtech.rasterframes.expressions.localops -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.expressions.{NullToValue, UnaryLocalRasterOp} +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.{NullToValue, UnaryLocalRasterOp} import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} @@ -40,7 +40,7 @@ import org.apache.spark.sql.{Column, TypedColumn} ) case class Round(child: Expression) extends UnaryLocalRasterOp with NullToValue with CodegenFallback { - override def nodeName: String = "round" + override def nodeName: String = "rf_round" override def na: Any = null override protected def op(child: Tile): Tile = child.localRound() } diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Subtract.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Subtract.scala similarity index 90% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Subtract.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Subtract.scala index 203bb578d..c09a7ea47 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Subtract.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Subtract.scala @@ -19,9 +19,10 @@ * */ -package astraea.spark.rasterframes.expressions.localops -import astraea.spark.rasterframes._ -import 
astraea.spark.rasterframes.expressions.BinaryLocalRasterOp +package org.locationtech.rasterframes.expressions.localops + +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -42,7 +43,7 @@ import org.apache.spark.sql.{Column, TypedColumn} ...""" ) case class Subtract(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { - override val nodeName: String = "local_subtract" + override val nodeName: String = "rf_local_subtract" override protected def op(left: Tile, right: Tile): Tile = left.localSubtract(right) override protected def op(left: Tile, right: Double): Tile = left.localSubtract(right) override protected def op(left: Tile, right: Int): Tile = left.localSubtract(right) diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Unequal.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Unequal.scala similarity index 89% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Unequal.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Unequal.scala index f3342b9c6..48a1e3963 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/localops/Unequal.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/localops/Unequal.scala @@ -19,10 +19,10 @@ * */ -package astraea.spark.rasterframes.expressions.localops +package org.locationtech.rasterframes.expressions.localops -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.expressions.BinaryLocalRasterOp +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.BinaryLocalRasterOp import geotrellis.raster.Tile import 
org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -41,7 +41,7 @@ import org.apache.spark.sql.{Column, TypedColumn} ...""" ) case class Unequal(left: Expression, right: Expression) extends BinaryLocalRasterOp with CodegenFallback { - override val nodeName: String = "local_unequal" + override val nodeName: String = "rf_local_unequal" override protected def op(left: Tile, right: Tile): Tile = left.localUnequal(right) override protected def op(left: Tile, right: Double): Tile = left.localUnequal(right) override protected def op(left: Tile, right: Int): Tile = left.localUnequal(right) diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/package.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/package.scala similarity index 80% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/package.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/package.scala index e4c0bcc00..8a8b70e00 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/package.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/package.scala @@ -15,25 +15,28 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes +package org.locationtech.rasterframes -import astraea.spark.rasterframes.expressions.accessors._ -import astraea.spark.rasterframes.expressions.aggstats._ -import astraea.spark.rasterframes.expressions.generators._ -import astraea.spark.rasterframes.expressions.localops._ -import astraea.spark.rasterframes.expressions.tilestats._ -import astraea.spark.rasterframes.expressions.transformers._ import geotrellis.raster.{DoubleConstantNoDataCellType, Tile} -import org.apache.spark.sql.catalyst.{InternalRow, ScalaReflection} import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.expressions.{Expression, ScalaUDF} +import org.apache.spark.sql.catalyst.{InternalRow, ScalaReflection} import org.apache.spark.sql.rf.VersionShims._ import org.apache.spark.sql.{SQLContext, rf} +import org.locationtech.rasterframes.expressions.accessors._ +import org.locationtech.rasterframes.expressions.aggregates.CellCountAggregate.DataCells +import org.locationtech.rasterframes.expressions.aggregates._ +import org.locationtech.rasterframes.expressions.generators._ +import org.locationtech.rasterframes.expressions.localops._ +import org.locationtech.rasterframes.expressions.tilestats._ +import org.locationtech.rasterframes.expressions.transformers._ -import scala.util.Try import scala.reflect.runtime.universe._ +import scala.util.Try /** * Module of Catalyst expressions for efficiently working with tiles. 
* @@ -61,11 +64,17 @@ package object expressions { registry.registerExpression[Add]("rf_local_add") registry.registerExpression[Subtract]("rf_local_subtract") + registry.registerExpression[TileAssembler]("rf_assemble_tile") registry.registerExpression[ExplodeTiles]("rf_explode_tiles") registry.registerExpression[GetCellType]("rf_cell_type") registry.registerExpression[SetCellType]("rf_convert_cell_type") - registry.registerExpression[GetDimensions]("rf_tile_dimensions") - registry.registerExpression[BoundsToGeometry]("rf_bounds_geometry") + registry.registerExpression[GetDimensions]("rf_dimensions") + registry.registerExpression[ExtentToGeometry]("st_geometry") + registry.registerExpression[GetGeometry]("rf_geometry") + registry.registerExpression[GeometryToExtent]("st_extent") + registry.registerExpression[GetExtent]("rf_extent") + registry.registerExpression[GetCRS]("rf_crs") + registry.registerExpression[RealizeTile]("rf_tile") registry.registerExpression[Subtract]("rf_local_subtract") registry.registerExpression[Multiply]("rf_local_multiply") registry.registerExpression[Divide]("rf_local_divide") @@ -78,6 +87,7 @@ package object expressions { registry.registerExpression[Unequal]("rf_local_unequal") registry.registerExpression[Sum]("rf_tile_sum") registry.registerExpression[Round]("rf_round") + registry.registerExpression[Abs]("rf_abs") registry.registerExpression[Log]("rf_log") registry.registerExpression[Log10]("rf_log10") registry.registerExpression[Log2]("rf_log2") @@ -92,12 +102,14 @@ package object expressions { registry.registerExpression[DataCells]("rf_data_cells") registry.registerExpression[NoDataCells]("rf_no_data_cells") registry.registerExpression[IsNoDataTile]("rf_is_no_data_tile") + registry.registerExpression[Exists]("rf_exists") + registry.registerExpression[ForAll]("rf_for_all") registry.registerExpression[TileMin]("rf_tile_min") registry.registerExpression[TileMax]("rf_tile_max") registry.registerExpression[TileMean]("rf_tile_mean") 
registry.registerExpression[TileStats]("rf_tile_stats") registry.registerExpression[TileHistogram]("rf_tile_histogram") - registry.registerExpression[CellCountAggregate.DataCells]("rf_agg_data_cells") + registry.registerExpression[DataCells]("rf_agg_data_cells") registry.registerExpression[CellCountAggregate.NoDataCells]("rf_agg_no_data_cells") registry.registerExpression[CellStatsAggregate.CellStatsAggregateUDAF]("rf_agg_stats") registry.registerExpression[HistogramAggregate.HistogramAggregateUDAF]("rf_agg_approx_histogram") @@ -110,9 +122,11 @@ package object expressions { registry.registerExpression[Mask.MaskByDefined]("rf_mask") registry.registerExpression[Mask.MaskByValue]("rf_mask_by_value") + registry.registerExpression[Mask.InverseMaskByValue]("rf_inverse_mask_by_value") registry.registerExpression[Mask.InverseMaskByDefined]("rf_inverse_mask") registry.registerExpression[DebugRender.RenderAscii]("rf_render_ascii") registry.registerExpression[DebugRender.RenderMatrix]("rf_render_matrix") + registry.registerExpression[transformers.ReprojectGeometry]("st_reproject") } } diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/DataCells.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/DataCells.scala similarity index 84% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/DataCells.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/DataCells.scala index a7d49c4ae..a18148db3 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/DataCells.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/DataCells.scala @@ -19,14 +19,15 @@ * */ -package astraea.spark.rasterframes.expressions.tilestats -import astraea.spark.rasterframes.expressions.{UnaryRasterOp, NullToValue} -import astraea.spark.rasterframes.model.TileContext +package org.locationtech.rasterframes.expressions.tilestats + +import 
org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp} import geotrellis.raster._ import org.apache.spark.sql.{Column, TypedColumn} import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.types.{DataType, LongType} +import org.locationtech.rasterframes.model.TileContext @ExpressionDescription( usage = "_FUNC_(tile) - Counts the number of non-no-data cells in a tile", @@ -40,13 +41,13 @@ import org.apache.spark.sql.types.{DataType, LongType} ) case class DataCells(child: Expression) extends UnaryRasterOp with CodegenFallback with NullToValue { - override def nodeName: String = "data_cells" + override def nodeName: String = "rf_data_cells" override def dataType: DataType = LongType override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = DataCells.op(tile) override def na: Any = 0L } object DataCells { - import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.longEnc + import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.longEnc def apply(tile: Column): TypedColumn[Any, Long] = new Column(DataCells(tile.expr)).as[Long] diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Exists.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Exists.scala new file mode 100644 index 000000000..cd04b1467 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Exists.scala @@ -0,0 +1,47 @@ +package org.locationtech.rasterframes.expressions.tilestats + +import geotrellis.raster.Tile +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} +import org.apache.spark.sql.types._ +import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes.isCellTrue +import 
org.locationtech.rasterframes.expressions.UnaryRasterOp +import org.locationtech.rasterframes.model.TileContext +import spire.syntax.cfor.cfor + +@ExpressionDescription( + usage = "_FUNC_(tile) - Returns true if any cells in the tile are true (non-zero and not nodata).", + arguments = + """ + Arguments: + * tile - tile to check + """, + examples = + """ + > SELECT _FUNC_(tile); + true + """ +) +case class Exists(child: Expression) extends UnaryRasterOp with CodegenFallback { + override def nodeName: String = "exists" + override def dataType: DataType = BooleanType + override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = Exists.op(tile) + +} + +object Exists{ + import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.boolEnc + + def apply(tile: Column): TypedColumn[Any, Boolean] = new Column(Exists(tile.expr)).as[Boolean] + + def op(tile: Tile): Boolean = { + cfor(0)(_ < tile.rows, _ + 1) { r ⇒ + cfor(0)(_ < tile.cols, _ + 1) { c ⇒ + if(tile.cellType.isFloatingPoint) { if(isCellTrue(tile.getDouble(c, r))) return true } + else { if(isCellTrue(tile.get(c, r))) return true } + } + } + false + } +} \ No newline at end of file diff --git a/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/ForAll.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/ForAll.scala new file mode 100644 index 000000000..a912a8a0b --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/ForAll.scala @@ -0,0 +1,51 @@ +package org.locationtech.rasterframes.expressions.tilestats + +import geotrellis.raster.Tile +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} +import org.apache.spark.sql.types._ +import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes.isCellTrue +import org.locationtech.rasterframes.expressions.UnaryRasterOp +import 
org.locationtech.rasterframes.model.TileContext +import spire.syntax.cfor.cfor + +@ExpressionDescription( + usage = "_FUNC_(tile) - Returns true if all cells in the tile are true (non-zero and not nodata).", + arguments = + """ + Arguments: + * tile - tile to check + """, + examples = + """ + > SELECT _FUNC_(tile); + true + """ +) +case class ForAll(child: Expression) extends UnaryRasterOp with CodegenFallback { + override def nodeName: String = "for_all" + override def dataType: DataType = BooleanType + override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = ForAll.op(tile) + +} + +object ForAll { + import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.boolEnc + + def apply(tile: Column): TypedColumn[Any, Boolean] = new Column(ForAll(tile.expr)).as[Boolean] + + def op(tile: Tile): Boolean = { + cfor(0)(_ < tile.rows, _ + 1) { r ⇒ + cfor(0)(_ < tile.cols, _ + 1) { c ⇒ + if (tile.cellType.isFloatingPoint) { + if (!isCellTrue(tile.getDouble(c, r))) return false + } + else { + if (!isCellTrue(tile.get(c, r))) return false + } + } + } + true + } +} diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/IsNoDataTile.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/IsNoDataTile.scala similarity index 83% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/IsNoDataTile.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/IsNoDataTile.scala index 7b360a07c..fd855cd39 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/IsNoDataTile.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/IsNoDataTile.scala @@ -19,14 +19,15 @@ * */ -package astraea.spark.rasterframes.expressions.tilestats -import astraea.spark.rasterframes.expressions.{NullToValue, UnaryRasterOp} -import astraea.spark.rasterframes.model.TileContext +package 
org.locationtech.rasterframes.expressions.tilestats + +import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp} import geotrellis.raster._ import org.apache.spark.sql.{Column, TypedColumn} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.types.{BooleanType, DataType} +import org.locationtech.rasterframes.model.TileContext @ExpressionDescription( usage = "_FUNC_(tile) - Produces `true` if all the cells in a given tile are no-data", @@ -40,13 +41,13 @@ import org.apache.spark.sql.types.{BooleanType, DataType} ) case class IsNoDataTile(child: Expression) extends UnaryRasterOp with CodegenFallback with NullToValue { - override def nodeName: String = "is_no_data_tile" + override def nodeName: String = "rf_is_no_data_tile" override def na: Any = true override def dataType: DataType = BooleanType override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = tile.isNoDataTile } object IsNoDataTile { - import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.boolEnc + import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.boolEnc def apply(tile: Column): TypedColumn[Any, Boolean] = new Column(IsNoDataTile(tile.expr)).as[Boolean] } diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/NoDataCells.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/NoDataCells.scala similarity index 84% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/NoDataCells.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/NoDataCells.scala index 89c2ae10b..cf47ba14e 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/NoDataCells.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/NoDataCells.scala @@ 
-19,15 +19,15 @@ * */ -package astraea.spark.rasterframes.expressions.tilestats +package org.locationtech.rasterframes.expressions.tilestats -import astraea.spark.rasterframes.expressions.{UnaryRasterOp, NullToValue} -import astraea.spark.rasterframes.model.TileContext +import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp} import geotrellis.raster._ import org.apache.spark.sql.{Column, TypedColumn} import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.types.{DataType, LongType} +import org.locationtech.rasterframes.model.TileContext @ExpressionDescription( usage = "_FUNC_(tile) - Counts the number of no-data cells in a tile", @@ -41,13 +41,13 @@ import org.apache.spark.sql.types.{DataType, LongType} ) case class NoDataCells(child: Expression) extends UnaryRasterOp with CodegenFallback with NullToValue { - override def nodeName: String = "no_data_cells" + override def nodeName: String = "rf_no_data_cells" override def dataType: DataType = LongType override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = NoDataCells.op(tile) override def na: Any = 0L } object NoDataCells { - import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.longEnc + import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.longEnc def apply(tile: Column): TypedColumn[Any, Long] = new Column(NoDataCells(tile.expr)).as[Long] diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/Sum.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Sum.scala similarity index 83% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/Sum.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Sum.scala index cfa10666b..096acdab6 100644 --- 
a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/Sum.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/Sum.scala @@ -19,17 +19,18 @@ * */ -package astraea.spark.rasterframes.expressions.tilestats -import astraea.spark.rasterframes.expressions.UnaryRasterOp -import astraea.spark.rasterframes.model.TileContext +package org.locationtech.rasterframes.expressions.tilestats + +import org.locationtech.rasterframes.expressions.UnaryRasterOp import geotrellis.raster._ import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.types.{DataType, DoubleType} import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes.model.TileContext @ExpressionDescription( - usage = "_FUNC_(tile) - Computes the sum of all the cells in a tile..", + usage = "_FUNC_(tile) - Computes the sum of all the cells in a tile.", arguments = """ Arguments: * tile - tile to sum up""", @@ -39,13 +40,13 @@ import org.apache.spark.sql.{Column, TypedColumn} 2135.34""" ) case class Sum(child: Expression) extends UnaryRasterOp with CodegenFallback { - override def nodeName: String = "tile_sum" + override def nodeName: String = "rf_tile_sum" override def dataType: DataType = DoubleType override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = Sum.op(tile) } object Sum { - import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc + import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc def apply(tile: Column): TypedColumn[Any, Double] = new Column(Sum(tile.expr)).as[Double] diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileHistogram.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileHistogram.scala similarity index 84% rename from 
core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileHistogram.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileHistogram.scala index d7fe7d0c1..96e3d3dcc 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileHistogram.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileHistogram.scala @@ -19,17 +19,18 @@ * */ -package astraea.spark.rasterframes.expressions.tilestats +package org.locationtech.rasterframes.expressions.tilestats -import astraea.spark.rasterframes.expressions.UnaryRasterOp -import astraea.spark.rasterframes.model.TileContext -import astraea.spark.rasterframes.stats.CellHistogram +import org.locationtech.rasterframes.expressions.UnaryRasterOp +import org.locationtech.rasterframes.stats.CellHistogram import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.types.DataType import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes.expressions.UnaryRasterOp +import org.locationtech.rasterframes.model.TileContext @ExpressionDescription( usage = "_FUNC_(tile) - Computes per-tile histogram.", @@ -43,7 +44,7 @@ import org.apache.spark.sql.{Column, TypedColumn} ) case class TileHistogram(child: Expression) extends UnaryRasterOp with CodegenFallback { - override def nodeName: String = "tile_histogram" + override def nodeName: String = "rf_tile_histogram" override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = TileHistogram.converter(TileHistogram.op(tile)) override def dataType: DataType = CellHistogram.schema diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMax.scala 
b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMax.scala similarity index 84% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMax.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMax.scala index 0e2595b2a..3204f4aaf 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMax.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMax.scala @@ -19,15 +19,15 @@ * */ -package astraea.spark.rasterframes.expressions.tilestats +package org.locationtech.rasterframes.expressions.tilestats -import astraea.spark.rasterframes.expressions.{NullToValue, UnaryRasterOp} -import astraea.spark.rasterframes.model.TileContext +import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp} import geotrellis.raster.{Tile, isData} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.types.{DataType, DoubleType} import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes.model.TileContext @ExpressionDescription( usage = "_FUNC_(tile) - Determines the maximum cell value.", @@ -41,13 +41,13 @@ import org.apache.spark.sql.{Column, TypedColumn} ) case class TileMax(child: Expression) extends UnaryRasterOp with NullToValue with CodegenFallback { - override def nodeName: String = "tile_max" + override def nodeName: String = "rf_tile_max" override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = TileMax.op(tile) override def dataType: DataType = DoubleType override def na: Any = Double.MinValue } object TileMax { - import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc + import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc def apply(tile: Column): TypedColumn[Any, 
Double] = new Column(TileMax(tile.expr)).as[Double] diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMean.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMean.scala similarity index 83% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMean.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMean.scala index e23e68c08..92c833f98 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMean.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMean.scala @@ -19,16 +19,15 @@ * */ -package astraea.spark.rasterframes.expressions.tilestats +package org.locationtech.rasterframes.expressions.tilestats -import astraea.spark.rasterframes.expressions.{NullToValue, UnaryRasterOp} -import astraea.spark.rasterframes.functions.safeEval -import astraea.spark.rasterframes.model.TileContext +import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp} import geotrellis.raster.{Tile, isData} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.types.{DataType, DoubleType} import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes.model.TileContext @ExpressionDescription( usage = "_FUNC_(tile) - Computes the mean cell value of a tile.", @@ -42,13 +41,13 @@ import org.apache.spark.sql.{Column, TypedColumn} ) case class TileMean(child: Expression) extends UnaryRasterOp with NullToValue with CodegenFallback { - override def nodeName: String = "tile_mean" + override def nodeName: String = "rf_tile_mean" override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = TileMean.op(tile) override def dataType: DataType = DoubleType override def na: Any = Double.NaN } object TileMean { - import 
astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc + import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc def apply(tile: Column): TypedColumn[Any, Double] = new Column(TileMean(tile.expr)).as[Double] diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMin.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMin.scala similarity index 85% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMin.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMin.scala index 4d2edc9b3..71fa0194a 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileMin.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileMin.scala @@ -19,15 +19,15 @@ * */ -package astraea.spark.rasterframes.expressions.tilestats +package org.locationtech.rasterframes.expressions.tilestats -import astraea.spark.rasterframes.expressions.{NullToValue, UnaryRasterOp} -import astraea.spark.rasterframes.model.TileContext +import org.locationtech.rasterframes.expressions.{NullToValue, UnaryRasterOp} import geotrellis.raster.{Tile, isData} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.types.{DataType, DoubleType} import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes.model.TileContext @ExpressionDescription( usage = "_FUNC_(tile) - Determines the minimum cell value.", @@ -41,13 +41,13 @@ import org.apache.spark.sql.{Column, TypedColumn} ) case class TileMin(child: Expression) extends UnaryRasterOp with NullToValue with CodegenFallback { - override def nodeName: String = "tile_min" + override def nodeName: String = "rf_tile_min" override protected def eval(tile: Tile, ctx: 
Option[TileContext]): Any = TileMin.op(tile) override def dataType: DataType = DoubleType override def na: Any = Double.MaxValue } object TileMin { - import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc + import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.doubleEnc def apply(tile: Column): TypedColumn[Any, Double] = new Column(TileMin(tile.expr)).as[Double] diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileStats.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileStats.scala similarity index 84% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileStats.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileStats.scala index 015f048e8..fac6d330e 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/tilestats/TileStats.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/tilestats/TileStats.scala @@ -19,17 +19,18 @@ * */ -package astraea.spark.rasterframes.expressions.tilestats +package org.locationtech.rasterframes.expressions.tilestats -import astraea.spark.rasterframes.expressions.UnaryRasterOp -import astraea.spark.rasterframes.model.TileContext -import astraea.spark.rasterframes.stats.CellStatistics +import org.locationtech.rasterframes.expressions.UnaryRasterOp +import org.locationtech.rasterframes.stats.CellStatistics import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.types.DataType import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes.expressions.UnaryRasterOp +import org.locationtech.rasterframes.model.TileContext @ExpressionDescription( usage = "_FUNC_(tile) - 
Computes per-tile descriptive statistics.", @@ -43,7 +44,7 @@ import org.apache.spark.sql.{Column, TypedColumn} ) case class TileStats(child: Expression) extends UnaryRasterOp with CodegenFallback { - override def nodeName: String = "tile_stats" + override def nodeName: String = "rf_tile_stats" override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = TileStats.converter(TileStats.op(tile).orNull) override def dataType: DataType = CellStatistics.schema diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/DebugRender.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/DebugRender.scala similarity index 84% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/DebugRender.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/DebugRender.scala index c26cc6b51..babb9c7b7 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/DebugRender.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/DebugRender.scala @@ -19,10 +19,10 @@ * */ -package astraea.spark.rasterframes.expressions.transformers -import astraea.spark.rasterframes.expressions.UnaryRasterOp -import astraea.spark.rasterframes.model.TileContext -import astraea.spark.rasterframes.util.TileAsMatrix +package org.locationtech.rasterframes.expressions.transformers + +import org.locationtech.rasterframes.expressions.UnaryRasterOp +import org.locationtech.rasterframes.util.TileAsMatrix import geotrellis.raster.Tile import geotrellis.raster.render.ascii.AsciiArtEncoder import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescript import org.apache.spark.sql.types.{DataType, StringType} import org.apache.spark.sql.{Column, TypedColumn} import org.apache.spark.unsafe.types.UTF8String +import 
org.locationtech.rasterframes.model.TileContext abstract class DebugRender(asciiArt: Boolean) extends UnaryRasterOp with CodegenFallback with Serializable { @@ -45,7 +46,7 @@ abstract class DebugRender(asciiArt: Boolean) extends UnaryRasterOp } object DebugRender { - import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.stringEnc + import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.stringEnc @ExpressionDescription( usage = "_FUNC_(tile) - Coverts the contents of the given tile an ASCII art string rendering", @@ -54,7 +55,7 @@ object DebugRender { * tile - tile to render""" ) case class RenderAscii(child: Expression) extends DebugRender(true) { - override def nodeName: String = "render_ascii" + override def nodeName: String = "rf_render_ascii" } object RenderAscii { def apply(tile: Column): TypedColumn[Any, String] = @@ -68,7 +69,7 @@ object DebugRender { * tile - tile to render""" ) case class RenderMatrix(child: Expression) extends DebugRender(false) { - override def nodeName: String = "render_matrix" + override def nodeName: String = "rf_render_matrix" } object RenderMatrix { def apply(tile: Column): TypedColumn[Any, String] = diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/BoundsToGeometry.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/ExtentToGeometry.scala similarity index 76% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/BoundsToGeometry.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/ExtentToGeometry.scala index 9d6a8c652..9d2d12d2f 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/BoundsToGeometry.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/ExtentToGeometry.scala @@ -19,12 +19,11 @@ * */ -package astraea.spark.rasterframes.expressions.transformers +package 
org.locationtech.rasterframes.expressions.transformers -import astraea.spark.rasterframes.encoders.CatalystSerializer -import astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.expressions.row -import com.vividsolutions.jts.geom.{Envelope, Geometry} +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.expressions.row +import org.locationtech.jts.geom.{Envelope, Geometry} import geotrellis.vector.Extent import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess} @@ -40,13 +39,13 @@ import org.locationtech.geomesa.spark.jts.encoders.SpatialEncoders * * @since 8/24/18 */ -case class BoundsToGeometry(child: Expression) extends UnaryExpression with CodegenFallback { - override def nodeName: String = "bounds_geometry" +case class ExtentToGeometry(child: Expression) extends UnaryExpression with CodegenFallback { + override def nodeName: String = "st_geometry" override def dataType: DataType = JTSTypes.GeometryTypeInstance - private val envSchema = CatalystSerializer[Envelope].schema - private val extSchema = CatalystSerializer[Extent].schema + private val envSchema = schemaOf[Envelope] + private val extSchema = schemaOf[Extent] override def checkInputDataTypes(): TypeCheckResult = { child.dataType match { @@ -71,7 +70,7 @@ case class BoundsToGeometry(child: Expression) extends UnaryExpression with Code } } -object BoundsToGeometry extends SpatialEncoders { +object ExtentToGeometry extends SpatialEncoders { def apply(bounds: Column): TypedColumn[Any, Geometry] = - new Column(new BoundsToGeometry(bounds.expr)).as[Geometry] + new Column(new ExtentToGeometry(bounds.expr)).as[Geometry] } diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/GeometryToBounds.scala 
b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/GeometryToExtent.scala similarity index 76% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/GeometryToBounds.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/GeometryToExtent.scala index 4e08ad9ea..adb52468b 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/GeometryToBounds.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/GeometryToExtent.scala @@ -19,10 +19,9 @@ * */ -package astraea.spark.rasterframes.expressions.transformers +package org.locationtech.rasterframes.expressions.transformers -import astraea.spark.rasterframes.encoders.CatalystSerializer -import astraea.spark.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.encoders.CatalystSerializer._ import geotrellis.vector.Extent import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess} @@ -37,10 +36,10 @@ import org.apache.spark.sql.{Column, TypedColumn} * * @since 8/24/18 */ -case class GeometryToBounds(child: Expression) extends UnaryExpression with CodegenFallback { - override def nodeName: String = "geometry_bounds" +case class GeometryToExtent(child: Expression) extends UnaryExpression with CodegenFallback { + override def nodeName: String = "st_extent" - override def dataType: DataType = CatalystSerializer[Extent].schema + override def dataType: DataType = schemaOf[Extent] override def checkInputDataTypes(): TypeCheckResult = { child.dataType match { @@ -54,13 +53,13 @@ case class GeometryToBounds(child: Expression) extends UnaryExpression with Code override protected def nullSafeEval(input: Any): Any = { val geom = JTSTypes.GeometryTypeInstance.deserialize(input) val extent = Extent(geom.getEnvelopeInternal) - 
CatalystSerializer[Extent].toInternalRow(extent) + extent.toInternalRow } } -object GeometryToBounds { - import astraea.spark.rasterframes.encoders.StandardEncoders._ +object GeometryToExtent { + import org.locationtech.rasterframes.encoders.StandardEncoders._ def apply(bounds: Column): TypedColumn[Any, Extent] = - new Column(new GeometryToBounds(bounds.expr)).as[Extent] + new Column(new GeometryToExtent(bounds.expr)).as[Extent] } \ No newline at end of file diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/Mask.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/Mask.scala similarity index 79% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/Mask.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/Mask.scala index 03e81efc2..106a52a7b 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/Mask.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/Mask.scala @@ -19,10 +19,11 @@ * */ -package astraea.spark.rasterframes.expressions.transformers -import astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.expressions.DynamicExtractors._ -import astraea.spark.rasterframes.expressions.row +package org.locationtech.rasterframes.expressions.transformers + +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.expressions.DynamicExtractors._ +import org.locationtech.rasterframes.expressions.row import com.typesafe.scalalogging.LazyLogging import geotrellis.raster import geotrellis.raster.Tile @@ -81,7 +82,7 @@ abstract class Mask(val left: Expression, val middle: Expression, val right: Exp } } object Mask { - import astraea.spark.rasterframes.encoders.StandardEncoders.singlebandTileEncoder + import org.locationtech.rasterframes.encoders.StandardEncoders.singlebandTileEncoder 
@ExpressionDescription( usage = "_FUNC_(target, mask) - Generate a tile with the values from the data tile, but where cells in the masking tile contain NODATA, replace the data value with NODATA.", @@ -96,7 +97,7 @@ object Mask { ) case class MaskByDefined(target: Expression, mask: Expression) extends Mask(target, mask, Literal(0), false) { - override def nodeName: String = "mask" + override def nodeName: String = "rf_mask" } object MaskByDefined { def apply(targetTile: Column, maskTile: Column): TypedColumn[Any, Tile] = @@ -116,7 +117,7 @@ object Mask { ) case class InverseMaskByDefined(leftTile: Expression, rightTile: Expression) extends Mask(leftTile, rightTile, Literal(0), true) { - override def nodeName: String = "inverse_mask" + override def nodeName: String = "rf_inverse_mask" } object InverseMaskByDefined { def apply(srcTile: Column, maskingTile: Column): TypedColumn[Any, Tile] = @@ -136,10 +137,32 @@ object Mask { ) case class MaskByValue(leftTile: Expression, rightTile: Expression, maskValue: Expression) extends Mask(leftTile, rightTile, maskValue, false) { - override def nodeName: String = "mask_by_value" + override def nodeName: String = "rf_mask_by_value" } object MaskByValue { def apply(srcTile: Column, maskingTile: Column, maskValue: Column): TypedColumn[Any, Tile] = new Column(MaskByValue(srcTile.expr, maskingTile.expr, maskValue.expr)).as[Tile] } + + @ExpressionDescription( + usage = "_FUNC_(target, mask, maskValue) - Generate a tile with the values from the data tile, but where cells in the masking tile DO NOT contain the masking value, replace the data value with NODATA.", + arguments = """ + Arguments: + * target - tile to mask + * mask - masking definition + * maskValue - value in the `mask` for which to mark `target` as data cells + """, + examples = """ + Examples: + > SELECT _FUNC_(target, mask, maskValue); + ...""" + ) + case class InverseMaskByValue(leftTile: Expression, rightTile: Expression, maskValue: Expression) + extends 
Mask(leftTile, rightTile, maskValue, true) { + override def nodeName: String = "rf_inverse_mask_by_value" + } + object InverseMaskByValue { + def apply(srcTile: Column, maskingTile: Column, maskValue: Column): TypedColumn[Any, Tile] = + new Column(InverseMaskByValue(srcTile.expr, maskingTile.expr, maskValue.expr)).as[Tile] + } } diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterRefToTile.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/RasterRefToTile.scala similarity index 70% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterRefToTile.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/RasterRefToTile.scala index c3aa3f337..f0c82c6de 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/RasterRefToTile.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/RasterRefToTile.scala @@ -19,19 +19,18 @@ * */ -package astraea.spark.rasterframes.expressions.transformers +package org.locationtech.rasterframes.expressions.transformers -import astraea.spark.rasterframes.encoders.CatalystSerializer -import astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.expressions.row -import astraea.spark.rasterframes.ref.RasterRef +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.expressions.row import com.typesafe.scalalogging.LazyLogging -import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, UnaryExpression} import org.apache.spark.sql.rf._ import org.apache.spark.sql.types.DataType import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes.ref.RasterRef +import org.locationtech.rasterframes.tiles.ProjectedRasterTile 
/** * Realizes a RasterRef into a Tile. @@ -43,19 +42,18 @@ case class RasterRefToTile(child: Expression) extends UnaryExpression override def nodeName: String = "raster_ref_to_tile" - override def inputTypes = Seq(CatalystSerializer[RasterRef].schema) + override def inputTypes = Seq(schemaOf[RasterRef]) - override def dataType: DataType = new TileUDT + override def dataType: DataType = schemaOf[ProjectedRasterTile] override protected def nullSafeEval(input: Any): Any = { implicit val ser = TileUDT.tileSerializer val ref = row(input).to[RasterRef] - (ref.tile: Tile).toInternalRow + ref.tile.toInternalRow } } object RasterRefToTile { - import astraea.spark.rasterframes.encoders.StandardEncoders._ - def apply(rr: Column): TypedColumn[Any, Tile] = - new Column(RasterRefToTile(rr.expr)).as[Tile] + def apply(rr: Column): TypedColumn[Any, ProjectedRasterTile] = + new Column(RasterRefToTile(rr.expr)).as[ProjectedRasterTile] } diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/ReprojectGeometry.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/ReprojectGeometry.scala similarity index 53% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/ReprojectGeometry.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/ReprojectGeometry.scala index 7e78c5942..9c1ab2234 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/ReprojectGeometry.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/ReprojectGeometry.scala @@ -19,37 +19,54 @@ * */ -package astraea.spark.rasterframes.expressions.transformers +package org.locationtech.rasterframes.expressions.transformers -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.encoders.{CatalystSerializer, serialized_literal} -import 
astraea.spark.rasterframes.jts.ReprojectionTransformer -import com.vividsolutions.jts.geom.Geometry +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.encoders.serialized_literal +import org.locationtech.jts.geom.Geometry import geotrellis.proj4.CRS import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback -import org.apache.spark.sql.jts.JTSTypes +import org.apache.spark.sql.jts.{AbstractGeometryUDT, JTSTypes} import org.apache.spark.sql.types.DataType import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes.expressions.DynamicExtractors +import org.locationtech.rasterframes.jts.ReprojectionTransformer +import org.locationtech.rasterframes.model.LazyCRS -/** - * - * - * @since 11/29/18 - */ +@ExpressionDescription( + usage = "_FUNC_(geom, srcCRS, dstCRS) - Reprojects the given `geom` from `srcCRS` to `dstCRS", + arguments = """ + Arguments: + * geom - the geometry column to reproject + * srcCRS - the CRS of the `geom` column + * dstCRS - the CRS to project geometry into""", + examples = """ + Examples: + > SELECT _FUNC_(geom, srcCRS, dstCRS); + ...""" +) case class ReprojectGeometry(geometry: Expression, srcCRS: Expression, dstCRS: Expression) extends Expression - with CodegenFallback with ExpectsInputTypes { + with CodegenFallback { - override def nodeName: String = "reproject_geometry" + override def nodeName: String = "st_reproject" override def dataType: DataType = JTSTypes.GeometryTypeInstance override def nullable: Boolean = geometry.nullable || srcCRS.nullable || dstCRS.nullable override def children: Seq[Expression] = Seq(geometry, srcCRS, dstCRS) - private def crsSerde = 
CatalystSerializer[CRS] - override val inputTypes = Seq( - dataType, crsSerde.schema, crsSerde.schema - ) + + override def checkInputDataTypes(): TypeCheckResult = { + if (!geometry.dataType.isInstanceOf[AbstractGeometryUDT[_]]) + TypeCheckFailure(s"Input type '${geometry.dataType}' does not conform to a geometry type.") + else if(!DynamicExtractors.crsExtractor.isDefinedAt(srcCRS.dataType)) + TypeCheckFailure(s"Input type '${srcCRS.dataType}' cannot be interpreted as a CRS.") + else if(!DynamicExtractors.crsExtractor.isDefinedAt(dstCRS.dataType)) + TypeCheckFailure(s"Input type '${dstCRS.dataType}' cannot be interpreted as a CRS.") + else TypeCheckResult.TypeCheckSuccess + } /** Reprojects a geometry column from one CRS to another. */ val reproject: (Geometry, CRS, CRS) ⇒ Geometry = @@ -59,10 +76,15 @@ case class ReprojectGeometry(geometry: Expression, srcCRS: Expression, dstCRS: E } override def eval(input: InternalRow): Any = { - val geom = JTSTypes.GeometryTypeInstance.deserialize(geometry.eval(input)) - val src = srcCRS.eval(input).asInstanceOf[InternalRow].to[CRS] - val dst = dstCRS.eval(input).asInstanceOf[InternalRow].to[CRS] - JTSTypes.GeometryTypeInstance.serialize(reproject(geom, src, dst)) + val src = DynamicExtractors.crsExtractor(srcCRS.dataType)(srcCRS.eval(input)) + val dst = DynamicExtractors.crsExtractor(dstCRS.dataType)(dstCRS.eval(input)) + (src, dst) match { + // Optimized pass-through case. 
+ case (s: LazyCRS, r: LazyCRS) if s.encoded == r.encoded => geometry.eval(input) + case _ => + val geom = JTSTypes.GeometryTypeInstance.deserialize(geometry.eval(input)) + JTSTypes.GeometryTypeInstance.serialize(reproject(geom, src, dst)) + } } } diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/SetCellType.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/SetCellType.scala similarity index 86% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/SetCellType.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/SetCellType.scala index 96fcd4288..6990e34df 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/SetCellType.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/SetCellType.scala @@ -19,13 +19,12 @@ * */ -package astraea.spark.rasterframes.expressions.transformers +package org.locationtech.rasterframes.expressions.transformers -import astraea.spark.rasterframes.encoders.CatalystSerializer -import astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.encoders.StandardEncoders._ -import astraea.spark.rasterframes.expressions.DynamicExtractors.tileExtractor -import astraea.spark.rasterframes.expressions.row +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.encoders.StandardEncoders._ +import org.locationtech.rasterframes.expressions.DynamicExtractors.tileExtractor +import org.locationtech.rasterframes.expressions.row import geotrellis.raster.{CellType, Tile} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult @@ -33,7 +32,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import 
org.apache.spark.sql.catalyst.expressions.{BinaryExpression, Expression} import org.apache.spark.sql.functions.lit -import org.apache.spark.sql.rf._ +import org.apache.spark.sql.rf.{TileUDT, WithTypeConformity} import org.apache.spark.sql.types._ import org.apache.spark.sql.{Column, TypedColumn} import org.apache.spark.unsafe.types.UTF8String @@ -50,7 +49,7 @@ case class SetCellType(tile: Expression, cellType: Expression) override def nodeName: String = "set_cell_type" override def dataType: DataType = left.dataType - private val ctSchema = CatalystSerializer[CellType].schema + private val ctSchema = schemaOf[CellType] override def checkInputDataTypes(): TypeCheckResult = { if (!tileExtractor.isDefinedAt(left.dataType)) diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/TileToArrayDouble.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayDouble.scala similarity index 83% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/TileToArrayDouble.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayDouble.scala index 02a4bc4e8..5d7786f1c 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/TileToArrayDouble.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayDouble.scala @@ -19,15 +19,16 @@ * */ -package astraea.spark.rasterframes.expressions.transformers -import astraea.spark.rasterframes.expressions.UnaryRasterOp -import astraea.spark.rasterframes.model.TileContext +package org.locationtech.rasterframes.expressions.transformers + +import org.locationtech.rasterframes.expressions.UnaryRasterOp import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.util.ArrayData import 
org.apache.spark.sql.types.{DataType, DataTypes, DoubleType} import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes.model.TileContext @ExpressionDescription( usage = "_FUNC_(tile) - Coverts the contents of the given tile to an array of double floating-point values", @@ -36,14 +37,14 @@ import org.apache.spark.sql.{Column, TypedColumn} * tile - tile to convert""" ) case class TileToArrayDouble(child: Expression) extends UnaryRasterOp with CodegenFallback { - override def nodeName: String = "tile_to_array_double" + override def nodeName: String = "rf_tile_to_array_double" override def dataType: DataType = DataTypes.createArrayType(DoubleType, false) override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = { ArrayData.toArrayData(tile.toArrayDouble()) } } object TileToArrayDouble { - import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.arrayEnc + import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.arrayEnc def apply(tile: Column): TypedColumn[Any, Array[Double]] = new Column(TileToArrayDouble(tile.expr)).as[Array[Double]] } diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/TileToArrayInt.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayInt.scala similarity index 81% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/TileToArrayInt.scala rename to core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayInt.scala index 31ad81516..c299d57c7 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/TileToArrayInt.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/TileToArrayInt.scala @@ -19,16 +19,17 @@ * */ -package astraea.spark.rasterframes.expressions.transformers +package org.locationtech.rasterframes.expressions.transformers -import 
astraea.spark.rasterframes.expressions.UnaryRasterOp -import astraea.spark.rasterframes.model.TileContext +import org.locationtech.rasterframes.expressions.UnaryRasterOp import geotrellis.raster.Tile import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription} import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.types.{DataType, DataTypes, IntegerType} import org.apache.spark.sql.{Column, TypedColumn} +import org.locationtech.rasterframes.expressions.UnaryRasterOp +import org.locationtech.rasterframes.model.TileContext @ExpressionDescription( usage = "_FUNC_(tile) - Coverts the contents of the given tile to an array of integer values", @@ -37,14 +38,14 @@ import org.apache.spark.sql.{Column, TypedColumn} * tile - tile to convert""" ) case class TileToArrayInt(child: Expression) extends UnaryRasterOp with CodegenFallback { - override def nodeName: String = "tile_to_array_int" + override def nodeName: String = "rf_tile_to_array_int" override def dataType: DataType = DataTypes.createArrayType(IntegerType, false) override protected def eval(tile: Tile, ctx: Option[TileContext]): Any = { ArrayData.toArrayData(tile.toArray()) } } object TileToArrayInt { - import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.arrayEnc + import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.arrayEnc def apply(tile: Column): TypedColumn[Any, Array[Int]] = new Column(TileToArrayInt(tile.expr)).as[Array[Int]] } diff --git a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/URIToRasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/URIToRasterSource.scala similarity index 61% rename from core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/URIToRasterSource.scala rename to 
core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/URIToRasterSource.scala index 0821e43db..903e62dde 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/expressions/transformers/URIToRasterSource.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/expressions/transformers/URIToRasterSource.scala @@ -19,20 +19,18 @@ * */ -package astraea.spark.rasterframes.expressions.transformers +package org.locationtech.rasterframes.expressions.transformers import java.net.URI -import astraea.spark.rasterframes.ref.RasterSource.ReadCallback -import astraea.spark.rasterframes.ref.{RasterRef, RasterSource} +import org.locationtech.rasterframes.RasterSourceType import com.typesafe.scalalogging.LazyLogging import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, UnaryExpression} -import org.apache.spark.sql.rf._ import org.apache.spark.sql.types.{DataType, StringType} import org.apache.spark.sql.{Column, TypedColumn} import org.apache.spark.unsafe.types.UTF8String - +import org.locationtech.rasterframes.ref.RasterSource /** * Catalyst generator to convert a geotiff download URL into a series of rows @@ -40,28 +38,24 @@ import org.apache.spark.unsafe.types.UTF8String * * @since 5/4/18 */ -case class URIToRasterSource(override val child: Expression, accumulator: Option[ReadCallback]) +case class URIToRasterSource(override val child: Expression) extends UnaryExpression with ExpectsInputTypes with CodegenFallback with LazyLogging { - override def nodeName: String = "uri_to_raster_source" + override def nodeName: String = "rf_uri_to_raster_source" - override def dataType: DataType = new RasterSourceUDT + override def dataType: DataType = RasterSourceType override def inputTypes = Seq(StringType) override protected def nullSafeEval(input: Any): Any = { val uriString = input.asInstanceOf[UTF8String].toString val uri = URI.create(uriString) - val ref = 
RasterSource(uri, accumulator) - RasterSourceUDT.serialize(ref) + val ref = RasterSource(uri) + RasterSourceType.serialize(ref) } } object URIToRasterSource { - def apply(rasterURI: Column): TypedColumn[Any, RasterRef] = - new Column(new URIToRasterSource(rasterURI.expr, None)).as[RasterRef] - def apply(rasterURI: Column, accumulator: ReadCallback): TypedColumn[Any, RasterRef] = - new Column(new URIToRasterSource(rasterURI.expr, Option(accumulator))).as[RasterRef] - def apply(rasterURI: Column, accumulator: Option[ReadCallback]): TypedColumn[Any, RasterRef] = - new Column(new URIToRasterSource(rasterURI.expr, accumulator)).as[RasterRef] + def apply(rasterURI: Column): TypedColumn[Any, RasterSource] = + new Column(new URIToRasterSource(rasterURI.expr)).as[RasterSource] } diff --git a/core/src/main/scala/org/locationtech/rasterframes/extensions/ContextRDDMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/ContextRDDMethods.scala new file mode 100644 index 000000000..7bf3230b3 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/ContextRDDMethods.scala @@ -0,0 +1,73 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2017 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.extensions + +import org.locationtech.rasterframes.PairRDDConverter._ +import org.locationtech.rasterframes.StandardColumns._ +import Implicits._ +import org.locationtech.rasterframes.util._ +import org.locationtech.rasterframes.RasterFrameLayer +import geotrellis.raster.CellGrid +import geotrellis.spark._ +import geotrellis.spark.io._ +import geotrellis.util.MethodExtensions +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.SparkSession +import org.locationtech.rasterframes.PairRDDConverter + +/** + * Extension method on `ContextRDD`-shaped RDDs with appropriate context bounds to create a RasterFrameLayer. + * @since 7/18/17 + */ +abstract class SpatialContextRDDMethods[T <: CellGrid](implicit spark: SparkSession) + extends MethodExtensions[RDD[(SpatialKey, T)] with Metadata[TileLayerMetadata[SpatialKey]]] { + import PairRDDConverter._ + + def toLayer(implicit converter: PairRDDConverter[SpatialKey, T]): RasterFrameLayer = toLayer(TILE_COLUMN.columnName) + + def toLayer(tileColumnName: String)(implicit converter: PairRDDConverter[SpatialKey, T]): RasterFrameLayer = { + val df = self.toDataFrame.setSpatialColumnRole(SPATIAL_KEY_COLUMN, self.metadata) + val defName = TILE_COLUMN.columnName + df.mapWhen(_ ⇒ tileColumnName != defName, _.withColumnRenamed(defName, tileColumnName)) + .certify + } +} + +/** + * Extension method on `ContextRDD`-shaped `Tile` RDDs keyed with [[SpaceTimeKey]], with appropriate context bounds to create a RasterFrameLayer. 
+ * @since 9/11/17 + */ +abstract class SpatioTemporalContextRDDMethods[T <: CellGrid]( + implicit spark: SparkSession) + extends MethodExtensions[RDD[(SpaceTimeKey, T)] with Metadata[TileLayerMetadata[SpaceTimeKey]]] { + + def toLayer(implicit converter: PairRDDConverter[SpaceTimeKey, T]): RasterFrameLayer = toLayer(TILE_COLUMN.columnName) + + def toLayer(tileColumnName: String)(implicit converter: PairRDDConverter[SpaceTimeKey, T]): RasterFrameLayer = { + val df = self.toDataFrame + .setSpatialColumnRole(SPATIAL_KEY_COLUMN, self.metadata) + .setTemporalColumnRole(TEMPORAL_KEY_COLUMN) + val defName = TILE_COLUMN.columnName + df.mapWhen(_ ⇒ tileColumnName != defName, _.withColumnRenamed(defName, tileColumnName)) + .certify + } +} diff --git a/core/src/main/scala/org/locationtech/rasterframes/extensions/DataFrameMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/DataFrameMethods.scala new file mode 100644 index 000000000..1e94ff3ca --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/DataFrameMethods.scala @@ -0,0 +1,305 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2017 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.extensions + +import geotrellis.proj4.CRS +import geotrellis.spark.io._ +import geotrellis.spark.{SpaceTimeKey, SpatialComponent, SpatialKey, TemporalKey, TileLayerMetadata} +import geotrellis.util.MethodExtensions +import geotrellis.vector.Extent +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.types.{MetadataBuilder, StructField} +import org.apache.spark.sql.{Column, DataFrame, TypedColumn} +import org.locationtech.rasterframes.StandardColumns._ +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.encoders.StandardEncoders._ +import org.locationtech.rasterframes.expressions.DynamicExtractors +import org.locationtech.rasterframes.tiles.ProjectedRasterTile +import org.locationtech.rasterframes.util._ +import org.locationtech.rasterframes.{MetadataKeys, RasterFrameLayer} +import spray.json.JsonFormat + +import scala.util.Try + +/** + * Extension methods over [[DataFrame]]. + * + * @since 7/18/17 + */ +trait DataFrameMethods[DF <: DataFrame] extends MethodExtensions[DF] with MetadataKeys { + import Implicits.{WithDataFrameMethods, WithMetadataBuilderMethods, WithMetadataMethods, WithRasterFrameLayerMethods} + + private def selector(column: Column) = (attr: Attribute) ⇒ + attr.name == column.columnName || attr.semanticEquals(column.expr) + + /** Map over the Attribute representation of Columns, modifying the one matching `column` with `op`. 
*/ + private[rasterframes] def mapColumnAttribute(column: Column, op: Attribute ⇒ Attribute): DF = { + val analyzed = self.queryExecution.analyzed.output + val selects = selector(column) + val attrs = analyzed.map { attr ⇒ + if(selects(attr)) op(attr) else attr + } + self.select(attrs.map(a ⇒ new Column(a)): _*).asInstanceOf[DF] + } + + private[rasterframes] def addColumnMetadata(column: Column, op: MetadataBuilder ⇒ MetadataBuilder): DF = { + mapColumnAttribute(column, attr ⇒ { + val md = new MetadataBuilder().withMetadata(attr.metadata) + attr.withMetadata(op(md).build) + }) + } + + private[rasterframes] def fetchMetadataValue[D](column: Column, reader: (Attribute) ⇒ D): Option[D] = { + val analyzed = self.queryExecution.analyzed.output + analyzed.find(selector(column)).map(reader) + } + + private[rasterframes] + def setSpatialColumnRole[K: SpatialComponent: JsonFormat]( + column: Column, md: TileLayerMetadata[K]): DF = + addColumnMetadata(column, + _.attachContext(md.asColumnMetadata).tagSpatialKey + ) + + private[rasterframes] + def setTemporalColumnRole(column: Column): DF = + addColumnMetadata(column, _.tagTemporalKey) + + /** Get the role tag the column plays in the RasterFrameLayer, if any. */ + private[rasterframes] + def getColumnRole(column: Column): Option[String] = + fetchMetadataValue(column, _.metadata.getString(SPATIAL_ROLE_KEY)) + + /** Get the columns that are of type `Tile` */ + def tileColumns: Seq[Column] = + self.schema.fields + .filter(f => DynamicExtractors.tileExtractor.isDefinedAt(f.dataType)) + .map(f ⇒ self.col(f.name)) + + /** Get the columns that look like `ProjectedRasterTile`s. */ + def projRasterColumns: Seq[Column] = + self.schema.fields + .filter(_.dataType.conformsTo[ProjectedRasterTile]) + .map(f => self.col(f.name)) + + /** Get the columns that look like `Extent`s. 
*/ + def extentColumns: Seq[Column] = + self.schema.fields + .filter(_.dataType.conformsTo[Extent]) + .map(f => self.col(f.name)) + + /** Get the columns that look like `CRS`s. */ + def crsColumns: Seq[Column] = + self.schema.fields + .filter(_.dataType.conformsTo[CRS]) + .map(f => self.col(f.name)) + + /** Get the columns that are not of type `Tile` */ + def notTileColumns: Seq[Column] = + self.schema.fields + .filter(f => !DynamicExtractors.tileExtractor.isDefinedAt(f.dataType)) + .map(f ⇒ self.col(f.name)) + + /** Get the spatial column. */ + def spatialKeyColumn: Option[TypedColumn[Any, SpatialKey]] = { + val key = findSpatialKeyField + key + .map(_.name) + .map(self.col(_).as[SpatialKey]) + } + + /** Get the temporal column, if any. */ + def temporalKeyColumn: Option[TypedColumn[Any, TemporalKey]] = { + val key = findTemporalKeyField + key.map(_.name).map(self.col(_).as[TemporalKey]) + } + + /** Find the field tagged with the requested `role` */ + private[rasterframes] def findRoleField(role: String): Option[StructField] = + self.schema.fields.find( + f ⇒ + f.metadata.contains(SPATIAL_ROLE_KEY) && + f.metadata.getString(SPATIAL_ROLE_KEY) == role + ) + + /** The spatial key is the first one found with context metadata attached to it. */ + private[rasterframes] def findSpatialKeyField: Option[StructField] = + findRoleField(SPATIAL_KEY_COLUMN.columnName) + + /** The temporal key is the first one found with the temporal tag. */ + private[rasterframes] def findTemporalKeyField: Option[StructField] = + findRoleField(TEMPORAL_KEY_COLUMN.columnName) + + /** Renames all columns such that they start with the given prefix string. + * Useful for preparing dataframes for joins where duplicate names may arise. + */ + def withPrefixedColumnNames(prefix: String): DF = + self.columns.foldLeft(self)((df, c) ⇒ df.withColumnRenamed(c, s"$prefix$c").asInstanceOf[DF]) + + /** + * Performs a jeft join on the dataframe `right` to this one, reprojecting and merging tiles as necessary. 
+ * The operation is logically a "left outer" join, with the left side also determining the target CRS and extents. + * Right side may have multiple Tile columns. Assumes both dataframes use the column names `extent` and `crs` for + * the Extent and CRS details for each row. The join expression used is: + * + * {{{ + * st_intersects(st_geometry(leftExtent), st_reproject(st_geometry(rightExtent), rightCRS, leftCRS)) + * }}} + * + * @param right Right side of the join. + * @return joined dataframe + */ + def rasterJoin(right: DataFrame): DataFrame = RasterJoin(self, right) + + /** + * Performs a jeft join on the dataframe `right` to this one, reprojecting and merging tiles as necessary. + * The operation is logically a "left outer" join, with the left side also determining the target CRS and extents. + * Right side may have multiple Tile columns. This variant allows for the specific geospatial columns to be + * specified. The join expression used is: + * {{{ + * st_intersects(st_geometry(leftExtent), st_reproject(st_geometry(rightExtent), rightCRS, leftCRS)) + * }}} + * + * @param right right dataframe + * @param leftExtent this (left) dataframe's Extent column + * @param leftCRS this (left) datafrasme's CRS column + * @param rightExtent right dataframe's CRS extent + * @param rightCRS right dataframe's CRS column + * @return joined dataframe + */ + def rasterJoin(right: DataFrame, leftExtent: Column, leftCRS: Column, rightExtent: Column, rightCRS: Column): DataFrame = + RasterJoin(self, right, leftExtent, leftCRS, rightExtent, rightCRS) + + /** + * Performs a jeft join on the dataframe `right` to this one, reprojecting and merging tiles as necessary. + * The operation is logically a "left outer" join, with the left side also determining the target CRS and extents. + * Right side may have multiple Tile columns. This variant allows for the specific geospatial columns and join + * expression to be specified. 
+ * + * @param right right dataframe + * @param leftExtent this (left) dataframe's Extent column + * @param joinExpr join expression + * @param leftCRS this (left) datafrasme's CRS column + * @param rightExtent right dataframe's CRS extent + * @param rightCRS right dataframe's CRS column + * @return joined dataframe + */ + def rasterJoin(right: DataFrame, joinExpr: Column, leftExtent: Column, leftCRS: Column, rightExtent: Column, rightCRS: Column): DataFrame = + RasterJoin(self, right, joinExpr, leftExtent, leftCRS, rightExtent, rightCRS) + + + /** Layout contents of RasterFrame to a layer. Assumes CRS and extent columns exist. */ + def toLayer(tlm: TileLayerMetadata[SpatialKey]): RasterFrameLayer = ReprojectToLayer(self, tlm) + + /** Coerces this DataFrame to a RasterFrameLayer after ensuring it has: + * + *
                + *
              1. a space or space-time key column + *
              2. one or more tile columns + *
              3. tile layout metadata + *
                  + * + * If any of the above are violated, and [[IllegalArgumentException]] is thrown. + * + * @return validated RasterFrameLayer + * @throws IllegalArgumentException when constraints are not met. + */ + @throws[IllegalArgumentException] + def asLayer: RasterFrameLayer = { + val potentialRF = certifyRasterframe(self) + + require( + potentialRF.findSpatialKeyField.nonEmpty, + "A RasterFrameLayer requires a column identified as a spatial key" + ) + + require(potentialRF.tileColumns.nonEmpty, "A RasterFrameLayer requires at least one tile column") + + require( + Try(potentialRF.tileLayerMetadata).isSuccess, + "A RasterFrameLayer requires embedded TileLayerMetadata" + ) + + potentialRF + } + + /** + * Convert DataFrame already in a uniform gridding into a RasterFrameLayer + * + * @param spatialKey The column where the spatial key is stored + * @param tlm Metadata describing layout under which tiles were created. Note: no checking is + * performed to ensure metadata, key-space, and tiles are coherent. + * @throws IllegalArgumentException when constraints outlined in `asLayer` are not met. + * @return Encoded RasterFrameLayer + */ + @throws[IllegalArgumentException] + private[rasterframes] + def asLayer(spatialKey: Column, tlm: TileLayerMetadata[SpatialKey]): RasterFrameLayer = + setSpatialColumnRole(spatialKey, tlm).asLayer + + /** + * Convert DataFrame already in a uniform gridding into a RasterFrameLayer + * + * @param spatialKey The column where the spatial key is stored + * @param temporalKey The column tagged under the temporal role + * @param tlm Metadata describing layout under which tiles were created. Note: no checking is + * performed to ensure metadata, key-space, and tiles are coherent. + * @throws IllegalArgumentException when constraints outlined in `asLayer` are not met. 
+ * @return Encoded RasterFrameLayer + */ + @throws[IllegalArgumentException] + private[rasterframes] + def asLayer(spatialKey: Column, temporalKey: Column, tlm: TileLayerMetadata[SpaceTimeKey]): RasterFrameLayer = + setSpatialColumnRole(spatialKey, tlm) + .setTemporalColumnRole(temporalKey) + .asLayer + + /** + * Converts [[DataFrame]] to a RasterFrameLayer if the following constraints are fulfilled: + * + *
                    + *
                  1. a space or space-time key column + *
                  2. one or more tile columns + *
                  3. tile layout metadata + *
                      + * + * @return Some[RasterFrameLayer] if constraints fulfilled, [[None]] otherwise. + */ + def asLayerSafely: Option[RasterFrameLayer] = Try(asLayer).toOption + + /** + * Tests for the following conditions on the [[DataFrame]]: + * + *
                        + *
                      1. a space or space-time key column + *
                      2. one or more tile columns + *
                      3. tile layout metadata + *
                          + * + * @return true if all constraints are fulfilled, false otherwise. + */ + def isAlreadyLayer: Boolean = Try(asLayer).isSuccess + + /** Internal method for slapping the RasterFreameLayer seal of approval on a DataFrame. + * Only call if if you are sure it has a spatial key and tile columns and TileLayerMetadata. */ + private[rasterframes] def certify = certifyRasterframe(self) +} diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/Implicits.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/Implicits.scala similarity index 82% rename from core/src/main/scala/astraea/spark/rasterframes/extensions/Implicits.scala rename to core/src/main/scala/org/locationtech/rasterframes/extensions/Implicits.scala index 8fdda51a0..563e03e87 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/extensions/Implicits.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/Implicits.scala @@ -15,16 +15,18 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.extensions +package org.locationtech.rasterframes.extensions -import astraea.spark.rasterframes.RasterFrame -import astraea.spark.rasterframes.util.{WithMergeMethods, WithPrototypeMethods} +import org.locationtech.rasterframes.RasterFrameLayer +import org.locationtech.rasterframes.util.{WithMergeMethods, WithPrototypeMethods} import geotrellis.raster._ +import geotrellis.raster.io.geotiff.SinglebandGeoTiff import geotrellis.spark.{Metadata, SpaceTimeKey, SpatialKey, TileLayerMetadata} import geotrellis.util.MethodExtensions -import org.apache.hadoop.conf.{Configuration => HadoopConfiguration} import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD import org.apache.spark.sql._ @@ -50,9 +52,11 @@ trait Implicits { implicit class WithProjectedRasterMethods[T <: CellGrid: WithMergeMethods: WithPrototypeMethods: TypeTag]( val self: ProjectedRaster[T]) extends ProjectedRasterMethods[T] + implicit class WithSinglebandGeoTiffMethods(val self: SinglebandGeoTiff) extends SinglebandGeoTiffMethods + implicit class WithDataFrameMethods[D <: DataFrame](val self: D) extends DataFrameMethods[D] - implicit class WithRasterFrameMethods(val self: RasterFrame) extends RasterFrameMethods + implicit class WithRasterFrameLayerMethods(val self: RasterFrameLayer) extends RasterFrameLayerMethods implicit class WithSpatialContextRDDMethods[T <: CellGrid]( val self: RDD[(SpatialKey, T)] with Metadata[TileLayerMetadata[SpatialKey]] @@ -62,17 +66,17 @@ trait Implicits { val self: RDD[(SpaceTimeKey, T)] with Metadata[TileLayerMetadata[SpaceTimeKey]] )(implicit spark: SparkSession) extends SpatioTemporalContextRDDMethods[T] - private[astraea] + private[rasterframes] implicit class WithMetadataMethods[R: JsonFormat](val self: R) extends MetadataMethods[R] - private[astraea] + private[rasterframes] implicit class WithMetadataAppendMethods(val self: SMetadata) extends MethodExtensions[SMetadata] { def 
append = new MetadataBuilder().withMetadata(self) } - private[astraea] + private[rasterframes] implicit class WithMetadataBuilderMethods(val self: MetadataBuilder) extends MetadataBuilderMethods } diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/KryoMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/KryoMethods.scala similarity index 93% rename from core/src/main/scala/astraea/spark/rasterframes/extensions/KryoMethods.scala rename to core/src/main/scala/org/locationtech/rasterframes/extensions/KryoMethods.scala index 52ed69557..7b291d7d6 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/extensions/KryoMethods.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/KryoMethods.scala @@ -19,12 +19,12 @@ * */ -package astraea.spark.rasterframes.extensions -import astraea.spark.rasterframes.util.RFKryoRegistrator +package org.locationtech.rasterframes.extensions import geotrellis.util.MethodExtensions import org.apache.spark.SparkConf import org.apache.spark.serializer.KryoSerializer import org.apache.spark.sql.SparkSession +import org.locationtech.rasterframes.util.RFKryoRegistrator object KryoMethods { val kryoProperties = Map("spark.serializer" -> classOf[KryoSerializer].getName, diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/MetadataBuilderMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/MetadataBuilderMethods.scala similarity index 85% rename from core/src/main/scala/astraea/spark/rasterframes/extensions/MetadataBuilderMethods.scala rename to core/src/main/scala/org/locationtech/rasterframes/extensions/MetadataBuilderMethods.scala index 491c30b4d..fc2401bb5 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/extensions/MetadataBuilderMethods.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/MetadataBuilderMethods.scala @@ -15,14 +15,16 @@ * License for the specific language governing permissions and 
limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.extensions +package org.locationtech.rasterframes.extensions -import astraea.spark.rasterframes.{MetadataKeys, StandardColumns} import geotrellis.util.MethodExtensions import org.apache.spark.sql.types.{Metadata, MetadataBuilder} -import astraea.spark.rasterframes.util._ +import org.locationtech.rasterframes.util._ +import org.locationtech.rasterframes.{MetadataKeys, StandardColumns} /** * Convenience to deal with boilerplate associated with adding @@ -30,7 +32,7 @@ import astraea.spark.rasterframes.util._ * * @since 12/21/17 */ -private[astraea] +private[rasterframes] abstract class MetadataBuilderMethods extends MethodExtensions[MetadataBuilder] with MetadataKeys with StandardColumns { def attachContext(md: Metadata) = self.putMetadata(CONTEXT_METADATA_KEY, md) def tagSpatialKey = self.putString(SPATIAL_ROLE_KEY, SPATIAL_KEY_COLUMN.columnName) diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/MetadataMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/MetadataMethods.scala similarity index 60% rename from core/src/main/scala/astraea/spark/rasterframes/extensions/MetadataMethods.scala rename to core/src/main/scala/org/locationtech/rasterframes/extensions/MetadataMethods.scala index e1a886e60..5d96abdf4 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/extensions/MetadataMethods.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/MetadataMethods.scala @@ -1,20 +1,26 @@ /* + * This software is licensed under the Apache 2 license, quoted below. + * * Copyright 2017 Astraea, Inc. * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * [http://www.apache.org/licenses/LICENSE-2.0] * * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.extensions +package org.locationtech.rasterframes.extensions + import geotrellis.util.MethodExtensions import spray.json.{JsObject, JsonFormat} import org.apache.spark.sql.types.{Metadata ⇒ SQLMetadata} diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/ProjectedRasterMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/ProjectedRasterMethods.scala similarity index 66% rename from core/src/main/scala/astraea/spark/rasterframes/extensions/ProjectedRasterMethods.scala rename to core/src/main/scala/org/locationtech/rasterframes/extensions/ProjectedRasterMethods.scala index 96709ef10..81f5054f9 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/extensions/ProjectedRasterMethods.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/ProjectedRasterMethods.scala @@ -1,20 +1,41 @@ -package astraea.spark.rasterframes.extensions +/* + * This software is licensed under the Apache 2 license, quoted below. 
+ * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.extensions import java.time.ZonedDateTime -import astraea.spark.rasterframes.util._ -import astraea.spark.rasterframes.{PairRDDConverter, RasterFrame, StandardColumns} import geotrellis.raster.{CellGrid, ProjectedRaster} import geotrellis.spark._ import geotrellis.spark.tiling._ import geotrellis.util.MethodExtensions import org.apache.spark.rdd.RDD import org.apache.spark.sql.SparkSession +import org.locationtech.rasterframes.util._ +import org.locationtech.rasterframes.{PairRDDConverter, RasterFrameLayer, StandardColumns} import scala.reflect.runtime.universe._ /** - * Extension methods on [[ProjectedRaster]] for creating [[RasterFrame]]s. + * Extension methods on [[ProjectedRaster]] for creating [[RasterFrameLayer]]s. * * @since 8/10/17 */ @@ -24,63 +45,64 @@ abstract class ProjectedRasterMethods[T <: CellGrid: WithMergeMethods: WithProto type XTileLayerRDD[K] = RDD[(K, T)] with Metadata[TileLayerMetadata[K]] /** - * Convert the wrapped [[ProjectedRaster]] into a [[RasterFrame]] with a + * Convert the wrapped [[ProjectedRaster]] into a [[RasterFrameLayer]] with a * single row. 
* - * @param spark [[SparkSession]] in which to create [[RasterFrame]] + * @param spark [[SparkSession]] in which to create [[RasterFrameLayer]] */ - def toRF(implicit spark: SparkSession, schema: PairRDDConverter[SpatialKey, T]): RasterFrame = toRF(TILE_COLUMN.columnName) + def toLayer(implicit spark: SparkSession, schema: PairRDDConverter[SpatialKey, T]): RasterFrameLayer = + toLayer(TILE_COLUMN.columnName) /** - * Convert the wrapped [[ProjectedRaster]] into a [[RasterFrame]] with a + * Convert the wrapped [[ProjectedRaster]] into a [[RasterFrameLayer]] with a * single row. * - * @param spark [[SparkSession]] in which to create [[RasterFrame]] + * @param spark [[SparkSession]] in which to create [[RasterFrameLayer]] */ - def toRF(tileColName: String) - (implicit spark: SparkSession, schema: PairRDDConverter[SpatialKey, T]): RasterFrame = { + def toLayer(tileColName: String) + (implicit spark: SparkSession, schema: PairRDDConverter[SpatialKey, T]): RasterFrameLayer = { val (cols, rows) = self.raster.dimensions - toRF(cols, rows, tileColName) + toLayer(cols, rows, tileColName) } /** - * Convert the [[ProjectedRaster]] into a [[RasterFrame]] using the + * Convert the [[ProjectedRaster]] into a [[RasterFrameLayer]] using the * given dimensions as the target per-row tile size. 
* * @param tileCols Max number of horizontal cells per tile * @param tileRows Max number of vertical cells per tile - * @param spark [[SparkSession]] in which to create [[RasterFrame]] + * @param spark [[SparkSession]] in which to create [[RasterFrameLayer]] */ - def toRF(tileCols: Int, tileRows: Int) - (implicit spark: SparkSession, schema: PairRDDConverter[SpatialKey, T]): RasterFrame = - toRF(tileCols, tileRows, TILE_COLUMN.columnName) + def toLayer(tileCols: Int, tileRows: Int) + (implicit spark: SparkSession, schema: PairRDDConverter[SpatialKey, T]): RasterFrameLayer = + toLayer(tileCols, tileRows, TILE_COLUMN.columnName) /** - * Convert the [[ProjectedRaster]] into a [[RasterFrame]] using the + * Convert the [[ProjectedRaster]] into a [[RasterFrameLayer]] using the * given dimensions as the target per-row tile size. * * @param tileCols Max number of horizontal cells per tile * @param tileRows Max number of vertical cells per tile * @param tileColName Name to give the created tile column - * @param spark [[SparkSession]] in which to create [[RasterFrame]] + * @param spark [[SparkSession]] in which to create [[RasterFrameLayer]] */ - def toRF(tileCols: Int, tileRows: Int, tileColName: String) - (implicit spark: SparkSession, schema: PairRDDConverter[SpatialKey, T]): RasterFrame = { - toTileLayerRDD(tileCols, tileRows).toRF(tileColName) + def toLayer(tileCols: Int, tileRows: Int, tileColName: String) + (implicit spark: SparkSession, schema: PairRDDConverter[SpatialKey, T]): RasterFrameLayer = { + toTileLayerRDD(tileCols, tileRows).toLayer(tileColName) } /** - * Convert the [[ProjectedRaster]] into a [[RasterFrame]] using the + * Convert the [[ProjectedRaster]] into a [[RasterFrameLayer]] using the * given dimensions as the target per-row tile size and singular timestamp as the temporal component. * * @param tileCols Max number of horizontal cells per tile * @param tileRows Max number of vertical cells per tile. 
* @param timestamp Temporal key value to assign to tiles. - * @param spark [[SparkSession]] in which to create [[RasterFrame]] + * @param spark [[SparkSession]] in which to create [[RasterFrameLayer]] */ - def toRF(tileCols: Int, tileRows: Int, timestamp: ZonedDateTime) - (implicit spark: SparkSession, schema: PairRDDConverter[SpaceTimeKey, T]): RasterFrame = - toTileLayerRDD(tileCols, tileRows, timestamp).toRF + def toLayer(tileCols: Int, tileRows: Int, timestamp: ZonedDateTime) + (implicit spark: SparkSession, schema: PairRDDConverter[SpaceTimeKey, T]): RasterFrameLayer = + toTileLayerRDD(tileCols, tileRows, timestamp).toLayer /** * Convert the [[ProjectedRaster]] into a [[TileLayerRDD[SpatialKey]] using the @@ -92,7 +114,7 @@ abstract class ProjectedRasterMethods[T <: CellGrid: WithMergeMethods: WithProto */ def toTileLayerRDD(tileCols: Int, tileRows: Int)(implicit spark: SparkSession): XTileLayerRDD[SpatialKey] = { - val layout = LayoutDefinition(self.rasterExtent, tileCols, tileRows) + val layout = LayoutDefinition(self.raster.rasterExtent, tileCols, tileRows) val kb = KeyBounds(SpatialKey(0, 0), SpatialKey(layout.layoutCols - 1, layout.layoutRows - 1)) val tlm = TileLayerMetadata(self.tile.cellType, layout, self.extent, self.crs, kb) @@ -115,7 +137,7 @@ abstract class ProjectedRasterMethods[T <: CellGrid: WithMergeMethods: WithProto * @param spark [[SparkSession]] in which to create RDD */ def toTileLayerRDD(tileCols: Int, tileRows: Int, timestamp: ZonedDateTime)(implicit spark: SparkSession): XTileLayerRDD[SpaceTimeKey] = { - val layout = LayoutDefinition(self.rasterExtent, tileCols, tileRows) + val layout = LayoutDefinition(self.raster.rasterExtent, tileCols, tileRows) val kb = KeyBounds(SpaceTimeKey(0, 0, timestamp), SpaceTimeKey(layout.layoutCols - 1, layout.layoutRows - 1, timestamp)) val tlm = TileLayerMetadata(self.tile.cellType, layout, self.extent, self.crs, kb) diff --git 
a/core/src/main/scala/astraea/spark/rasterframes/extensions/RFSpatialColumnMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/RFSpatialColumnMethods.scala similarity index 65% rename from core/src/main/scala/astraea/spark/rasterframes/extensions/RFSpatialColumnMethods.scala rename to core/src/main/scala/org/locationtech/rasterframes/extensions/RFSpatialColumnMethods.scala index af744f5f4..4eade42ad 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/extensions/RFSpatialColumnMethods.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/RFSpatialColumnMethods.scala @@ -15,56 +15,71 @@ * License for the specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.extensions +package org.locationtech.rasterframes.extensions -import astraea.spark.rasterframes.util._ -import astraea.spark.rasterframes.{RasterFrame, StandardColumns} -import com.vividsolutions.jts.geom.{Point, Polygon} +import org.locationtech.rasterframes.util._ +import org.locationtech.rasterframes.RasterFrameLayer +import org.locationtech.jts.geom.Point import geotrellis.proj4.LatLng import geotrellis.spark.SpatialKey import geotrellis.spark.tiling.MapKeyTransform import geotrellis.util.MethodExtensions +import geotrellis.vector.Extent import org.apache.spark.sql.Row -import org.apache.spark.sql.functions.{asc, udf ⇒ sparkUdf} +import org.apache.spark.sql.functions.{asc, udf => sparkUdf} import org.apache.spark.sql.types.{DoubleType, StructField, StructType} import org.locationtech.geomesa.curve.Z2SFC +import org.locationtech.rasterframes.StandardColumns /** - * RasterFrame extension methods associated with adding spatially descriptive columns. + * RasterFrameLayer extension methods associated with adding spatially descriptive columns. 
* * @since 12/15/17 */ -trait RFSpatialColumnMethods extends MethodExtensions[RasterFrame] with StandardColumns { - import Implicits.{WithDataFrameMethods, WithRasterFrameMethods} +trait RFSpatialColumnMethods extends MethodExtensions[RasterFrameLayer] with StandardColumns { + import Implicits.{WithDataFrameMethods, WithRasterFrameLayerMethods} import org.locationtech.geomesa.spark.jts._ /** Returns the key-space to map-space coordinate transform. */ def mapTransform: MapKeyTransform = self.tileLayerMetadata.merge.mapTransform - private def keyCol2Bounds: Row ⇒ Polygon = { + private def keyCol2Extent: Row ⇒ Extent = { val transform = self.sparkSession.sparkContext.broadcast(mapTransform) - (r: Row) ⇒ transform.value.keyToExtent(SpatialKey(r.getInt(0), r.getInt(1))).jtsGeom + r ⇒ transform.value.keyToExtent(SpatialKey(r.getInt(0), r.getInt(1))) } private def keyCol2LatLng: Row ⇒ (Double, Double) = { val transform = self.sparkSession.sparkContext.broadcast(mapTransform) val crs = self.tileLayerMetadata.merge.crs - (r: Row) ⇒ { + r ⇒ { val center = transform.value.keyToExtent(SpatialKey(r.getInt(0), r.getInt(1))).center.reproject(crs, LatLng) (center.x, center.y) } } + /** + * Append a column containing the extent of the row's spatial key. + * Coordinates are in native CRS. + * @param colName name of column to append. Defaults to "extent" + * @return updated RasterFrameLayer + */ + def withExtent(colName: String = EXTENT_COLUMN.columnName): RasterFrameLayer = { + val key2Extent = sparkUdf(keyCol2Extent) + self.withColumn(colName, key2Extent(self.spatialKeyColumn)).certify + } + /** * Append a column containing the bounds of the row's spatial key. * Coordinates are in native CRS. - * @param colName name of column to append. Defaults to "bounds" - * @return updated RasterFrame + * @param colName name of column to append. 
Defaults to "geometry" + * @return updated RasterFrameLayer */ - def withBounds(colName: String = BOUNDS_COLUMN.columnName): RasterFrame = { - val key2Bounds = sparkUdf(keyCol2Bounds) + def withGeometry(colName: String = GEOMETRY_COLUMN.columnName): RasterFrameLayer = { + val key2Bounds = sparkUdf(keyCol2Extent andThen (_.jtsGeom)) self.withColumn(colName, key2Bounds(self.spatialKeyColumn)).certify } @@ -72,10 +87,10 @@ trait RFSpatialColumnMethods extends MethodExtensions[RasterFrame] with Standard * Append a column containing the center of the row's spatial key. * Coordinate is in native CRS. * @param colName name of column to append. Defaults to "center" - * @return updated RasterFrame + * @return updated RasterFrameLayer */ - def withCenter(colName: String = CENTER_COLUMN.columnName): RasterFrame = { - val key2Center = sparkUdf(keyCol2Bounds andThen (_.getCentroid)) + def withCenter(colName: String = CENTER_COLUMN.columnName): RasterFrameLayer = { + val key2Center = sparkUdf(keyCol2Extent andThen (_.center.jtsGeom)) self.withColumn(colName, key2Center(self.spatialKeyColumn).as[Point]).certify } @@ -83,9 +98,9 @@ trait RFSpatialColumnMethods extends MethodExtensions[RasterFrame] with Standard * Append a column containing the center of the row's spatial key. * Coordinate is in (longitude, latitude) (EPSG:4326). * @param colName name of column to append. Defaults to "center" - * @return updated RasterFrame + * @return updated RasterFrameLayer */ - def withCenterLatLng(colName: String = "center"): RasterFrame = { + def withCenterLatLng(colName: String = "center"): RasterFrameLayer = { val key2Center = sparkUdf(keyCol2LatLng) self.withColumn(colName, key2Center(self.spatialKeyColumn).cast(RFSpatialColumnMethods.LngLatStructType)).certify } @@ -94,9 +109,9 @@ trait RFSpatialColumnMethods extends MethodExtensions[RasterFrame] with Standard * Appends a spatial index column * @param colName name of new column to create. 
Defaults to `index` * @param applyOrdering if true, adds `.orderBy(asc(colName))` to result. Defaults to `true` - * @return RasterFrame with index column. + * @return RasterFrameLayer with index column. */ - def withSpatialIndex(colName: String = SPATIAL_INDEX_COLUMN.columnName, applyOrdering: Boolean = true): RasterFrame = { + def withSpatialIndex(colName: String = SPATIAL_INDEX_COLUMN.columnName, applyOrdering: Boolean = true): RasterFrameLayer = { val zindex = sparkUdf(keyCol2LatLng andThen (p ⇒ Z2SFC.index(p._1, p._2).z)) self.withColumn(colName, zindex(self.spatialKeyColumn)) match { case rf if applyOrdering ⇒ rf.orderBy(asc(colName)).certify diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/RasterFrameMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/RasterFrameLayerMethods.scala similarity index 79% rename from core/src/main/scala/astraea/spark/rasterframes/extensions/RasterFrameMethods.scala rename to core/src/main/scala/org/locationtech/rasterframes/extensions/RasterFrameLayerMethods.scala index e83e55fd3..28f2839ed 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/extensions/RasterFrameMethods.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/RasterFrameLayerMethods.scala @@ -1,54 +1,64 @@ /* + * This software is licensed under the Apache 2 license, quoted below. + * * Copyright 2017 Astraea, Inc. * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * [http://www.apache.org/licenses/LICENSE-2.0] * * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.extensions +package org.locationtech.rasterframes.extensions import java.time.ZonedDateTime -import astraea.spark.rasterframes.util._ -import astraea.spark.rasterframes.{MetadataKeys, RasterFrame} +import org.locationtech.rasterframes.util._ +import org.locationtech.rasterframes.RasterFrameLayer import geotrellis.proj4.CRS import geotrellis.raster.resample.{NearestNeighbor, ResampleMethod} import geotrellis.raster.{MultibandTile, ProjectedRaster, Tile, TileLayout} import geotrellis.spark._ import geotrellis.spark.io._ import geotrellis.spark.tiling.{LayoutDefinition, Tiler} -import geotrellis.util.{LazyLogging, MethodExtensions} +import geotrellis.util.MethodExtensions import geotrellis.vector.ProjectedExtent import org.apache.spark.annotation.Experimental import org.apache.spark.sql._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{Metadata, TimestampType} import spray.json._ -import astraea.spark.rasterframes.encoders.StandardEncoders._ -import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders._ +import org.locationtech.rasterframes.encoders.StandardEncoders._ +import 
org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders._ +import com.typesafe.scalalogging.LazyLogging +import org.locationtech.rasterframes.MetadataKeys +import org.locationtech.rasterframes.tiles.ShowableTile + import scala.reflect.runtime.universe._ /** - * Extension methods on [[RasterFrame]] type. - * @since 7/18/17 + * Extension methods on [[RasterFrameLayer]] type. + * + * @since 7/18/17 */ -trait RasterFrameMethods extends MethodExtensions[RasterFrame] +trait RasterFrameLayerMethods extends MethodExtensions[RasterFrameLayer] with RFSpatialColumnMethods with MetadataKeys with LazyLogging { - import Implicits.{WithDataFrameMethods, WithRasterFrameMethods} + import Implicits.{WithDataFrameMethods, WithRasterFrameLayerMethods} /** - * A convenience over `DataFrame.withColumnRenamed` whereby the `RasterFrame` type is maintained. + * A convenience over `DataFrame.withColumnRenamed` whereby the `RasterFrameLayer` type is maintained. */ - def withRFColumnRenamed(existingName: String, newName: String): RasterFrame = + def withRFColumnRenamed(existingName: String, newName: String): RasterFrameLayer = (self: DataFrame).withColumnRenamed(existingName, newName).certify /** Get the spatial column. 
*/ @@ -66,7 +76,7 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame] def tileLayerMetadata: Either[TileLayerMetadata[SpatialKey], TileLayerMetadata[SpaceTimeKey]] = { val spatialMD = self.findSpatialKeyField .map(_.metadata) - .getOrElse(throw new IllegalArgumentException(s"RasterFrame operation requsted on non-RasterFrame: $self")) + .getOrElse(throw new IllegalArgumentException(s"RasterFrameLayer operation requsted on non-RasterFrameLayer: $self")) if (self.findTemporalKeyField.nonEmpty) Right(extract[TileLayerMetadata[SpaceTimeKey]](CONTEXT_METADATA_KEY)(spatialMD)) @@ -74,12 +84,12 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame] Left(extract[TileLayerMetadata[SpatialKey]](CONTEXT_METADATA_KEY)(spatialMD)) } - /** Get the CRS covering the RasterFrame. */ + /** Get the CRS covering the RasterFrameLayer. */ def crs: CRS = tileLayerMetadata.fold(_.crs, _.crs) - /** Add a temporal key to the RasterFrame, assigning the same temporal key to all rows. */ - def addTemporalComponent(value: TemporalKey): RasterFrame = { - require(self.temporalKeyColumn.isEmpty, "RasterFrame already has a temporal component") + /** Add a temporal key to the RasterFrameLayer, assigning the same temporal key to all rows. */ + def addTemporalComponent(value: TemporalKey): RasterFrameLayer = { + require(self.temporalKeyColumn.isEmpty, "RasterFrameLayer already has a temporal component") val tlm = tileLayerMetadata.left.get val newBounds: Bounds[SpaceTimeKey] = tlm.bounds.flatMap[SpaceTimeKey] { @@ -101,14 +111,14 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame] } /** Create a temporal key from the given time and assign it as thea temporal key for all rows. */ - def addTemporalComponent(value: ZonedDateTime): RasterFrame = addTemporalComponent(TemporalKey(value)) + def addTemporalComponent(value: ZonedDateTime): RasterFrameLayer = addTemporalComponent(TemporalKey(value)) /** * Append a column containing the temporal key rendered as a TimeStamp. 
* @param colName name of column to add - * @return updated RasterFrame + * @return updated RasterFrameLayer */ - def withTimestamp(colName: String = TIMESTAMP_COLUMN.columnName): RasterFrame = { + def withTimestamp(colName: String = TIMESTAMP_COLUMN.columnName): RasterFrameLayer = { self.withColumn(colName, (TEMPORAL_KEY_COLUMN.getField("instant").as[Long] / 1000).cast(TimestampType)) .certify } @@ -124,7 +134,7 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame] * @param joinType One of: `inner`, `outer`, `left_outer`, `right_outer`, `leftsemi`. */ @Experimental - def spatialJoin(right: RasterFrame, joinType: String = "inner"): RasterFrame = { + def spatialJoin(right: RasterFrameLayer, joinType: String = "inner"): RasterFrameLayer = { val left = self val leftMetadata = left.tileLayerMetadata.merge @@ -137,7 +147,7 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame] ) } - def updateNames(rf: RasterFrame, + def updateNames(rf: RasterFrameLayer, prefix: String, sk: TypedColumn[Any, SpatialKey], tk: Option[TypedColumn[Any, TemporalKey]]) = { @@ -179,7 +189,7 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame] /** * Performs a full RDD scans of the key column for the data extent, and updates the [[TileLayerMetadata]] data extent to match. 
*/ - def clipLayerExtent: RasterFrame = { + def clipLayerExtent: RasterFrameLayer = { val metadata = tileLayerMetadata val extent = metadata.merge.extent val layout = metadata.merge.layout @@ -213,17 +223,36 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame] } /** - * Convert a single tile column from RasterFrame to a GeoTrellis [[TileLayerRDD]] + * Convert a single tile column from RasterFrameLayer to a GeoTrellis [[TileLayerRDD]] * @param tileCol column with tiles to be the */ def toTileLayerRDD(tileCol: Column): Either[TileLayerRDD[SpatialKey], TileLayerRDD[SpaceTimeKey]] = tileLayerMetadata.fold( - tlm ⇒ Left(ContextRDD(self.select(self.spatialKeyColumn, tileCol.as[Tile]).rdd, tlm)), + tlm ⇒ { + val rdd = self.select(self.spatialKeyColumn, tileCol.as[Tile]) + .rdd + .map { + // Wrapped tiles can break GeoTrellis Avro code. + case (sk, wrapped: ShowableTile) => (sk, wrapped.delegate) + case o => o + } + + Left(ContextRDD(rdd, tlm)) + }, tlm ⇒ { val rdd = self .select(self.spatialKeyColumn, self.temporalKeyColumn.get, tileCol.as[Tile]) .rdd - .map { case (sk, tk, v) ⇒ (SpaceTimeKey(sk, tk), v) } + .map { + case (sk, tk, v) ⇒ + val tile = v match { + // Wrapped tiles can break GeoTrellis Avro code. + case wrapped: ShowableTile => wrapped.delegate + case o => o + } + + (SpaceTimeKey(sk, tk), tile) + } Right(ContextRDD(rdd, tlm)) } ) @@ -259,7 +288,7 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame] private[rasterframes] def extract[M: JsonFormat](metadataKey: String)(md: Metadata) = md.getMetadata(metadataKey).json.parseJson.convertTo[M] - /** Convert the tiles in the RasterFrame into a single raster. For RasterFrames keyed with temporal keys, they + /** Convert the tiles in the RasterFrameLayer into a single raster. For RasterFrames keyed with temporal keys, they * will be merge undeterministically. 
*/ def toRaster(tileCol: Column, rasterCols: Int, @@ -294,7 +323,7 @@ trait RasterFrameMethods extends MethodExtensions[RasterFrame] ProjectedRaster(croppedTile.tile, md.extent, md.crs) } - /** Convert the Red, Green & Blue assigned tiles in the RasterFrame into a single color composite raster. + /** Convert the Red, Green & Blue assigned tiles in the RasterFrameLayer into a single color composite raster. * For RasterFrames keyed with temporal keys, they will be merged underterministically. */ def toMultibandRaster( tileCols: Seq[Column], diff --git a/core/src/main/scala/org/locationtech/rasterframes/extensions/RasterJoin.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/RasterJoin.scala new file mode 100644 index 000000000..e0cec7a8c --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/RasterJoin.scala @@ -0,0 +1,99 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.extensions +import org.apache.spark.sql._ +import org.apache.spark.sql.functions._ +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.functions.reproject_and_merge +import org.locationtech.rasterframes.util._ + +import scala.util.Random + +object RasterJoin { + + def apply(left: DataFrame, right: DataFrame): DataFrame = { + val df = apply(left, right, left("extent"), left("crs"), right("extent"), right("crs")) + df.drop(right("extent")).drop(right("crs")) + } + + def apply(left: DataFrame, right: DataFrame, leftExtent: Column, leftCRS: Column, rightExtent: Column, rightCRS: Column): DataFrame = { + val leftGeom = st_geometry(leftExtent) + val rightGeomReproj = st_reproject(st_geometry(rightExtent), rightCRS, leftCRS) + val joinExpr = st_intersects(leftGeom, rightGeomReproj) + apply(left, right, joinExpr, leftExtent, leftCRS, rightExtent, rightCRS) + } + + def apply(left: DataFrame, right: DataFrame, joinExprs: Column, leftExtent: Column, leftCRS: Column, rightExtent: Column, rightCRS: Column): DataFrame = { + // Convert resolved column into a symbolic one. + def unresolved(c: Column): Column = col(c.columnName) + + // Unique id for temporary columns + val id = Random.alphanumeric.take(5).mkString("_", "", "_") + + // Post aggregation left extent. We preserve the original name. + val leftExtent2 = leftExtent.columnName + // Post aggregation left crs. We preserve the original name. + val leftCRS2 = leftCRS.columnName + // Post aggregation right extent. We create a new name. + val rightExtent2 = id + "extent" + // Post aggregation right crs. We create a new name. + val rightCRS2 = id + "crs" + + + // Gathering up various expressions we'll use to construct the result. + // After joining We will be doing a groupBy the LHS. We have to define the aggregations to perform after the groupBy. 
+ // On the LHS we just want the first thing (subsequent ones should be identical). + val leftAggCols = left.columns.map(s => first(left(s), true) as s) + // On the RHS we collect result as a list. + val rightAggCtx = Seq(collect_list(rightExtent) as rightExtent2, collect_list(rightCRS) as rightCRS2) + val rightAggTiles = right.tileColumns.map(c => collect_list(c) as c.columnName) + val rightAggOther = right.notTileColumns + .filter(n => n.columnName != rightExtent.columnName && n.columnName != rightCRS.columnName) + .map(c => collect_list(c) as (c.columnName + "_agg")) + val aggCols = leftAggCols ++ rightAggTiles ++ rightAggCtx ++ rightAggOther + + // After the aggregation we take all the tiles we've collected and resample + merge + // into LHS extent/CRS. + // Use a representative tile from the left for the tile dimensions + val leftTile = left.tileColumns.headOption.getOrElse(throw new IllegalArgumentException("Need at least one target tile on LHS")) + val reprojCols = rightAggTiles.map(t => reproject_and_merge( + col(leftExtent2), col(leftCRS2), col(t.columnName), col(rightExtent2), col(rightCRS2), rf_dimensions(unresolved(leftTile)) + ) as t.columnName) + + val finalCols = leftAggCols.map(unresolved) ++ reprojCols ++ rightAggOther.map(unresolved) + + // Here's the meat: + left + // 1. Add a unique ID to each LHS row for subsequent grouping. + .withColumn(id, monotonically_increasing_id()) + // 2. Perform the left-outer join + .join(right, joinExprs, joinType = "left") + // 3. Group by the unique ID, reestablishing the LHS count + .groupBy(col(id)) + // 4. Apply aggregation to left and right columns: + // a. LHS just take the first entity + // b. RHS collect all results in a list + .agg(aggCols.head, aggCols.tail: _*) + // 5. Perform merge on RHS tile column collections, pass everything else through. 
+ .select(finalCols: _*) + } +} diff --git a/core/src/main/scala/org/locationtech/rasterframes/extensions/ReprojectToLayer.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/ReprojectToLayer.scala new file mode 100644 index 000000000..c396deaee --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/ReprojectToLayer.scala @@ -0,0 +1,49 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.extensions + +import geotrellis.spark.{SpatialKey, TileLayerMetadata} +import org.apache.spark.sql._ +import org.apache.spark.sql.functions.broadcast +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.util._ +object ReprojectToLayer { + + def apply(df: DataFrame, tlm: TileLayerMetadata[SpatialKey]): RasterFrameLayer = { + // create a destination dataframe with crs and extend columns + // use RasterJoin to do the rest. 
+ val gb = tlm.gridBounds + val crs = tlm.crs + + val gridItems = for { + (col, row) <- gb.coordsIter + sk = SpatialKey(col, row) + e = tlm.mapTransform(sk) + } yield (sk, e, crs) + + val dest = df.sparkSession.createDataFrame(gridItems.toSeq) + .toDF(SPATIAL_KEY_COLUMN.columnName, EXTENT_COLUMN.columnName, CRS_COLUMN.columnName) + val joined = RasterJoin(broadcast(dest), df) + + joined.asLayer(SPATIAL_KEY_COLUMN, tlm) + } +} diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/SQLContextMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/SQLContextMethods.scala similarity index 84% rename from core/src/main/scala/astraea/spark/rasterframes/extensions/SQLContextMethods.scala rename to core/src/main/scala/org/locationtech/rasterframes/extensions/SQLContextMethods.scala index bcd2b31c4..4a6df34cc 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/extensions/SQLContextMethods.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/SQLContextMethods.scala @@ -15,12 +15,14 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.extensions +package org.locationtech.rasterframes.extensions import geotrellis.util.MethodExtensions -import org.apache.spark.sql.{SQLContext, rf} +import org.apache.spark.sql.SQLContext /** @@ -30,7 +32,7 @@ import org.apache.spark.sql.{SQLContext, rf} */ trait SQLContextMethods extends MethodExtensions[SQLContext] { def withRasterFrames: SQLContext = { - astraea.spark.rasterframes.initRF(self) + org.locationtech.rasterframes.initRF(self) self } } diff --git a/core/src/main/scala/org/locationtech/rasterframes/extensions/SinglebandGeoTiffMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/SinglebandGeoTiffMethods.scala new file mode 100644 index 000000000..833ba80e3 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/SinglebandGeoTiffMethods.scala @@ -0,0 +1,59 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.extensions + +import geotrellis.proj4.CRS +import geotrellis.raster.io.geotiff.SinglebandGeoTiff +import geotrellis.util.MethodExtensions +import geotrellis.vector.Extent +import org.apache.spark.sql.types.{StructField, StructType} +import org.apache.spark.sql.{DataFrame, Row, SparkSession} +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.model.TileDimensions + +trait SinglebandGeoTiffMethods extends MethodExtensions[SinglebandGeoTiff] { + def toDF(dims: TileDimensions = NOMINAL_TILE_DIMS)(implicit spark: SparkSession): DataFrame = { + + val segmentLayout = self.imageData.segmentLayout + val re = self.rasterExtent + val crs = self.crs + + val windows = segmentLayout.listWindows(dims.cols, dims.rows) + val subtiles = self.crop(windows) + + val rows = for { + (gridbounds, tile) ← subtiles.toSeq + } yield { + val extent = re.extentFor(gridbounds, false) + Row(extent.toRow, crs.toRow, tile) + } + + val schema = StructType(Seq( + StructField("extent", schemaOf[Extent], false), + StructField("crs", schemaOf[CRS], false), + StructField("tile", TileType, false) + )) + + spark.createDataFrame(spark.sparkContext.makeRDD(rows, 1), schema) + } +} diff --git a/core/src/main/scala/astraea/spark/rasterframes/extensions/SparkSessionMethods.scala b/core/src/main/scala/org/locationtech/rasterframes/extensions/SparkSessionMethods.scala similarity index 86% rename from core/src/main/scala/astraea/spark/rasterframes/extensions/SparkSessionMethods.scala rename to core/src/main/scala/org/locationtech/rasterframes/extensions/SparkSessionMethods.scala index 9447e812e..a726b4052 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/extensions/SparkSessionMethods.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/extensions/SparkSessionMethods.scala @@ -15,9 +15,11 @@ * License for the 
specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.extensions +package org.locationtech.rasterframes.extensions import geotrellis.util.MethodExtensions import org.apache.spark.sql.SparkSession @@ -29,7 +31,7 @@ import org.apache.spark.sql.SparkSession */ trait SparkSessionMethods extends MethodExtensions[SparkSession] { def withRasterFrames: SparkSession = { - astraea.spark.rasterframes.initRF(self.sqlContext) + org.locationtech.rasterframes.initRF(self.sqlContext) self } } diff --git a/core/src/main/scala/astraea/spark/rasterframes/functions/package.scala b/core/src/main/scala/org/locationtech/rasterframes/functions/package.scala similarity index 65% rename from core/src/main/scala/astraea/spark/rasterframes/functions/package.scala rename to core/src/main/scala/org/locationtech/rasterframes/functions/package.scala index 060b08fa3..87894188a 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/functions/package.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/functions/package.scala @@ -1,28 +1,33 @@ /* + * This software is licensed under the Apache 2 license, quoted below. + * * Copyright 2017 Astraea, Inc. * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * [http://www.apache.org/licenses/LICENSE-2.0] * * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes - -import astraea.spark.rasterframes.expressions.aggstats._ -import astraea.spark.rasterframes.jts.ReprojectionTransformer -import astraea.spark.rasterframes.util.CRSParser -import com.vividsolutions.jts.geom.Geometry -import geotrellis.raster.mapalgebra.local._ +package org.locationtech.rasterframes +import geotrellis.proj4.CRS +import geotrellis.raster.reproject.Reproject import geotrellis.raster.{Tile, _} import geotrellis.vector.Extent -import org.apache.spark.sql.SQLContext +import org.apache.spark.sql.functions.udf +import org.apache.spark.sql.{Row, SQLContext} +import org.locationtech.jts.geom.Geometry +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.model.TileDimensions /** * Module utils. @@ -66,6 +71,10 @@ package object functions { } } + private[rasterframes] val arrayToTile: (Array[_], Int, Int) ⇒ Tile = (a, cols, rows) ⇒ { + arrayToTile(cols, rows).apply(a) + } + /** Set the tile's no-data value. 
*/ private[rasterframes] def withNoData(nodata: Double) = safeEval[Tile, Tile](_.withNoData(Some(nodata))) @@ -84,6 +93,8 @@ package object functions { } } + + /** Alias for constant tiles of zero */ private[rasterframes] val tileZeros: (Int, Int, String) ⇒ Tile = (cols, rows, cellTypeName) ⇒ makeConstantTile(0, cols, rows, cellTypeName) @@ -92,6 +103,36 @@ package object functions { private[rasterframes] val tileOnes: (Int, Int, String) ⇒ Tile = (cols, rows, cellTypeName) ⇒ makeConstantTile(1, cols, rows, cellTypeName) + val reproject_and_merge_f: (Row, Row, Seq[Tile], Seq[Row], Seq[Row], Row) => Tile = (leftExtentEnc: Row, leftCRSEnc: Row, tiles: Seq[Tile], rightExtentEnc: Seq[Row], rightCRSEnc: Seq[Row], leftDimsEnc: Row) => { + if (tiles.isEmpty) null + else { + require(tiles.length == rightExtentEnc.length && tiles.length == rightCRSEnc.length, "size mismatch") + + val leftExtent = leftExtentEnc.to[Extent] + val leftDims = leftDimsEnc.to[TileDimensions] + val leftCRS = leftCRSEnc.to[CRS] + val rightExtents = rightExtentEnc.map(_.to[Extent]) + val rightCRSs = rightCRSEnc.map(_.to[CRS]) + + val cellType = tiles.map(_.cellType).reduceOption(_ union _).getOrElse(tiles.head.cellType) + + // TODO: how to allow control over... expression? + val projOpts = Reproject.Options.DEFAULT + val dest: Tile = ArrayTile.empty(cellType, leftDims.cols, leftDims.rows) + //is there a GT function to do all this? + tiles.zip(rightExtents).zip(rightCRSs).map { + case ((tile, extent), crs) => + tile.reproject(extent, crs, leftCRS, projOpts) + }.foldLeft(dest)((d, t) => + d.merge(leftExtent, t.extent, t.tile, projOpts.method) + ) + } + } + + // NB: Don't be tempted to make this a `val`. Spark will barf if `withRasterFrames` hasn't been called first. 
+ def reproject_and_merge = udf(reproject_and_merge_f) + .withName("reproject_and_merge") + private[rasterframes] val cellTypes: () ⇒ Seq[String] = () ⇒ Seq( @@ -125,24 +166,12 @@ package object functions { } } - /** Reporjects a geometry column from one CRS to another, where CRS are defined in Proj4 format. */ - private[rasterframes] val reprojectGeometryCRSName: (Geometry, String, String) ⇒ Geometry = - (sourceGeom, srcName, dstName) ⇒ { - val src = CRSParser(srcName) - val dst = CRSParser(dstName) - val trans = new ReprojectionTransformer(src, dst) - trans.transform(sourceGeom) - } - def register(sqlContext: SQLContext): Unit = { - sqlContext.udf.register("rf_make_constant_tile", makeConstantTile) - sqlContext.udf.register("rf_tile_zeros", tileZeros) - sqlContext.udf.register("rf_tile_ones", tileOnes) - + sqlContext.udf.register("rf_make_zeros_tile", tileZeros) + sqlContext.udf.register("rf_make_ones_tile", tileOnes) sqlContext.udf.register("rf_cell_types", cellTypes) sqlContext.udf.register("rf_rasterize", rasterize) - - sqlContext.udf.register("rf_reproject_geometry", reprojectGeometryCRSName) + sqlContext.udf.register("rf_array_to_tile", arrayToTile) } } diff --git a/core/src/main/scala/astraea/spark/rasterframes/jts/Implicits.scala b/core/src/main/scala/org/locationtech/rasterframes/jts/Implicits.scala similarity index 92% rename from core/src/main/scala/astraea/spark/rasterframes/jts/Implicits.scala rename to core/src/main/scala/org/locationtech/rasterframes/jts/Implicits.scala index e257ebfa5..358fdc258 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/jts/Implicits.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/jts/Implicits.scala @@ -15,21 +15,23 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.jts +package org.locationtech.rasterframes.jts import java.sql.{Date, Timestamp} import java.time.{LocalDate, ZonedDateTime} -import astraea.spark.rasterframes.expressions.SpatialRelation.{Contains, Intersects} -import com.vividsolutions.jts.geom._ +import org.locationtech.rasterframes.expressions.SpatialRelation.{Contains, Intersects} +import org.locationtech.jts.geom._ import geotrellis.util.MethodExtensions import geotrellis.vector.{Point ⇒ gtPoint} import org.apache.spark.sql.{Column, TypedColumn} import org.apache.spark.sql.functions._ import org.locationtech.geomesa.spark.jts.DataFrameFunctions.SpatialConstructors -import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders._ +import org.locationtech.rasterframes.encoders.StandardEncoders.PrimitiveEncoders._ /** * Extension methods on typed columns allowing for DSL-like queries over JTS types. diff --git a/core/src/main/scala/astraea/spark/rasterframes/jts/ReprojectionTransformer.scala b/core/src/main/scala/org/locationtech/rasterframes/jts/ReprojectionTransformer.scala similarity index 85% rename from core/src/main/scala/astraea/spark/rasterframes/jts/ReprojectionTransformer.scala rename to core/src/main/scala/org/locationtech/rasterframes/jts/ReprojectionTransformer.scala index 1d583c739..c4751cb3c 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/jts/ReprojectionTransformer.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/jts/ReprojectionTransformer.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2018 Astraea. Inc. + * Copyright 2018 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -15,13 +15,14 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes.jts +package org.locationtech.rasterframes.jts -import com.vividsolutions.jts.geom.{CoordinateSequence, Geometry} -import com.vividsolutions.jts.geom.util.GeometryTransformer +import org.locationtech.jts.geom.{CoordinateSequence, Geometry} +import org.locationtech.jts.geom.util.GeometryTransformer import geotrellis.proj4.CRS /** diff --git a/core/src/main/scala/astraea/spark/rasterframes/ml/NoDataFilter.scala b/core/src/main/scala/org/locationtech/rasterframes/ml/NoDataFilter.scala similarity index 89% rename from core/src/main/scala/astraea/spark/rasterframes/ml/NoDataFilter.scala rename to core/src/main/scala/org/locationtech/rasterframes/ml/NoDataFilter.scala index dfe9499a3..5cd9e780e 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/ml/NoDataFilter.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/ml/NoDataFilter.scala @@ -15,17 +15,22 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.ml +package org.locationtech.rasterframes.ml -import astraea.spark.rasterframes.ml.Parameters.HasInputCols +import org.locationtech.rasterframes.ml.Parameters.HasInputCols import org.apache.spark.ml.Transformer import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable} import org.apache.spark.sql.Dataset import org.apache.spark.sql.types.StructType import java.util.ArrayList + +import org.locationtech.rasterframes.ml.Parameters.HasInputCols + import scala.collection.JavaConversions._ /** diff --git a/core/src/main/scala/astraea/spark/rasterframes/ml/Parameters.scala b/core/src/main/scala/org/locationtech/rasterframes/ml/Parameters.scala similarity index 92% rename from core/src/main/scala/astraea/spark/rasterframes/ml/Parameters.scala rename to core/src/main/scala/org/locationtech/rasterframes/ml/Parameters.scala index 4bc2fd476..4d273a7f9 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/ml/Parameters.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/ml/Parameters.scala @@ -15,9 +15,11 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.ml +package org.locationtech.rasterframes.ml import org.apache.spark.ml.param.{Params, StringArrayParam} diff --git a/core/src/main/scala/astraea/spark/rasterframes/ml/TileColumnSupport.scala b/core/src/main/scala/org/locationtech/rasterframes/ml/TileColumnSupport.scala similarity index 91% rename from core/src/main/scala/astraea/spark/rasterframes/ml/TileColumnSupport.scala rename to core/src/main/scala/org/locationtech/rasterframes/ml/TileColumnSupport.scala index eccc8f00e..d261f7e91 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/ml/TileColumnSupport.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/ml/TileColumnSupport.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2018 Astraea. Inc. + * Copyright 2018 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -15,10 +15,11 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes.ml +package org.locationtech.rasterframes.ml import org.apache.spark.sql.rf.TileUDT import org.apache.spark.sql.types.{StructField, StructType} diff --git a/core/src/main/scala/astraea/spark/rasterframes/ml/TileExploder.scala b/core/src/main/scala/org/locationtech/rasterframes/ml/TileExploder.scala similarity index 91% rename from core/src/main/scala/astraea/spark/rasterframes/ml/TileExploder.scala rename to core/src/main/scala/org/locationtech/rasterframes/ml/TileExploder.scala index d52b82d35..38f978231 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/ml/TileExploder.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/ml/TileExploder.scala @@ -15,18 +15,20 @@ * License for the specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.ml +package org.locationtech.rasterframes.ml -import astraea.spark.rasterframes._ +import org.locationtech.rasterframes._ import org.apache.spark.ml.Transformer import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable} import org.apache.spark.sql.Dataset import org.apache.spark.sql.functions.col import org.apache.spark.sql.types._ -import astraea.spark.rasterframes.util._ +import org.locationtech.rasterframes.util._ /** * SparkML Transformer for expanding tiles into single cell rows with @@ -56,7 +58,7 @@ class TileExploder(override val uid: String) extends Transformer val (tiles, nonTiles) = selectTileAndNonTileFields(dataset.schema) val tileCols = tiles.map(f ⇒ col(f.name)) val nonTileCols = nonTiles.map(f ⇒ col(f.name)) - val exploder = explode_tiles(tileCols: _*) + val exploder = rf_explode_tiles(tileCols: _*) dataset.select(nonTileCols :+ exploder: _*) } } diff --git a/core/src/main/scala/astraea/spark/rasterframes/model/CellContext.scala 
b/core/src/main/scala/org/locationtech/rasterframes/model/CellContext.scala similarity index 70% rename from core/src/main/scala/astraea/spark/rasterframes/model/CellContext.scala rename to core/src/main/scala/org/locationtech/rasterframes/model/CellContext.scala index cac2903dd..95a2e1bf0 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/model/CellContext.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/model/CellContext.scala @@ -19,25 +19,27 @@ * */ -package astraea.spark.rasterframes.model -import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder} +package org.locationtech.rasterframes.model + import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.types.{ShortType, StructField, StructType} +import org.locationtech.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder} +import CatalystSerializer._ -case class CellContext(tile_context: TileContext, tile_data_context: TileDataContext, col_index: Short, row_index: Short) +case class CellContext(tileContext: TileContext, tileDataContext: TileDataContext, colIndex: Short, rowIndex: Short) object CellContext { implicit val serializer: CatalystSerializer[CellContext] = new CatalystSerializer[CellContext] { override def schema: StructType = StructType(Seq( - StructField("tile_context", CatalystSerializer[TileContext].schema, false), - StructField("tile_data_context", CatalystSerializer[TileDataContext].schema, false), - StructField("col_index", ShortType, false), - StructField("row_index", ShortType, false) + StructField("tileContext", schemaOf[TileContext], false), + StructField("tileDataContext", schemaOf[TileDataContext], false), + StructField("colIndex", ShortType, false), + StructField("rowIndex", ShortType, false) )) override protected def to[R](t: CellContext, io: CatalystSerializer.CatalystIO[R]): R = io.create( - io.to(t.tile_context), - io.to(t.tile_data_context), - t.col_index, - t.row_index + 
io.to(t.tileContext), + io.to(t.tileDataContext), + t.colIndex, + t.rowIndex ) override protected def from[R](t: R, io: CatalystSerializer.CatalystIO[R]): CellContext = CellContext( io.get[TileContext](t, 0), diff --git a/core/src/main/scala/astraea/spark/rasterframes/model/Cells.scala b/core/src/main/scala/org/locationtech/rasterframes/model/Cells.scala similarity index 61% rename from core/src/main/scala/astraea/spark/rasterframes/model/Cells.scala rename to core/src/main/scala/org/locationtech/rasterframes/model/Cells.scala index acf847e45..1f7ae4d75 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/model/Cells.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/model/Cells.scala @@ -19,42 +19,61 @@ * */ -package astraea.spark.rasterframes.model -import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder} -import astraea.spark.rasterframes.ref.RasterRef -import astraea.spark.rasterframes.ref.RasterRef.RasterRefTile -import geotrellis.raster.{ArrayTile, Tile} +package org.locationtech.rasterframes.model + +import geotrellis.raster.{ArrayTile, ConstantTile, Tile} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.types.{BinaryType, StructField, StructType} +import org.locationtech.rasterframes +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder} +import org.locationtech.rasterframes.ref.RasterRef +import org.locationtech.rasterframes.ref.RasterRef.RasterRefTile +import org.locationtech.rasterframes.tiles.ShowableTile +import org.locationtech.rasterframes.tiles.ProjectedRasterTile.ConcreteProjectedRasterTile /** Represents the union of binary cell datas or a reference to the data.*/ case class Cells(data: Either[Array[Byte], RasterRef]) { def isRef: Boolean = data.isRight + /** Convert cells into either a RasterRefTile or an ArrayTile. 
*/ def toTile(ctx: TileDataContext): Tile = { data.fold( - bytes => ArrayTile.fromBytes(bytes, ctx.cell_type, ctx.dimensions.cols, ctx.dimensions.rows), + bytes => { + val t = ArrayTile.fromBytes(bytes, ctx.cellType, ctx.dimensions.cols, ctx.dimensions.rows) + if (Cells.showableTiles) new ShowableTile(t) + else t + }, ref => RasterRefTile(ref) ) } } object Cells { + private val showableTiles = rasterframes.rfConfig.getBoolean("showable-tiles") /** Extracts the Cells from a Tile. */ def apply(t: Tile): Cells = { t match { + case prt: ConcreteProjectedRasterTile => + apply(prt.t) case ref: RasterRefTile => Cells(Right(ref.rr)) - case o => + case const: ConstantTile => + // Need to expand constant tiles so they can be interpreted properly in catalyst and Python. + // If we don't, the serialization breaks. + Cells(Left(const.toArrayTile().toBytes)) + case o => Cells(Left(o.toBytes)) } } implicit def cellsSerializer: CatalystSerializer[Cells] = new CatalystSerializer[Cells] { - override def schema: StructType = StructType(Seq( - StructField("cells", BinaryType, true), - StructField("ref", CatalystSerializer[RasterRef].schema, true) - )) + override def schema: StructType = + StructType( + Seq( + StructField("cells", BinaryType, true), + StructField("ref", schemaOf[RasterRef], true) + )) override protected def to[R](t: Cells, io: CatalystSerializer.CatalystIO[R]): R = io.create( t.data.left.getOrElse(null), t.data.right.map(rr => io.to(rr)).right.getOrElse(null) diff --git a/core/src/main/scala/org/locationtech/rasterframes/model/FixedRasterExtent.scala b/core/src/main/scala/org/locationtech/rasterframes/model/FixedRasterExtent.scala new file mode 100644 index 000000000..cdce274bb --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/model/FixedRasterExtent.scala @@ -0,0 +1,278 @@ +/* + * Copyright 2016 Azavea + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.locationtech.rasterframes.model + + +import geotrellis.raster._ +import geotrellis.vector._ + +import scala.math.ceil + +/** + * This class is a copy of the GeoTrellis 2.x `RasterExtent`, + * with [GT 3.0 fixes](https://github.com/locationtech/geotrellis/pull/2953/files) incorporated into the + * new `GridExtent[T]` class. This class should be removed after RasterFrames is upgraded to GT 3.x. + */ +case class FixedRasterExtent( + override val extent: Extent, + override val cellwidth: Double, + override val cellheight: Double, + cols: Int, + rows: Int +) extends GridExtent(extent, cellwidth, cellheight) with Grid { + import FixedRasterExtent._ + + if (cols <= 0) throw GeoAttrsError(s"invalid cols: $cols") + if (rows <= 0) throw GeoAttrsError(s"invalid rows: $rows") + + /** + * Convert map coordinates (x, y) to grid coordinates (col, row). + */ + final def mapToGrid(x: Double, y: Double): (Int, Int) = { + val col = floorWithTolerance((x - extent.xmin) / cellwidth).toInt + val row = floorWithTolerance((extent.ymax - y) / cellheight).toInt + (col, row) + } + + /** + * Convert map coordinate x to grid coordinate column. + */ + final def mapXToGrid(x: Double): Int = floorWithTolerance(mapXToGridDouble(x)).toInt + + /** + * Convert map coordinate x to grid coordinate column. + */ + final def mapXToGridDouble(x: Double): Double = (x - extent.xmin) / cellwidth + + /** + * Convert map coordinate y to grid coordinate row. 
+ */ + final def mapYToGrid(y: Double): Int = floorWithTolerance(mapYToGridDouble(y)).toInt + + /** + * Convert map coordinate y to grid coordinate row. + */ + final def mapYToGridDouble(y: Double): Double = (extent.ymax - y ) / cellheight + + /** + * Convert map coordinate tuple (x, y) to grid coordinates (col, row). + */ + final def mapToGrid(mapCoord: (Double, Double)): (Int, Int) = { + val (x, y) = mapCoord + mapToGrid(x, y) + } + + /** + * Convert a point to grid coordinates (col, row). + */ + final def mapToGrid(p: Point): (Int, Int) = + mapToGrid(p.x, p.y) + + /** + * The map coordinate of a grid cell is the center point. + */ + final def gridToMap(col: Int, row: Int): (Double, Double) = { + val x = col * cellwidth + extent.xmin + (cellwidth / 2) + val y = extent.ymax - (row * cellheight) - (cellheight / 2) + + (x, y) + } + + /** + * For a give column, find the corresponding x-coordinate in the + * grid of the present [[FixedRasterExtent]]. + */ + final def gridColToMap(col: Int): Double = { + col * cellwidth + extent.xmin + (cellwidth / 2) + } + + /** + * For a give row, find the corresponding y-coordinate in the grid + * of the present [[FixedRasterExtent]]. + */ + final def gridRowToMap(row: Int): Double = { + extent.ymax - (row * cellheight) - (cellheight / 2) + } + + /** + * Gets the GridBounds aligned with this FixedRasterExtent that is the + * smallest subgrid of containing all points within the extent. The + * extent is considered inclusive on it's north and west borders, + * exclusive on it's east and south borders. See [[FixedRasterExtent]] + * for a discussion of grid and extent boundary concepts. + * + * The 'clamp' flag determines whether or not to clamp the + * GridBounds to the FixedRasterExtent; defaults to true. If false, + * GridBounds can contain negative values, or values outside of + * this FixedRasterExtent's boundaries. 
+ * + * @param subExtent The extent to get the grid bounds for + * @param clamp A boolean + */ + def gridBoundsFor(subExtent: Extent, clamp: Boolean = true): GridBounds = { + // West and North boundaries are a simple mapToGrid call. + val (colMin, rowMin) = mapToGrid(subExtent.xmin, subExtent.ymax) + + // If South East corner is on grid border lines, we want to still only include + // what is to the West and\or North of the point. However if the border point + // is not directly on a grid division, include the whole row and/or column that + // contains the point. + val colMax = { + val colMaxDouble = mapXToGridDouble(subExtent.xmax) + if(math.abs(colMaxDouble - floorWithTolerance(colMaxDouble)) < FixedRasterExtent.epsilon) colMaxDouble.toInt - 1 + else colMaxDouble.toInt + } + + val rowMax = { + val rowMaxDouble = mapYToGridDouble(subExtent.ymin) + if(math.abs(rowMaxDouble - floorWithTolerance(rowMaxDouble)) < FixedRasterExtent.epsilon) rowMaxDouble.toInt - 1 + else rowMaxDouble.toInt + } + + if(clamp) { + GridBounds(math.min(math.max(colMin, 0), cols - 1), + math.min(math.max(rowMin, 0), rows - 1), + math.min(math.max(colMax, 0), cols - 1), + math.min(math.max(rowMax, 0), rows - 1)) + } else { + GridBounds(colMin, rowMin, colMax, rowMax) + } + } + + /** + * Combine two different [[FixedRasterExtent]]s (which must have the + * same cellsizes). The result is a new extent at the same + * resolution. 
+ */ + def combine (that: FixedRasterExtent): FixedRasterExtent = { + if (cellwidth != that.cellwidth) + throw GeoAttrsError(s"illegal cellwidths: $cellwidth and ${that.cellwidth}") + if (cellheight != that.cellheight) + throw GeoAttrsError(s"illegal cellheights: $cellheight and ${that.cellheight}") + + val newExtent = extent.combine(that.extent) + val newRows = ceil(newExtent.height / cellheight).toInt + val newCols = ceil(newExtent.width / cellwidth).toInt + + FixedRasterExtent(newExtent, cellwidth, cellheight, newCols, newRows) + } + + /** + * Returns a [[RasterExtent]] with the same extent, but a modified + * number of columns and rows based on the given cell height and + * width. + */ + def withResolution(targetCellWidth: Double, targetCellHeight: Double): FixedRasterExtent = { + val newCols = math.ceil((extent.xmax - extent.xmin) / targetCellWidth).toInt + val newRows = math.ceil((extent.ymax - extent.ymin) / targetCellHeight).toInt + FixedRasterExtent(extent, targetCellWidth, targetCellHeight, newCols, newRows) + } + + /** + * Returns a [[FixedRasterExtent]] with the same extent, but a modified + * number of columns and rows based on the given cell height and + * width. + */ + def withResolution(cellSize: CellSize): FixedRasterExtent = + withResolution(cellSize.width, cellSize.height) + + /** + * Returns a [[FixedRasterExtent]] with the same extent and the given + * number of columns and rows. + */ + def withDimensions(targetCols: Int, targetRows: Int): FixedRasterExtent = + FixedRasterExtent(extent, targetCols, targetRows) + + /** + * Adjusts a raster extent so that it can encompass the tile + * layout. 
Will resample the extent, but keep the resolution, and + * preserve north and west borders + */ + def adjustTo(tileLayout: TileLayout): FixedRasterExtent = { + val totalCols = tileLayout.tileCols * tileLayout.layoutCols + val totalRows = tileLayout.tileRows * tileLayout.layoutRows + + val resampledExtent = Extent(extent.xmin, extent.ymax - (cellheight*totalRows), + extent.xmin + (cellwidth*totalCols), extent.ymax) + + FixedRasterExtent(resampledExtent, cellwidth, cellheight, totalCols, totalRows) + } + + /** + * Returns a new [[FixedRasterExtent]] which represents the GridBounds + * in relation to this FixedRasterExtent. + */ + def rasterExtentFor(gridBounds: GridBounds): FixedRasterExtent = { + val (xminCenter, ymaxCenter) = gridToMap(gridBounds.colMin, gridBounds.rowMin) + val (xmaxCenter, yminCenter) = gridToMap(gridBounds.colMax, gridBounds.rowMax) + val (hcw, hch) = (cellwidth / 2, cellheight / 2) + val e = Extent(xminCenter - hcw, yminCenter - hch, xmaxCenter + hcw, ymaxCenter + hch) + FixedRasterExtent(e, cellwidth, cellheight, gridBounds.width, gridBounds.height) + } +} + +/** + * The companion object for the [[FixedRasterExtent]] type. + */ +object FixedRasterExtent { + final val epsilon = 0.0000001 + + /** + * Create a new [[FixedRasterExtent]] from an Extent, a column, and a + * row. + */ + def apply(extent: Extent, cols: Int, rows: Int): FixedRasterExtent = { + val cw = extent.width / cols + val ch = extent.height / rows + FixedRasterExtent(extent, cw, ch, cols, rows) + } + + /** + * Create a new [[FixedRasterExtent]] from an Extent and a [[CellSize]]. + */ + def apply(extent: Extent, cellSize: CellSize): FixedRasterExtent = { + val cols = (extent.width / cellSize.width).toInt + val rows = (extent.height / cellSize.height).toInt + FixedRasterExtent(extent, cellSize.width, cellSize.height, cols, rows) + } + + /** + * Create a new [[FixedRasterExtent]] from a [[CellGrid]] and an Extent. 
+ */ + def apply(tile: CellGrid, extent: Extent): FixedRasterExtent = + apply(extent, tile.cols, tile.rows) + + /** + * Create a new [[FixedRasterExtent]] from an Extent and a [[CellGrid]]. + */ + def apply(extent: Extent, tile: CellGrid): FixedRasterExtent = + apply(extent, tile.cols, tile.rows) + + + /** + * The same logic is used in QGIS: https://github.com/qgis/QGIS/blob/607664c5a6b47c559ed39892e736322b64b3faa4/src/analysis/raster/qgsalignraster.cpp#L38 + * The search query: https://github.com/qgis/QGIS/search?p=2&q=floor&type=&utf8=%E2%9C%93 + * + * GDAL uses smth like that, however it was a bit hard to track it down: + * https://github.com/OSGeo/gdal/blob/7601a637dfd204948d00f4691c08f02eb7584de5/gdal/frmts/vrt/vrtsources.cpp#L215 + * */ + def floorWithTolerance(value: Double): Double = { + val roundedValue = math.round(value) + if (math.abs(value - roundedValue) < epsilon) roundedValue + else math.floor(value) + } +} + diff --git a/core/src/main/scala/org/locationtech/rasterframes/model/LazyCRS.scala b/core/src/main/scala/org/locationtech/rasterframes/model/LazyCRS.scala new file mode 100644 index 000000000..66352e258 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/model/LazyCRS.scala @@ -0,0 +1,71 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.model + +import LazyCRS.EncodedCRS +import com.github.blemale.scaffeine.Scaffeine +import geotrellis.proj4.CRS +import org.locationtech.proj4j.CoordinateReferenceSystem + +class LazyCRS(val encoded: EncodedCRS) extends CRS { + private lazy val delegate = LazyCRS.cache.get(encoded) + override def proj4jCrs: CoordinateReferenceSystem = delegate.proj4jCrs + override def toProj4String: String = + if (encoded.startsWith("+proj")) encoded + else delegate.toProj4String + + override def equals(o: Any): Boolean = o match { + case l: LazyCRS => + encoded == l.encoded || + toProj4String == l.toProj4String || + super.equals(o) + case c: CRS => + toProj4String == c.toProj4String || + delegate.equals(c) + case _ => false + } +} + +object LazyCRS { + trait ValidatedCRS + type EncodedCRS = String with ValidatedCRS + + @transient + private lazy val mapper: PartialFunction[String, CRS] = { + case e if e.toUpperCase().startsWith("EPSG") => CRS.fromName(e) //not case-sensitive + case p if p.startsWith("+proj") => CRS.fromString(p) // case sensitive + case w if w.toUpperCase().startsWith("GEOGCS") => CRS.fromWKT(w) //only case-sensitive inside double quotes + } + + @transient + private lazy val cache = Scaffeine().build[String, CRS](mapper) + + def apply(crs: CRS): LazyCRS = apply(crs.toProj4String) + + def apply(value: String): LazyCRS = { + if (mapper.isDefinedAt(value)) { + new LazyCRS(value.asInstanceOf[EncodedCRS]) + } + else throw new IllegalArgumentException( + "crs string must be either EPSG code, +proj string, or OGC WKT") + } +} diff --git a/core/src/main/scala/astraea/spark/rasterframes/model/TileContext.scala b/core/src/main/scala/org/locationtech/rasterframes/model/TileContext.scala similarity index 83% rename from core/src/main/scala/astraea/spark/rasterframes/model/TileContext.scala rename to core/src/main/scala/org/locationtech/rasterframes/model/TileContext.scala index 
f5d49524c..912e1d81e 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/model/TileContext.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/model/TileContext.scala @@ -19,14 +19,16 @@ * */ -package astraea.spark.rasterframes.model -import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder} -import astraea.spark.rasterframes.tiles.ProjectedRasterTile +package org.locationtech.rasterframes.model + import geotrellis.proj4.CRS import geotrellis.raster.Tile import geotrellis.vector.Extent import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.types.{StructField, StructType} +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder} +import org.locationtech.rasterframes.tiles.ProjectedRasterTile case class TileContext(extent: Extent, crs: CRS) { def toProjectRasterTile(t: Tile): ProjectedRasterTile = ProjectedRasterTile(t, extent, crs) @@ -39,8 +41,8 @@ object TileContext { } implicit val serializer: CatalystSerializer[TileContext] = new CatalystSerializer[TileContext] { override def schema: StructType = StructType(Seq( - StructField("extent", CatalystSerializer[Extent].schema, false), - StructField("crs", CatalystSerializer[CRS].schema, false) + StructField("extent", schemaOf[Extent], false), + StructField("crs", schemaOf[CRS], false) )) override protected def to[R](t: TileContext, io: CatalystSerializer.CatalystIO[R]): R = io.create( io.to(t.extent), diff --git a/core/src/main/scala/astraea/spark/rasterframes/model/TileDataContext.scala b/core/src/main/scala/org/locationtech/rasterframes/model/TileDataContext.scala similarity index 80% rename from core/src/main/scala/astraea/spark/rasterframes/model/TileDataContext.scala rename to core/src/main/scala/org/locationtech/rasterframes/model/TileDataContext.scala index 121f8b845..9f6bd358f 100644 --- 
a/core/src/main/scala/astraea/spark/rasterframes/model/TileDataContext.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/model/TileDataContext.scala @@ -19,15 +19,16 @@ * */ -package astraea.spark.rasterframes.model -import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder} -import astraea.spark.rasterframes.encoders.CatalystSerializer._ +package org.locationtech.rasterframes.model + +import org.locationtech.rasterframes.encoders.CatalystSerializer._ import geotrellis.raster.{CellType, Tile} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.types.{StructField, StructType} +import org.locationtech.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder} /** Encapsulates all information about a tile aside from actual cell values. */ -case class TileDataContext(cell_type: CellType, dimensions: TileDimensions) +case class TileDataContext(cellType: CellType, dimensions: TileDimensions) object TileDataContext { /** Extracts the TileDataContext from a Tile. 
*/ @@ -41,12 +42,12 @@ object TileDataContext { implicit val serializer: CatalystSerializer[TileDataContext] = new CatalystSerializer[TileDataContext] { override def schema: StructType = StructType(Seq( - StructField("cell_type", CatalystSerializer[CellType].schema, false), - StructField("dimensions", CatalystSerializer[TileDimensions].schema, false) + StructField("cellType", schemaOf[CellType], false), + StructField("dimensions", schemaOf[TileDimensions], false) )) override protected def to[R](t: TileDataContext, io: CatalystIO[R]): R = io.create( - io.to(t.cell_type), + io.to(t.cellType), io.to(t.dimensions) ) override protected def from[R](t: R, io: CatalystIO[R]): TileDataContext = TileDataContext( diff --git a/core/src/main/scala/astraea/spark/rasterframes/model/TileDimensions.scala b/core/src/main/scala/org/locationtech/rasterframes/model/TileDimensions.scala similarity index 87% rename from core/src/main/scala/astraea/spark/rasterframes/model/TileDimensions.scala rename to core/src/main/scala/org/locationtech/rasterframes/model/TileDimensions.scala index 2f7f579ba..e419ac668 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/model/TileDimensions.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/model/TileDimensions.scala @@ -19,13 +19,13 @@ * */ -package astraea.spark.rasterframes.model +package org.locationtech.rasterframes.model -import astraea.spark.rasterframes.encoders.CatalystSerializer.CatalystIO -import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder} +import org.locationtech.rasterframes.encoders.CatalystSerializer.CatalystIO import geotrellis.raster.Grid import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.types.{ShortType, StructField, StructType} +import org.locationtech.rasterframes.encoders.CatalystSerializer /** * Typed wrapper for tile size information. 
@@ -49,8 +49,8 @@ object TileDimensions { ) override protected def from[R](t: R, io: CatalystIO[R]): TileDimensions = TileDimensions( - io.getShort(t, 0), - io.getShort(t, 1) + io.getShort(t, 0).toInt, + io.getShort(t, 1).toInt ) } diff --git a/core/src/main/scala/astraea/spark/rasterframes/package.scala b/core/src/main/scala/org/locationtech/rasterframes/rasterframes.scala similarity index 65% rename from core/src/main/scala/astraea/spark/rasterframes/package.scala rename to core/src/main/scala/org/locationtech/rasterframes/rasterframes.scala index 7b360ed25..1517e8f0e 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/package.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/rasterframes.scala @@ -1,56 +1,62 @@ /* + * This software is licensed under the Apache 2 license, quoted below. + * * Copyright 2017 Astraea, Inc. * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * [http://www.apache.org/licenses/LICENSE-2.0] * * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark - -import astraea.spark.rasterframes.encoders.StandardEncoders -import astraea.spark.rasterframes.util.ZeroSevenCompatibilityKit +package org.locationtech import com.typesafe.config.ConfigFactory import com.typesafe.scalalogging.LazyLogging +import geotrellis.raster.isData import geotrellis.raster.{Tile, TileFeature} import geotrellis.spark.{ContextRDD, Metadata, SpaceTimeKey, SpatialKey, TileLayerMetadata} import org.apache.spark.rdd.RDD -import org.apache.spark.sql._ +import org.apache.spark.sql.rf.{RasterSourceUDT, TileUDT} +import org.apache.spark.sql.{DataFrame, SQLContext, rf} import org.locationtech.geomesa.spark.jts.DataFrameFunctions +import org.locationtech.rasterframes.encoders.StandardEncoders +import org.locationtech.rasterframes.extensions.Implicits +import org.locationtech.rasterframes.model.TileDimensions +import org.locationtech.rasterframes.util.ZeroSevenCompatibilityKit import shapeless.tag.@@ -import scala.language.higherKinds import scala.reflect.runtime.universe._ -/** - * Module providing support for RasterFrames. - * `import astraea.spark.rasterframes._`., and then call `rfInit(SQLContext)`. 
- * - * @since 7/18/17 - */ package object rasterframes extends StandardColumns with RasterFunctions with ZeroSevenCompatibilityKit.RasterFunctions - with rasterframes.extensions.Implicits + with Implicits with rasterframes.jts.Implicits with StandardEncoders with DataFrameFunctions.Library with LazyLogging { + @transient + private[rasterframes] + val rfConfig = ConfigFactory.load().getConfig("rasterframes") + /** The generally expected tile size, as defined by configuration property `rasterframes.nominal-tile-size`.*/ @transient - final val NOMINAL_TILE_SIZE: Int = ConfigFactory.load().getInt("rasterframes.nominal-tile-size") + final val NOMINAL_TILE_SIZE: Int = rfConfig.getInt("nominal-tile-size") + final val NOMINAL_TILE_DIMS: TileDimensions = TileDimensions(NOMINAL_TILE_SIZE, NOMINAL_TILE_SIZE) /** - * Initialization injection point. Must be called before any RasterFrame + * Initialization injection point. Must be called before any RasterFrameLayer * types are used. */ def initRF(sqlContext: SQLContext): Unit = { @@ -79,15 +85,21 @@ package object rasterframes extends StandardColumns rasterframes.rules.register(sqlContext) } + /** TileUDT type reference. */ + def TileType = new TileUDT() + + /** RasterSourceUDT type reference. */ + def RasterSourceType = new RasterSourceUDT() + /** - * A RasterFrame is just a DataFrame with certain invariants, enforced via the methods that create and transform them: + * A RasterFrameLayer is just a DataFrame with certain invariants, enforced via the methods that create and transform them: * 1. One column is a [[geotrellis.spark.SpatialKey]] or [[geotrellis.spark.SpaceTimeKey]] * 2. One or more columns is a [[Tile]] UDT. * 3. The `TileLayerMetadata` is encoded and attached to the key column. */ - type RasterFrame = DataFrame @@ RasterFrameTag + type RasterFrameLayer = DataFrame @@ RasterFrameTag - /** Tagged type for allowing compiler to help keep track of what has RasterFrame assurances applied to it. 
*/ + /** Tagged type for allowing compiler to help keep track of what has RasterFrameLayer assurances applied to it. */ trait RasterFrameTag type TileFeatureLayerRDD[K, D] = @@ -113,6 +125,8 @@ package object rasterframes extends StandardColumns trait StandardLayerKey[T] extends Serializable { val selfType: TypeTag[T] def isType[R: TypeTag]: Boolean = typeOf[R] =:= selfType.tpe + def coerce[K >: T](tlm: TileLayerMetadata[_]): TileLayerMetadata[K] = + tlm.asInstanceOf[TileLayerMetadata[K]] } object StandardLayerKey { def apply[T: StandardLayerKey]: StandardLayerKey[T] = implicitly @@ -124,4 +138,8 @@ package object rasterframes extends StandardColumns } } + /** Test if a cell value evaluates to true: it is not NoData and it is non-zero */ + def isCellTrue(v: Double): Boolean = isData(v) & v != 0.0 + /** Test if a cell value evaluates to true: it is not NoData and it is non-zero */ + def isCellTrue(v: Int): Boolean = isData(v) & v != 0 } diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/DelegatingRasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/DelegatingRasterSource.scala new file mode 100644 index 000000000..c460911a0 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/ref/DelegatingRasterSource.scala @@ -0,0 +1,89 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.ref + +import java.net.URI + +import geotrellis.contrib.vlm.{RasterSource => GTRasterSource} +import geotrellis.proj4.CRS +import geotrellis.raster.io.geotiff.Tags +import geotrellis.raster.{CellType, GridBounds, MultibandTile, Raster} +import geotrellis.vector.Extent +import org.locationtech.rasterframes.ref.RasterSource.URIRasterSource + +/** A RasterFrames RasterSource which delegates most operations to a geotrellis-contrib RasterSource */ +abstract class DelegatingRasterSource(source: URI, delegateBuilder: () => GTRasterSource) extends RasterSource with URIRasterSource { + @transient + @volatile + private var _delRef: GTRasterSource = _ + + private def retryableRead[R >: Null](f: GTRasterSource => R): R = synchronized { + try { + if (_delRef == null) + _delRef = delegateBuilder() + f(_delRef) + } + catch { + // On this exeception we attempt to recreate the delegate and read again. + case _: java.nio.BufferUnderflowException => + _delRef = null + val newDel = delegateBuilder() + val result = f(newDel) + _delRef = newDel + result + } + } + + // Bad? 
+ override def equals(obj: Any): Boolean = obj match { + case drs: DelegatingRasterSource => drs.source == source + case _ => false + } + + override def hashCode(): Int = source.hashCode() + + // This helps reduce header reads between serializations + def info: SimpleRasterInfo = SimpleRasterInfo.cache.get(source.toASCIIString, _ => + retryableRead(rs => SimpleRasterInfo(rs)) + ) + + override def cols: Int = info.cols + override def rows: Int = info.rows + override def crs: CRS = info.crs + override def extent: Extent = info.extent + override def cellType: CellType = info.cellType + override def bandCount: Int = info.bandCount + override def tags: Tags = info.tags + + override protected def readBounds(bounds: Traversable[GridBounds], bands: Seq[Int]): Iterator[Raster[MultibandTile]] = + retryableRead(_.readBounds(bounds, bands)) + + override def read(bounds: GridBounds, bands: Seq[Int]): Raster[MultibandTile] = + retryableRead(_.read(bounds, bands) + .getOrElse(throw new IllegalArgumentException(s"Bounds '$bounds' outside of source")) + ) + + override def read(extent: Extent, bands: Seq[Int]): Raster[MultibandTile] = + retryableRead(_.read(extent, bands) + .getOrElse(throw new IllegalArgumentException(s"Extent '$extent' outside of source")) + ) +} diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/GDALRasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/GDALRasterSource.scala new file mode 100644 index 000000000..481155f24 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/ref/GDALRasterSource.scala @@ -0,0 +1,85 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.ref + +import java.net.URI + +import com.azavea.gdal.GDALWarp +import com.typesafe.scalalogging.LazyLogging +import geotrellis.contrib.vlm.gdal.{GDALRasterSource => VLMRasterSource} +import geotrellis.proj4.CRS +import geotrellis.raster.io.geotiff.Tags +import geotrellis.raster.{CellType, GridBounds, MultibandTile, Raster} +import geotrellis.vector.Extent +import org.locationtech.rasterframes.ref.RasterSource.URIRasterSource + +case class GDALRasterSource(source: URI) extends RasterSource with URIRasterSource { + + @transient + private lazy val gdal: VLMRasterSource = { + val cleaned = source.toASCIIString + .replace("gdal+", "") + .replace("gdal:/", "") + // VSIPath doesn't like single slash "file:/path..." 
+ val tweaked = + if (cleaned.matches("^file:/[^/].*")) + cleaned.replace("file:", "") + else cleaned + + VLMRasterSource(tweaked) + } + + protected def tiffInfo = SimpleRasterInfo.cache.get(source.toASCIIString, _ => SimpleRasterInfo(gdal)) + + override def crs: CRS = tiffInfo.crs + + override def extent: Extent = tiffInfo.extent + + private def metadata = Map.empty[String, String] + + override def cellType: CellType = tiffInfo.cellType + + override def bandCount: Int = tiffInfo.bandCount + + override def cols: Int = tiffInfo.cols + + override def rows: Int = tiffInfo.rows + + override def tags: Tags = Tags(metadata, List.empty) + + override protected def readBounds(bounds: Traversable[GridBounds], bands: Seq[Int]): Iterator[Raster[MultibandTile]] = + gdal.readBounds(bounds, bands) +} + +object GDALRasterSource extends LazyLogging { + def gdalVersion(): String = if (hasGDAL) GDALWarp.get_version_info("--version").trim else "not available" + + @transient + lazy val hasGDAL: Boolean = try { + val _ = new GDALWarp() + true + } catch { + case _: UnsatisfiedLinkError => + logger.warn("GDAL native bindings are not available. Falling back to JVM-based reader for GeoTIFF format.") + false + } +} diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/HadoopGeoTiffRasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/HadoopGeoTiffRasterSource.scala new file mode 100644 index 000000000..3249f1bce --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/ref/HadoopGeoTiffRasterSource.scala @@ -0,0 +1,35 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.ref + +import java.net.URI + +import geotrellis.spark.io.hadoop.HdfsRangeReader +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.locationtech.rasterframes.ref.RasterSource.{URIRasterSource, URIRasterSourceDebugString} + +case class HadoopGeoTiffRasterSource(source: URI, config: () => Configuration) + extends RangeReaderRasterSource with URIRasterSource with URIRasterSourceDebugString { self => + @transient + protected lazy val rangeReader = HdfsRangeReader(new Path(source.getPath), config()) +} diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/InMemoryRasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/InMemoryRasterSource.scala new file mode 100644 index 000000000..3a6a2f5e1 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/ref/InMemoryRasterSource.scala @@ -0,0 +1,52 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.ref + +import geotrellis.proj4.CRS +import geotrellis.raster.{CellType, GridBounds, MultibandTile, Raster, Tile} +import geotrellis.raster.io.geotiff.Tags +import geotrellis.vector.Extent +import org.locationtech.rasterframes.ref.RasterSource.EMPTY_TAGS +import org.locationtech.rasterframes.tiles.ProjectedRasterTile + +case class InMemoryRasterSource(tile: Tile, extent: Extent, crs: CRS) extends RasterSource { + def this(prt: ProjectedRasterTile) = this(prt, prt.extent, prt.crs) + + override def rows: Int = tile.rows + + override def cols: Int = tile.cols + + override def cellType: CellType = tile.cellType + + override def bandCount: Int = 1 + + override def tags: Tags = EMPTY_TAGS + + override protected def readBounds(bounds: Traversable[GridBounds], bands: Seq[Int]): Iterator[Raster[MultibandTile]] = { + bounds + .map(b => { + val subext = rasterExtent.extentFor(b) + Raster(MultibandTile(tile.crop(b)), subext) + }) + .toIterator + } +} diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/JVMGeoTiffRasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/JVMGeoTiffRasterSource.scala new file mode 100644 index 000000000..cedb81c61 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/ref/JVMGeoTiffRasterSource.scala @@ -0,0 +1,28 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.ref + +import java.net.URI + +import geotrellis.contrib.vlm.geotiff.GeoTiffRasterSource + +case class JVMGeoTiffRasterSource(source: URI) extends DelegatingRasterSource(source, () => GeoTiffRasterSource(source.toASCIIString)) diff --git a/core/src/main/scala/astraea/spark/rasterframes/ref/ProjectedRasterLike.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/ProjectedRasterLike.scala similarity index 95% rename from core/src/main/scala/astraea/spark/rasterframes/ref/ProjectedRasterLike.scala rename to core/src/main/scala/org/locationtech/rasterframes/ref/ProjectedRasterLike.scala index 7c5b2729d..515c47d12 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/ref/ProjectedRasterLike.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/ref/ProjectedRasterLike.scala @@ -19,7 +19,7 @@ * */ -package astraea.spark.rasterframes.ref +package org.locationtech.rasterframes.ref import geotrellis.proj4.CRS import geotrellis.raster.CellGrid diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/RangeReaderRasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/RangeReaderRasterSource.scala new file mode 100644 index 000000000..90df001bd --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/ref/RangeReaderRasterSource.scala @@ -0,0 +1,64 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.ref + +import com.typesafe.scalalogging.LazyLogging +import geotrellis.proj4.CRS +import geotrellis.raster.{CellType, GridBounds, MultibandTile, Raster} +import geotrellis.raster.io.geotiff.Tags +import geotrellis.raster.io.geotiff.reader.GeoTiffReader +import geotrellis.util.RangeReader +import geotrellis.vector.Extent +import org.locationtech.rasterframes.util.GeoTiffInfoSupport + +trait RangeReaderRasterSource extends RasterSource with GeoTiffInfoSupport with LazyLogging { + protected def rangeReader: RangeReader + + private def realInfo = + GeoTiffReader.readGeoTiffInfo(rangeReader, streaming = true, withOverviews = false) + + protected lazy val tiffInfo = SimpleRasterInfo(realInfo) + + def crs: CRS = tiffInfo.crs + + def extent: Extent = tiffInfo.extent + + override def cols: Int = tiffInfo.rasterExtent.cols + + override def rows: Int = tiffInfo.rasterExtent.rows + + def cellType: CellType = tiffInfo.cellType + + def bandCount: Int = tiffInfo.bandCount + + override def tags: Tags = tiffInfo.tags + + override protected def readBounds(bounds: Traversable[GridBounds], bands: Seq[Int]): Iterator[Raster[MultibandTile]] = { + val info = realInfo + val geoTiffTile = GeoTiffReader.geoTiffMultibandTile(info) + val intersectingBounds = bounds.flatMap(_.intersection(this)).toSeq + geoTiffTile.crop(intersectingBounds, 
bands.toArray).map { + case (gb, tile) => + Raster(tile, rasterExtent.extentFor(gb, clamp = true)) + } + } +} diff --git a/core/src/main/scala/astraea/spark/rasterframes/ref/RasterRef.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/RasterRef.scala similarity index 62% rename from core/src/main/scala/astraea/spark/rasterframes/ref/RasterRef.scala rename to core/src/main/scala/org/locationtech/rasterframes/ref/RasterRef.scala index ff176765c..b0aabcc48 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/ref/RasterRef.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/ref/RasterRef.scala @@ -19,26 +19,26 @@ * */ -package astraea.spark.rasterframes.ref +package org.locationtech.rasterframes.ref -import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder} -import astraea.spark.rasterframes.encoders.CatalystSerializer.CatalystIO -import astraea.spark.rasterframes.tiles.ProjectedRasterTile import com.typesafe.scalalogging.LazyLogging import geotrellis.proj4.CRS -import geotrellis.raster.{CellType, GridBounds, Tile, TileLayout} -import geotrellis.spark.tiling.LayoutDefinition +import geotrellis.raster.{CellType, GridBounds, Tile} import geotrellis.vector.{Extent, ProjectedExtent} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.rf.RasterSourceUDT -import org.apache.spark.sql.types.{StructField, StructType} +import org.apache.spark.sql.types.{IntegerType, StructField, StructType} +import org.locationtech.rasterframes.encoders.CatalystSerializer.{CatalystIO, _} +import org.locationtech.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder} +import org.locationtech.rasterframes.ref.RasterRef.RasterRefTile +import org.locationtech.rasterframes.tiles.ProjectedRasterTile /** * A delayed-read projected raster implementation. 
* * @since 8/21/18 */ -case class RasterRef(source: RasterSource, subextent: Option[Extent]) +case class RasterRef(source: RasterSource, bandIndex: Int, subextent: Option[Extent]) extends ProjectedRasterLike { def crs: CRS = source.crs def extent: Extent = subextent.getOrElse(source.extent) @@ -46,44 +46,24 @@ case class RasterRef(source: RasterSource, subextent: Option[Extent]) def cols: Int = grid.width def rows: Int = grid.height def cellType: CellType = source.cellType - def tile: ProjectedRasterTile = ProjectedRasterTile(realizedTile, extent, crs) + def tile: ProjectedRasterTile = ProjectedRasterTile(RasterRefTile(this), extent, crs) - protected lazy val grid: GridBounds = source.rasterExtent.gridBoundsFor(extent) + protected lazy val grid: GridBounds = source.rasterExtent.gridBoundsFor(extent, true) protected def srcExtent: Extent = extent protected lazy val realizedTile: Tile = { - require(source.bandCount == 1, "Expected singleband tile") - RasterRef.log.trace(s"Fetching $srcExtent from $source") - source.read(srcExtent).left.get.tile - } - - /** Splits this tile into smaller tiles based on the reported - * internal structure of the backing format. May return a single item.*/ - def tileToNative: Seq[RasterRef] = { - val ex = this.extent - this.source.nativeTiling - .filter(_ intersects ex) - .map(e ⇒ RasterRef(this.source, Option(e))) + RasterRef.log.trace(s"Fetching $srcExtent from band $bandIndex of $source") + source.read(srcExtent, Seq(bandIndex)).tile.band(0) } } object RasterRef extends LazyLogging { private val log = logger - - /** Constructor for when data extent cover whole raster. 
*/ - def apply(source: RasterSource): RasterRef = RasterRef(source, None) - - private[rasterframes] - def defaultLayout(rr: RasterRef): LayoutDefinition = - LayoutDefinition(rr.extent, rr.source.nativeLayout - .getOrElse(TileLayout(1, 1, rr.cols, rr.rows)) - ) - case class RasterRefTile(rr: RasterRef) extends ProjectedRasterTile { val extent: Extent = rr.extent val crs: CRS = rr.crs - override val cellType = rr.cellType + override def cellType = rr.cellType override val cols: Int = rr.cols override val rows: Int = rr.rows @@ -98,18 +78,21 @@ object RasterRef extends LazyLogging { val rsType = new RasterSourceUDT() override def schema: StructType = StructType(Seq( StructField("source", rsType, false), - StructField("subextent", CatalystSerializer[Extent].schema, true) + StructField("bandIndex", IntegerType, false), + StructField("subextent", schemaOf[Extent], true) )) override def to[R](t: RasterRef, io: CatalystIO[R]): R = io.create( io.to(t.source)(RasterSourceUDT.rasterSourceSerializer), + t.bandIndex, t.subextent.map(io.to[Extent]).orNull ) override def from[R](row: R, io: CatalystIO[R]): RasterRef = RasterRef( io.get[RasterSource](row, 0)(RasterSourceUDT.rasterSourceSerializer), - if (io.isNullAt(row, 1)) None - else Option(io.get[Extent](row, 1)) + io.getInt(row, 1), + if (io.isNullAt(row, 2)) None + else Option(io.get[Extent](row, 2)) ) } diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala new file mode 100644 index 000000000..0f73f85cf --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/ref/RasterSource.scala @@ -0,0 +1,181 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2018 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.ref + +import java.net.URI + +import com.github.blemale.scaffeine.Scaffeine +import com.typesafe.scalalogging.LazyLogging +import geotrellis.proj4.CRS +import geotrellis.raster._ +import geotrellis.raster.io.geotiff.Tags +import geotrellis.vector.Extent +import org.apache.hadoop.conf.Configuration +import org.apache.spark.annotation.Experimental +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.rf.RasterSourceUDT +import org.locationtech.rasterframes.model.{FixedRasterExtent, TileContext, TileDimensions} +import org.locationtech.rasterframes.{NOMINAL_TILE_DIMS, rfConfig} + +import scala.concurrent.duration.Duration + +/** + * Abstraction over fetching geospatial raster data. 
+ * + * @since 8/21/18 + */ +@Experimental +trait RasterSource extends ProjectedRasterLike with Serializable { + import RasterSource._ + + def crs: CRS + + def extent: Extent + + def cellType: CellType + + def bandCount: Int + + def tags: Tags + + def read(bounds: GridBounds, bands: Seq[Int]): Raster[MultibandTile] = + readBounds(Seq(bounds), bands).next() + + def read(extent: Extent, bands: Seq[Int] = SINGLEBAND): Raster[MultibandTile] = + read(rasterExtent.gridBoundsFor(extent, clamp = true), bands) + + def readAll(dims: TileDimensions = NOMINAL_TILE_DIMS, bands: Seq[Int] = SINGLEBAND): Seq[Raster[MultibandTile]] = + layoutBounds(dims).map(read(_, bands)) + + protected def readBounds(bounds: Traversable[GridBounds], bands: Seq[Int]): Iterator[Raster[MultibandTile]] + + def rasterExtent = FixedRasterExtent(extent, cols, rows) + + def cellSize = CellSize(extent, cols, rows) + + def gridExtent = GridExtent(extent, cellSize) + + def tileContext: TileContext = TileContext(extent, crs) + + def layoutExtents(dims: TileDimensions): Seq[Extent] = { + val re = rasterExtent + layoutBounds(dims).map(re.rasterExtentFor).map(_.extent) + } + + def layoutBounds(dims: TileDimensions): Seq[GridBounds] = { + gridBounds.split(dims.cols, dims.rows).toSeq + } +} + +object RasterSource extends LazyLogging { + final val SINGLEBAND = Seq(0) + final val EMPTY_TAGS = Tags(Map.empty, List.empty) + + val cacheTimeout: Duration = Duration.fromNanos(rfConfig.getDuration("raster-source-cache-timeout").toNanos) + + private val rsCache = Scaffeine() + .expireAfterAccess(RasterSource.cacheTimeout) + .build[String, RasterSource] + + implicit def rsEncoder: ExpressionEncoder[RasterSource] = { + RasterSourceUDT // Makes sure UDT is registered first + ExpressionEncoder() + } + + def apply(source: URI): RasterSource = + rsCache.get( + source.toASCIIString, _ => source match { + case IsGDAL() => GDALRasterSource(source) + case IsHadoopGeoTiff() => + // TODO: How can we get the active hadoop 
configuration + // TODO: without having to pass it through? + val config = () => new Configuration() + HadoopGeoTiffRasterSource(source, config) + case IsDefaultGeoTiff() => JVMGeoTiffRasterSource(source) + case s => throw new UnsupportedOperationException(s"Reading '$s' not supported") + } + ) + + object IsGDAL { + + /** Determine if we should prefer GDAL for all types. */ + private val preferGdal: Boolean = org.locationtech.rasterframes.rfConfig.getBoolean("prefer-gdal") + + val gdalOnlyExtensions = Seq(".jp2", ".mrf", ".hdf", ".vrt") + + def gdalOnly(source: URI): Boolean = + if (gdalOnlyExtensions.exists(source.getPath.toLowerCase.endsWith)) { + require(GDALRasterSource.hasGDAL, s"Can only read $source if GDAL is available") + true + } else false + + /** Extractor for determining if a scheme indicates GDAL preference. */ + def unapply(source: URI): Boolean = { + lazy val schemeIsGdal = Option(source.getScheme()) + .exists(_.startsWith("gdal")) + + gdalOnly(source) || ((preferGdal || schemeIsGdal) && GDALRasterSource.hasGDAL) + } + } + + object IsDefaultGeoTiff { + def unapply(source: URI): Boolean = source.getScheme match { + case "file" | "http" | "https" | "s3" => true + case null | "" ⇒ true + case _ => false + } + } + + object IsHadoopGeoTiff { + def unapply(source: URI): Boolean = source.getScheme match { + case "hdfs" | "s3n" | "s3a" | "wasb" | "wasbs" => true + case _ => false + } + } + + trait URIRasterSource { _: RasterSource => + def source: URI + + abstract override def toString: String = { + s"${getClass.getSimpleName}(${source})" + } + } + trait URIRasterSourceDebugString { _: RasterSource with URIRasterSource with Product => + def toDebugString: String = { + val buf = new StringBuilder() + buf.append(productPrefix) + buf.append("(") + buf.append("source=") + buf.append(source.toASCIIString) + buf.append(", size=") + buf.append(size) + buf.append(", dimensions=") + buf.append(dimensions) + buf.append(", crs=") + buf.append(crs) + buf.append(", 
extent=") + buf.append(extent) + buf.append(")") + buf.toString + } + } +} diff --git a/core/src/main/scala/org/locationtech/rasterframes/ref/SimpleRasterInfo.scala b/core/src/main/scala/org/locationtech/rasterframes/ref/SimpleRasterInfo.scala new file mode 100644 index 000000000..1a67822e5 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/ref/SimpleRasterInfo.scala @@ -0,0 +1,82 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.ref + +import com.github.blemale.scaffeine.Scaffeine +import geotrellis.contrib.vlm.geotiff.GeoTiffRasterSource +import geotrellis.contrib.vlm.{RasterSource => GTRasterSource} +import geotrellis.proj4.CRS +import geotrellis.raster.io.geotiff.Tags +import geotrellis.raster.io.geotiff.reader.GeoTiffReader +import geotrellis.raster.{CellType, RasterExtent} +import geotrellis.vector.Extent +import org.locationtech.rasterframes.ref.RasterSource.EMPTY_TAGS + +case class SimpleRasterInfo( + cols: Int, + rows: Int, + cellType: CellType, + extent: Extent, + rasterExtent: RasterExtent, + crs: CRS, + tags: Tags, + bandCount: Int, + noDataValue: Option[Double] +) + +object SimpleRasterInfo { + def apply(info: GeoTiffReader.GeoTiffInfo): SimpleRasterInfo = + SimpleRasterInfo( + info.segmentLayout.totalCols, + info.segmentLayout.totalRows, + info.cellType, + info.extent, + info.rasterExtent, + info.crs, + info.tags, + info.bandCount, + info.noDataValue + ) + + def apply(rs: GTRasterSource): SimpleRasterInfo = { + def fetchTags: Tags = rs match { + case gt: GeoTiffRasterSource => gt.tiff.tags + case _ => EMPTY_TAGS + } + + SimpleRasterInfo( + rs.cols, + rs.rows, + rs.cellType, + rs.extent, + rs.rasterExtent, + rs.crs, + fetchTags, + rs.bandCount, + None + ) + } + + lazy val cache = Scaffeine() + //.recordStats() + .build[String, SimpleRasterInfo] +} \ No newline at end of file diff --git a/core/src/main/scala/astraea/spark/rasterframes/rules/SpatialFilterPushdownRules.scala b/core/src/main/scala/org/locationtech/rasterframes/rules/SpatialFilterPushdownRules.scala similarity index 93% rename from core/src/main/scala/astraea/spark/rasterframes/rules/SpatialFilterPushdownRules.scala rename to core/src/main/scala/org/locationtech/rasterframes/rules/SpatialFilterPushdownRules.scala index d61640748..3b3e54d6f 100644 --- 
a/core/src/main/scala/astraea/spark/rasterframes/rules/SpatialFilterPushdownRules.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/rules/SpatialFilterPushdownRules.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2018 Astraea. Inc. + * Copyright 2018 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -15,10 +15,11 @@ * License for the specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes.rules +package org.locationtech.rasterframes.rules import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule diff --git a/core/src/main/scala/astraea/spark/rasterframes/rules/SpatialFilters.scala b/core/src/main/scala/org/locationtech/rasterframes/rules/SpatialFilters.scala similarity index 87% rename from core/src/main/scala/astraea/spark/rasterframes/rules/SpatialFilters.scala rename to core/src/main/scala/org/locationtech/rasterframes/rules/SpatialFilters.scala index 1a1128150..cf731b658 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/rules/SpatialFilters.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/rules/SpatialFilters.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2018 Astraea. Inc. + * Copyright 2018 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -15,12 +15,13 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes.rules +package org.locationtech.rasterframes.rules -import com.vividsolutions.jts.geom.Geometry +import org.locationtech.jts.geom.Geometry import org.apache.spark.sql.sources.Filter /** diff --git a/core/src/main/scala/astraea/spark/rasterframes/rules/SpatialRelationReceiver.scala b/core/src/main/scala/org/locationtech/rasterframes/rules/SpatialRelationReceiver.scala similarity index 92% rename from core/src/main/scala/astraea/spark/rasterframes/rules/SpatialRelationReceiver.scala rename to core/src/main/scala/org/locationtech/rasterframes/rules/SpatialRelationReceiver.scala index 36c681d14..403d122ea 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/rules/SpatialRelationReceiver.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/rules/SpatialRelationReceiver.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2018 Astraea. Inc. + * Copyright 2018 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -15,10 +15,11 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes.rules +package org.locationtech.rasterframes.rules import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.sources.{BaseRelation, Filter} diff --git a/core/src/main/scala/org/locationtech/rasterframes/rules/SpatialUDFSubstitutionRules.scala b/core/src/main/scala/org/locationtech/rasterframes/rules/SpatialUDFSubstitutionRules.scala new file mode 100644 index 000000000..d6fea76b0 --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/rules/SpatialUDFSubstitutionRules.scala @@ -0,0 +1,42 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2018 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.rules + +import org.locationtech.rasterframes.expressions.SpatialRelation +import org.apache.spark.sql.catalyst.expressions.ScalaUDF +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule + +/** + * Swaps out spatial relation UDFs for expression forms. 
+ * + * @since 2/19/18 + */ +object SpatialUDFSubstitutionRules extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = { + plan.transform { + case q: LogicalPlan => q.transformExpressions { + case s: ScalaUDF => SpatialRelation.fromUDF(s).getOrElse(s) + } + } + } +} diff --git a/core/src/main/scala/astraea/spark/rasterframes/rules/TemporalFilters.scala b/core/src/main/scala/org/locationtech/rasterframes/rules/TemporalFilters.scala similarity index 91% rename from core/src/main/scala/astraea/spark/rasterframes/rules/TemporalFilters.scala rename to core/src/main/scala/org/locationtech/rasterframes/rules/TemporalFilters.scala index 51ea9ddd7..5315b63b7 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/rules/TemporalFilters.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/rules/TemporalFilters.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2018 Astraea. Inc. + * Copyright 2018 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -15,10 +15,11 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes.rules +package org.locationtech.rasterframes.rules import java.sql.{Date, Timestamp} diff --git a/core/src/main/scala/astraea/spark/rasterframes/rules/package.scala b/core/src/main/scala/org/locationtech/rasterframes/rules/package.scala similarity index 59% rename from core/src/main/scala/astraea/spark/rasterframes/rules/package.scala rename to core/src/main/scala/org/locationtech/rasterframes/rules/package.scala index ff4755a86..0f028e14e 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/rules/package.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/rules/package.scala @@ -1,4 +1,25 @@ -package astraea.spark.rasterframes +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan diff --git a/core/src/main/scala/astraea/spark/rasterframes/stats/CellHistogram.scala b/core/src/main/scala/org/locationtech/rasterframes/stats/CellHistogram.scala similarity index 94% rename from core/src/main/scala/astraea/spark/rasterframes/stats/CellHistogram.scala rename to core/src/main/scala/org/locationtech/rasterframes/stats/CellHistogram.scala index efc4908db..be3d547a3 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/stats/CellHistogram.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/stats/CellHistogram.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2018 Astraea. Inc. + * Copyright 2018 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -15,14 +15,16 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes.stats -import astraea.spark.rasterframes.encoders.StandardEncoders +package org.locationtech.rasterframes.stats + import geotrellis.raster.Tile import geotrellis.raster.histogram.{Histogram => GTHistogram} import org.apache.spark.sql.types._ +import org.locationtech.rasterframes.encoders.StandardEncoders import scala.collection.mutable.{ListBuffer => MutableListBuffer} @@ -160,13 +162,13 @@ object CellHistogram { } else { val h = tile.histogram - h.binCounts().map(p ⇒ Bin(p._1, p._2)) + h.binCounts().map(p ⇒ Bin(p._1.toDouble, p._2)) } CellHistogram(bins) } def apply(hist: GTHistogram[Int]): CellHistogram = { - CellHistogram(hist.binCounts().map(p ⇒ Bin(p._1, p._2))) + CellHistogram(hist.binCounts().map(p ⇒ Bin(p._1.toDouble, p._2))) } def apply(hist: GTHistogram[Double])(implicit ev: DummyImplicit): CellHistogram = { CellHistogram(hist.binCounts().map(p ⇒ Bin(p._1, p._2))) diff --git a/core/src/main/scala/astraea/spark/rasterframes/stats/CellStatistics.scala b/core/src/main/scala/org/locationtech/rasterframes/stats/CellStatistics.scala similarity index 93% rename from core/src/main/scala/astraea/spark/rasterframes/stats/CellStatistics.scala rename to core/src/main/scala/org/locationtech/rasterframes/stats/CellStatistics.scala index e1ba03b60..ea371666d 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/stats/CellStatistics.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/stats/CellStatistics.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2018 Astraea. Inc. + * Copyright 2018 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -15,13 +15,15 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes.stats -import astraea.spark.rasterframes.encoders.StandardEncoders +package org.locationtech.rasterframes.stats + import geotrellis.raster.Tile import org.apache.spark.sql.types.StructType +import org.locationtech.rasterframes.encoders.StandardEncoders /** * Container for computed statistics over cells. diff --git a/core/src/main/scala/astraea/spark/rasterframes/stats/LocalCellStatistics.scala b/core/src/main/scala/org/locationtech/rasterframes/stats/LocalCellStatistics.scala similarity index 94% rename from core/src/main/scala/astraea/spark/rasterframes/stats/LocalCellStatistics.scala rename to core/src/main/scala/org/locationtech/rasterframes/stats/LocalCellStatistics.scala index 685722f62..39c0671f8 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/stats/LocalCellStatistics.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/stats/LocalCellStatistics.scala @@ -19,7 +19,8 @@ * */ -package astraea.spark.rasterframes.stats +package org.locationtech.rasterframes.stats + import geotrellis.raster.Tile case class LocalCellStatistics(count: Tile, min: Tile, max: Tile, mean: Tile, variance: Tile) diff --git a/core/src/main/scala/org/locationtech/rasterframes/tiles/FixedDelegatingTile.scala b/core/src/main/scala/org/locationtech/rasterframes/tiles/FixedDelegatingTile.scala new file mode 100644 index 000000000..52bfa5c1d --- /dev/null +++ b/core/src/main/scala/org/locationtech/rasterframes/tiles/FixedDelegatingTile.scala @@ -0,0 +1,40 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.tiles +import geotrellis.raster.{ArrayTile, DelegatingTile, Tile} + +/** + * Temporary workaround for https://github.com/locationtech/geotrellis/issues/2907 + * + * @since 8/22/18 + */ +trait FixedDelegatingTile extends DelegatingTile { + override def combine(r2: Tile)(f: (Int, Int) ⇒ Int): Tile = (delegate, r2) match { + case (del: ArrayTile, r2: DelegatingTile) ⇒ del.combine(r2.toArrayTile())(f) + case _ ⇒ delegate.combine(r2)(f) + } + + override def combineDouble(r2: Tile)(f: (Double, Double) ⇒ Double): Tile = (delegate, r2) match { + case (del: ArrayTile, r2: DelegatingTile) ⇒ del.combineDouble(r2.toArrayTile())(f) + case _ ⇒ delegate.combineDouble(r2)(f) + } +} diff --git a/core/src/main/scala/astraea/spark/rasterframes/tiles/InternalRowTile.scala b/core/src/main/scala/org/locationtech/rasterframes/tiles/InternalRowTile.scala similarity index 86% rename from core/src/main/scala/astraea/spark/rasterframes/tiles/InternalRowTile.scala rename to core/src/main/scala/org/locationtech/rasterframes/tiles/InternalRowTile.scala index 021f0946a..98be22446 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/tiles/InternalRowTile.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/tiles/InternalRowTile.scala @@ -19,52 +19,38 @@ * */ -package astraea.spark.rasterframes.tiles +package org.locationtech.rasterframes.tiles import java.nio.ByteBuffer -import astraea.spark.rasterframes.encoders.CatalystSerializer.CatalystIO -import 
astraea.spark.rasterframes.model.{Cells, TileDataContext} +import org.locationtech.rasterframes.encoders.CatalystSerializer.CatalystIO import geotrellis.raster._ import org.apache.spark.sql.catalyst.InternalRow +import org.locationtech.rasterframes.model.{Cells, TileDataContext} /** * Wrapper around a `Tile` encoded in a Catalyst `InternalRow`, for the purpose * of providing compatible semantics over common operations. * - * @groupname COPIES Memory Copying - * @groupdesc COPIES Requires creating an intermediate copy of - * the complete `Tile` contents, and should be avoided. - * * @since 11/29/17 */ -class InternalRowTile(val mem: InternalRow) extends DelegatingTile { +class InternalRowTile(val mem: InternalRow) extends FixedDelegatingTile { import InternalRowTile._ - /** @group COPIES */ - override def toArrayTile(): ArrayTile = realizedTile + override def toArrayTile(): ArrayTile = realizedTile.toArrayTile() - // TODO: We want to reimpliement the delegated methods so that they read directly from tungsten storage - protected lazy val realizedTile: ArrayTile = { - val data = toBytes - if(data.length < cols * rows && cellType.name != "bool") { - val ctile = ConstantTile.fromBytes(data, cellType, cols, rows) - val atile = ctile.toArrayTile() - atile - } - else - ArrayTile.fromBytes(data, cellType, cols, rows) - } + // TODO: We want to reimplement relevant delegated methods so that they read directly from tungsten storage + lazy val realizedTile: Tile = cells.toTile(cellContext) - /** @group COPIES */ protected override def delegate: Tile = realizedTile - private lazy val cellContext: TileDataContext = + private def cellContext: TileDataContext = CatalystIO[InternalRow].get[TileDataContext](mem, 0) + private def cells: Cells = CatalystIO[InternalRow].get[Cells](mem, 1) /** Retrieve the cell type from the internal encoding. 
*/ - override def cellType: CellType = cellContext.cell_type + override def cellType: CellType = cellContext.cellType /** Retrieve the number of columns from the internal encoding. */ override def cols: Int = cellContext.dimensions.cols @@ -74,13 +60,9 @@ class InternalRowTile(val mem: InternalRow) extends DelegatingTile { /** Get the internally encoded tile data cells. */ override lazy val toBytes: Array[Byte] = { - val cellData = CatalystIO[InternalRow] - .get[Cells](mem, 1) - .data - - cellData.left + cells.data.left .getOrElse(throw new IllegalStateException( - "Expected tile cell bytes, but received RasterRef instead: " + cellData.right.get) + "Expected tile cell bytes, but received RasterRef instead: " + cells.data.right.get) ) } @@ -98,12 +80,11 @@ class InternalRowTile(val mem: InternalRow) extends DelegatingTile { } /** Reads the cell value at the given index as an Int. */ - def apply(i: Int): Int = cellReader(i) + def apply(i: Int): Int = cellReader.apply(i) /** Reads the cell value at the given index as a Double. 
*/ def applyDouble(i: Int): Double = cellReader.applyDouble(i) - /** @group COPIES */ def copy = new InternalRowTile(mem.copy) private lazy val cellReader: CellReader = { @@ -132,6 +113,8 @@ class InternalRowTile(val mem: InternalRow) extends DelegatingTile { case _: DoubleCells ⇒ DoubleCellReader(this) } } + + override def toString: String = ShowableTile.show(this) } object InternalRowTile { diff --git a/core/src/main/scala/astraea/spark/rasterframes/tiles/ProjectedRasterTile.scala b/core/src/main/scala/org/locationtech/rasterframes/tiles/ProjectedRasterTile.scala similarity index 77% rename from core/src/main/scala/astraea/spark/rasterframes/tiles/ProjectedRasterTile.scala rename to core/src/main/scala/org/locationtech/rasterframes/tiles/ProjectedRasterTile.scala index a9551dd13..92e2d285d 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/tiles/ProjectedRasterTile.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/tiles/ProjectedRasterTile.scala @@ -19,32 +19,33 @@ * */ -package astraea.spark.rasterframes.tiles +package org.locationtech.rasterframes.tiles -import astraea.spark.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder} -import astraea.spark.rasterframes.encoders.CatalystSerializer.CatalystIO -import astraea.spark.rasterframes.model.TileContext -import astraea.spark.rasterframes.ref.ProjectedRasterLike -import astraea.spark.rasterframes.ref.RasterRef.RasterRefTile import geotrellis.proj4.CRS import geotrellis.raster.io.geotiff.SinglebandGeoTiff import geotrellis.raster.{CellType, ProjectedRaster, Tile} import geotrellis.vector.{Extent, ProjectedExtent} -import org.apache.spark.sql.Encoder import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.rf.TileUDT import org.apache.spark.sql.types.{StructField, StructType} +import org.locationtech.rasterframes.TileType +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import 
org.locationtech.rasterframes.encoders.{CatalystSerializer, CatalystSerializerEncoder} +import org.locationtech.rasterframes.model.TileContext +import org.locationtech.rasterframes.ref.ProjectedRasterLike +import org.locationtech.rasterframes.ref.RasterRef.RasterRefTile /** * A Tile that's also like a ProjectedRaster, with delayed evaluation support. * * @since 9/5/18 */ -trait ProjectedRasterTile extends DelegatingTile with ProjectedRasterLike { +trait ProjectedRasterTile extends FixedDelegatingTile with ProjectedRasterLike { def extent: Extent def crs: CRS def projectedExtent: ProjectedExtent = ProjectedExtent(extent, crs) def projectedRaster: ProjectedRaster[Tile] = ProjectedRaster[Tile](this, extent, crs) + def mapTile(f: Tile => Tile): ProjectedRasterTile = ProjectedRasterTile(f(this), extent, crs) } object ProjectedRasterTile { @@ -58,14 +59,19 @@ object ProjectedRasterTile { case class ConcreteProjectedRasterTile(t: Tile, extent: Extent, crs: CRS) extends ProjectedRasterTile { def delegate: Tile = t + override def convert(cellType: CellType): Tile = ConcreteProjectedRasterTile(t.convert(cellType), extent, crs) - } + override def toString: String = { + val e = s"(${extent.xmin}, ${extent.ymin}, ${extent.xmax}, ${extent.ymax})" + val c = crs.toProj4String + s"[${ShowableTile.show(t)}, $e, $c]" + } + } implicit val serializer: CatalystSerializer[ProjectedRasterTile] = new CatalystSerializer[ProjectedRasterTile] { - val TileType = new TileUDT() override def schema: StructType = StructType(Seq( - StructField("tile_context", CatalystSerializer[TileContext].schema, false), + StructField("tile_context", schemaOf[TileContext], false), StructField("tile", TileType, false)) ) diff --git a/core/src/main/scala/org/locationtech/rasterframes/tiles/ShowableTile.scala b/core/src/main/scala/org/locationtech/rasterframes/tiles/ShowableTile.scala new file mode 100644 index 000000000..00872ff6c --- /dev/null +++ 
b/core/src/main/scala/org/locationtech/rasterframes/tiles/ShowableTile.scala @@ -0,0 +1,55 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.tiles +import org.locationtech.rasterframes._ +import geotrellis.raster.Tile + +class ShowableTile(val delegate: Tile) extends FixedDelegatingTile { + override def equals(obj: Any): Boolean = obj match { + case st: ShowableTile => delegate.equals(st.delegate) + case o => delegate.equals(o) + } + override def hashCode(): Int = delegate.hashCode() + override def toString: String = ShowableTile.show(delegate) +} + +object ShowableTile { + private val maxCells = rfConfig.getInt("showable-max-cells") + def show(tile: Tile): String = { + val ct = tile.cellType + val dims = tile.dimensions + + val data = if (tile.cellType.isFloatingPoint) + tile.toArrayDouble() + else tile.toArray() + + val cells = if(tile.size <= maxCells) { + data.mkString("[", ",", "]") + } + else { + val front = data.take(maxCells/2).mkString("[", ",", "") + val back = data.takeRight(maxCells/2).mkString("", ",", "]") + front + ",...," + back + } + s"[${ct.name}, $dims, $cells]" + } +} diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/DataBiasedOp.scala b/core/src/main/scala/org/locationtech/rasterframes/util/DataBiasedOp.scala 
similarity index 97% rename from core/src/main/scala/astraea/spark/rasterframes/util/DataBiasedOp.scala rename to core/src/main/scala/org/locationtech/rasterframes/util/DataBiasedOp.scala index c2e2578a3..83e5fe76c 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/util/DataBiasedOp.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/util/DataBiasedOp.scala @@ -19,7 +19,7 @@ * */ -package astraea.spark.rasterframes.util +package org.locationtech.rasterframes.util import geotrellis.raster import geotrellis.raster.isNoData diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/GeoTiffInfoSupport.scala b/core/src/main/scala/org/locationtech/rasterframes/util/GeoTiffInfoSupport.scala similarity index 96% rename from core/src/main/scala/astraea/spark/rasterframes/util/GeoTiffInfoSupport.scala rename to core/src/main/scala/org/locationtech/rasterframes/util/GeoTiffInfoSupport.scala index 724d7eaeb..e24bb8175 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/util/GeoTiffInfoSupport.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/util/GeoTiffInfoSupport.scala @@ -19,7 +19,8 @@ * */ -package astraea.spark.rasterframes.util +package org.locationtech.rasterframes.util + import geotrellis.raster.TileLayout import geotrellis.raster.io.geotiff.reader.GeoTiffReader import geotrellis.raster.io.geotiff.reader.GeoTiffReader.GeoTiffInfo @@ -37,7 +38,7 @@ trait GeoTiffInfoSupport { val MAX_SIZE = 256 private def defaultLayout(cols: Int, rows: Int): TileLayout = { def divs(cells: Int) = { - val layoutDivs = math.ceil(cells / MAX_SIZE.toFloat) + val layoutDivs = math.ceil(cells / MAX_SIZE.toDouble) val tileDivs = math.ceil(cells / layoutDivs) (layoutDivs.toInt, tileDivs.toInt) } diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/KryoSupport.scala b/core/src/main/scala/org/locationtech/rasterframes/util/KryoSupport.scala similarity index 97% rename from core/src/main/scala/astraea/spark/rasterframes/util/KryoSupport.scala 
rename to core/src/main/scala/org/locationtech/rasterframes/util/KryoSupport.scala index b20aa7851..26754b91d 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/util/KryoSupport.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/util/KryoSupport.scala @@ -19,7 +19,7 @@ * */ -package astraea.spark.rasterframes.util +package org.locationtech.rasterframes.util import java.nio.ByteBuffer diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/MultibandRender.scala b/core/src/main/scala/org/locationtech/rasterframes/util/MultibandRender.scala similarity index 97% rename from core/src/main/scala/astraea/spark/rasterframes/util/MultibandRender.scala rename to core/src/main/scala/org/locationtech/rasterframes/util/MultibandRender.scala index 764d049ee..b576f1e67 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/util/MultibandRender.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/util/MultibandRender.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2018 Astraea. Inc. + * Copyright 2018 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -15,10 +15,11 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes.util +package org.locationtech.rasterframes.util import geotrellis.raster._ import geotrellis.raster.render.{ColorRamp, Png} diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/RFKryoRegistrator.scala b/core/src/main/scala/org/locationtech/rasterframes/util/RFKryoRegistrator.scala similarity index 72% rename from core/src/main/scala/astraea/spark/rasterframes/util/RFKryoRegistrator.scala rename to core/src/main/scala/org/locationtech/rasterframes/util/RFKryoRegistrator.scala index 58fb62121..8275c6402 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/util/RFKryoRegistrator.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/util/RFKryoRegistrator.scala @@ -19,11 +19,11 @@ * */ -package astraea.spark.rasterframes.util +package org.locationtech.rasterframes.util -import astraea.spark.rasterframes.ref.RasterRef.RasterRefTile -import astraea.spark.rasterframes.ref.{RasterRef, RasterSource} -import astraea.spark.rasterframes.ref.RasterSource._ +import org.locationtech.rasterframes.ref.RasterRef.RasterRefTile +import org.locationtech.rasterframes.ref.{DelegatingRasterSource, RasterRef, RasterSource} +import org.locationtech.rasterframes.ref._ import com.esotericsoftware.kryo.Kryo @@ -36,14 +36,15 @@ import com.esotericsoftware.kryo.Kryo class RFKryoRegistrator extends geotrellis.spark.io.kryo.KryoRegistrator { override def registerClasses(kryo: Kryo): Unit = { super.registerClasses(kryo) - kryo.register(classOf[ReadCallback]) kryo.register(classOf[RasterSource]) kryo.register(classOf[RasterRef]) kryo.register(classOf[RasterRefTile]) - kryo.register(classOf[FileGeoTiffRasterSource]) + kryo.register(classOf[DelegatingRasterSource]) + kryo.register(classOf[JVMGeoTiffRasterSource]) + kryo.register(classOf[InMemoryRasterSource]) kryo.register(classOf[HadoopGeoTiffRasterSource]) - kryo.register(classOf[S3GeoTiffRasterSource]) - 
kryo.register(classOf[HttpGeoTiffRasterSource]) + kryo.register(classOf[GDALRasterSource]) + kryo.register(classOf[SimpleRasterInfo]) kryo.register(classOf[geotrellis.raster.io.geotiff.reader.GeoTiffReader.GeoTiffInfo]) } } diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/SubdivideSupport.scala b/core/src/main/scala/org/locationtech/rasterframes/util/SubdivideSupport.scala similarity index 97% rename from core/src/main/scala/astraea/spark/rasterframes/util/SubdivideSupport.scala rename to core/src/main/scala/org/locationtech/rasterframes/util/SubdivideSupport.scala index 162614651..24ee2ce2d 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/util/SubdivideSupport.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/util/SubdivideSupport.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2018 Astraea. Inc. + * Copyright 2018 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -15,10 +15,11 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes.util +package org.locationtech.rasterframes.util import geotrellis.raster.crop.Crop import geotrellis.raster.{CellGrid, TileLayout} diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/ZeroSevenCompatibilityKit.scala b/core/src/main/scala/org/locationtech/rasterframes/util/ZeroSevenCompatibilityKit.scala similarity index 54% rename from core/src/main/scala/astraea/spark/rasterframes/util/ZeroSevenCompatibilityKit.scala rename to core/src/main/scala/org/locationtech/rasterframes/util/ZeroSevenCompatibilityKit.scala index bbb23a282..5826ad09a 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/util/ZeroSevenCompatibilityKit.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/util/ZeroSevenCompatibilityKit.scala @@ -19,17 +19,18 @@ * */ -package astraea.spark.rasterframes.util -import astraea.spark.rasterframes.expressions.TileAssembler -import astraea.spark.rasterframes.expressions.accessors._ -import astraea.spark.rasterframes.expressions.aggstats._ -import astraea.spark.rasterframes.expressions.generators._ -import astraea.spark.rasterframes.expressions.localops._ -import astraea.spark.rasterframes.expressions.tilestats._ -import astraea.spark.rasterframes.expressions.transformers._ -import astraea.spark.rasterframes.stats.{CellHistogram, CellStatistics} -import astraea.spark.rasterframes.{functions => F} -import com.vividsolutions.jts.geom.Geometry +package org.locationtech.rasterframes.util + +import org.locationtech.rasterframes.expressions.TileAssembler +import org.locationtech.rasterframes.expressions.accessors._ +import org.locationtech.rasterframes.expressions.aggregates._ +import org.locationtech.rasterframes.expressions.generators._ +import org.locationtech.rasterframes.expressions.localops._ +import org.locationtech.rasterframes.expressions.tilestats._ +import org.locationtech.rasterframes.expressions.transformers._ +import 
org.locationtech.rasterframes.stats._ +import org.locationtech.rasterframes.{functions => F} +import org.locationtech.jts.geom.Geometry import geotrellis.proj4.CRS import geotrellis.raster.mapalgebra.local.LocalTileBinaryOp import geotrellis.raster.{CellType, Tile} @@ -45,181 +46,181 @@ import org.apache.spark.sql.{Column, SQLContext, TypedColumn, rf} * @since 4/3/17 */ object ZeroSevenCompatibilityKit { - import astraea.spark.rasterframes.encoders.StandardEncoders._ + import org.locationtech.rasterframes.encoders.StandardEncoders._ trait RasterFunctions { - private val delegate = new astraea.spark.rasterframes.RasterFunctions {} + private val delegate = new org.locationtech.rasterframes.RasterFunctions {} // format: off /** Create a row for each cell in Tile. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def explodeTiles(cols: Column*): Column = delegate.explode_tiles(cols: _*) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def explodeTiles(cols: Column*): Column = delegate.rf_explode_tiles(cols: _*) /** Create a row for each cell in Tile with random sampling and optional seed. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") def explodeTilesSample(sampleFraction: Double, seed: Option[Long], cols: Column*): Column = ExplodeTiles(sampleFraction, seed, cols) /** Create a row for each cell in Tile with random sampling (no seed). */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. 
Please use \"snake_case\" variant instead.", "0.8.0") def explodeTilesSample(sampleFraction: Double, cols: Column*): Column = ExplodeTiles(sampleFraction, None, cols) /** Query the number of (cols, rows) in a Tile. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") def tileDimensions(col: Column): Column = GetDimensions(col) @Experimental /** Convert array in `arrayCol` into a Tile of dimensions `cols` and `rows`*/ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def arrayToTile(arrayCol: Column, cols: Int, rows: Int) = withAlias("array_to_tile", arrayCol)( + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def arrayToTile(arrayCol: Column, cols: Int, rows: Int) = withAlias("rf_array_to_tile", arrayCol)( udf[Tile, AnyRef](F.arrayToTile(cols, rows)).apply(arrayCol) ) /** Create a Tile from a column of cell data with location indexes and preform cell conversion. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") def assembleTile(columnIndex: Column, rowIndex: Column, cellData: Column, tileCols: Int, tileRows: Int, ct: CellType): TypedColumn[Any, Tile] = convertCellType(TileAssembler(columnIndex, rowIndex, cellData, lit(tileCols), lit(tileRows)), ct).as(cellData.columnName).as[Tile] /** Create a Tile from a column of cell data with location indexes. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. 
Please use \"snake_case\" variant instead.", "0.8.0") + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") def assembleTile(columnIndex: Column, rowIndex: Column, cellData: Column, tileCols: Column, tileRows: Column): TypedColumn[Any, Tile] = TileAssembler(columnIndex, rowIndex, cellData, tileCols, tileRows) /** Extract the Tile's cell type */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") def cellType(col: Column): TypedColumn[Any, CellType] = GetCellType(col) /** Change the Tile's cell type */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") def convertCellType(col: Column, cellType: CellType): TypedColumn[Any, Tile] = SetCellType(col, cellType) /** Change the Tile's cell type */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") def convertCellType(col: Column, cellTypeName: String): TypedColumn[Any, Tile] = SetCellType(col, cellTypeName) /** Convert a bounding box structure to a Geometry type. Intented to support multiple schemas. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def boundsGeometry(bounds: Column): TypedColumn[Any, Geometry] = BoundsToGeometry(bounds) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. 
Please use \"snake_case\" variant instead.", "0.8.0") + def boundsGeometry(bounds: Column): TypedColumn[Any, Geometry] = ExtentToGeometry(bounds) /** Assign a `NoData` value to the Tiles. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") def withNoData(col: Column, nodata: Double) = withAlias("withNoData", col)( udf[Tile, Tile](F.withNoData(nodata)).apply(col) ).as[Tile] /** Compute the full column aggregate floating point histogram. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def aggHistogram(col: Column): TypedColumn[Any, CellHistogram] = delegate.agg_approx_histogram(col) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def aggHistogram(col: Column): TypedColumn[Any, CellHistogram] = delegate.rf_agg_approx_histogram(col) /** Compute the full column aggregate floating point statistics. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def aggStats(col: Column): TypedColumn[Any, CellStatistics] = delegate.agg_stats(col) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def aggStats(col: Column): TypedColumn[Any, CellStatistics] = delegate.rf_agg_stats(col) /** Computes the column aggregate mean. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. 
Please use \"snake_case\" variant instead.", "0.8.0") def aggMean(col: Column) = CellMeanAggregate(col) /** Computes the number of non-NoData cells in a column. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def aggDataCells(col: Column): TypedColumn[Any, Long] = delegate.agg_data_cells(col) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def aggDataCells(col: Column): TypedColumn[Any, Long] = delegate.rf_agg_data_cells(col) /** Computes the number of NoData cells in a column. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def aggNoDataCells(col: Column): TypedColumn[Any, Long] = delegate.agg_no_data_cells(col) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def aggNoDataCells(col: Column): TypedColumn[Any, Long] = delegate.rf_agg_no_data_cells(col) /** Compute the Tile-wise mean */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def tileMean(col: Column): TypedColumn[Any, Double] = delegate.tile_mean(col) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def tileMean(col: Column): TypedColumn[Any, Double] = delegate.rf_tile_mean(col) /** Compute the Tile-wise sum */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def tileSum(col: Column): TypedColumn[Any, Double] = delegate.tile_sum(col) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. 
Please use \"snake_case\" variant instead.", "0.8.0") + def tileSum(col: Column): TypedColumn[Any, Double] = delegate.rf_tile_sum(col) /** Compute the minimum cell value in tile. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def tileMin(col: Column): TypedColumn[Any, Double] = delegate.tile_min(col) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def tileMin(col: Column): TypedColumn[Any, Double] = delegate.rf_tile_min(col) /** Compute the maximum cell value in tile. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def tileMax(col: Column): TypedColumn[Any, Double] = delegate.tile_max(col) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def tileMax(col: Column): TypedColumn[Any, Double] = delegate.rf_tile_max(col) /** Compute TileHistogram of Tile values. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def tileHistogram(col: Column): TypedColumn[Any, CellHistogram] = delegate.tile_histogram(col) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def tileHistogram(col: Column): TypedColumn[Any, CellHistogram] = delegate.rf_tile_histogram(col) /** Compute statistics of Tile values. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def tileStats(col: Column): TypedColumn[Any, CellStatistics] = delegate.tile_stats(col) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. 
Please use \"snake_case\" variant instead.", "0.8.0") + def tileStats(col: Column): TypedColumn[Any, CellStatistics] = delegate.rf_tile_stats(col) /** Counts the number of non-NoData cells per Tile. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def dataCells(tile: Column): TypedColumn[Any, Long] = delegate.data_cells(tile) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def dataCells(tile: Column): TypedColumn[Any, Long] = delegate.rf_data_cells(tile) /** Counts the number of NoData cells per Tile. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def noDataCells(tile: Column): TypedColumn[Any, Long] = delegate.no_data_cells(tile) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def noDataCells(tile: Column): TypedColumn[Any, Long] = delegate.rf_no_data_cells(tile) - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def isNoDataTile(tile: Column): TypedColumn[Any, Boolean] = delegate.is_no_data_tile(tile) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def isNoDataTile(tile: Column): TypedColumn[Any, Boolean] = delegate.rf_is_no_data_tile(tile) /** Compute cell-local aggregate descriptive statistics for a column of Tiles. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localAggStats(col: Column): Column = delegate.agg_local_stats(col) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. 
Please use \"snake_case\" variant instead.", "0.8.0") + def localAggStats(col: Column): Column = delegate.rf_agg_local_stats(col) /** Compute the cell-wise/local max operation between Tiles in a column. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localAggMax(col: Column): TypedColumn[Any, Tile] = delegate.agg_local_max(col) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localAggMax(col: Column): TypedColumn[Any, Tile] = delegate.rf_agg_local_max(col) /** Compute the cellwise/local min operation between Tiles in a column. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localAggMin(col: Column): TypedColumn[Any, Tile] = delegate.agg_local_min(col) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localAggMin(col: Column): TypedColumn[Any, Tile] = delegate.rf_agg_local_min(col) /** Compute the cellwise/local mean operation between Tiles in a column. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localAggMean(col: Column): TypedColumn[Any, Tile] = delegate.agg_local_mean(col) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localAggMean(col: Column): TypedColumn[Any, Tile] = delegate.rf_agg_local_mean(col) /** Compute the cellwise/local count of non-NoData cells for all Tiles in a column. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. 
Please use \"snake_case\" variant instead.", "0.8.0") - def localAggDataCells(col: Column): TypedColumn[Any, Tile] = delegate.agg_local_data_cells(col) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localAggDataCells(col: Column): TypedColumn[Any, Tile] = delegate.rf_agg_local_data_cells(col) /** Compute the cellwise/local count of NoData cells for all Tiles in a column. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localAggNoDataCells(col: Column): TypedColumn[Any, Tile] = delegate.agg_local_no_data_cells(col) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localAggNoDataCells(col: Column): TypedColumn[Any, Tile] = delegate.rf_agg_local_no_data_cells(col) /** Cellwise addition between two Tiles. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localAdd(left: Column, right: Column): Column = delegate.local_add(left, right) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localAdd(left: Column, right: Column): Column = delegate.rf_local_add(left, right) /** Cellwise addition of a scalar to a tile. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localAddScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_add(tileCol, value) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localAddScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_add(tileCol, value) /** Cellwise subtraction between two Tiles. 
*/ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localSubtract(left: Column, right: Column): Column = delegate.local_subtract(left, right) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localSubtract(left: Column, right: Column): Column = delegate.rf_local_subtract(left, right) /** Cellwise subtraction of a scalar from a tile. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localSubtractScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_subtract(tileCol, value) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localSubtractScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_subtract(tileCol, value) /** Cellwise multiplication between two Tiles. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localMultiply(left: Column, right: Column): Column = delegate.local_multiply(left, right) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localMultiply(left: Column, right: Column): Column = delegate.rf_local_multiply(left, right) /** Cellwise multiplication of a tile by a scalar. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localMultiplyScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_multiply(tileCol, value) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. 
Please use \"snake_case\" variant instead.", "0.8.0") + def localMultiplyScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_multiply(tileCol, value) /** Cellwise division between two Tiles. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localDivide(left: Column, right: Column): Column = delegate.local_divide(left, right) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localDivide(left: Column, right: Column): Column = delegate.rf_local_divide(left, right) /** Cellwise division of a tile by a scalar. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localDivideScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_divide(tileCol, value) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localDivideScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_divide(tileCol, value) /** Perform an arbitrary GeoTrellis `LocalTileBinaryOp` between two Tile columns. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") def localAlgebra(op: LocalTileBinaryOp, left: Column, right: Column): TypedColumn[Any, Tile] = withAlias(opName(op), left, right)( @@ -227,94 +228,94 @@ object ZeroSevenCompatibilityKit { ).as[Tile] /** Compute the normalized difference of two tile columns */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. 
Please use \"snake_case\" variant instead.", "0.8.0") - def normalizedDifference(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.normalized_difference(left, right) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def normalizedDifference(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.rf_normalized_difference(left, right) /** Constructor for constant tile column */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") def makeConstantTile(value: Number, cols: Int, rows: Int, cellType: String): TypedColumn[Any, Tile] = udf(() => F.makeConstantTile(value, cols, rows, cellType)).apply().as(s"constant_$cellType").as[Tile] /** Alias for column of constant tiles of zero */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") def tileZeros(cols: Int, rows: Int, cellType: String = "float64"): TypedColumn[Any, Tile] = udf(() => F.tileZeros(cols, rows, cellType)).apply().as(s"zeros_$cellType").as[Tile] /** Alias for column of constant tiles of one */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. 
Please use \"snake_case\" variant instead.", "0.8.0") def tileOnes(cols: Int, rows: Int, cellType: String = "float64"): TypedColumn[Any, Tile] = udf(() => F.tileOnes(cols, rows, cellType)).apply().as(s"ones_$cellType").as[Tile] /** Where the mask tile equals the mask value, replace values in the source tile with NODATA */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") def maskByValue(sourceTile: Column, maskTile: Column, maskValue: Column): TypedColumn[Any, Tile] = - delegate.mask_by_value(sourceTile, maskTile, maskValue) + delegate.rf_mask_by_value(sourceTile, maskTile, maskValue) /** Where the mask tile DOES NOT contain NODATA, replace values in the source tile with NODATA */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") def inverseMask(sourceTile: Column, maskTile: Column): TypedColumn[Any, Tile] = - delegate.inverse_mask(sourceTile, maskTile) + delegate.rf_inverse_mask(sourceTile, maskTile) /** Reproject a column of geometry from one CRS to another. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") def reprojectGeometry(sourceGeom: Column, srcCRS: CRS, dstCRS: CRS): TypedColumn[Any, Geometry] = - delegate.reproject_geometry(sourceGeom, srcCRS, dstCRS) + delegate.st_reproject(sourceGeom, srcCRS, dstCRS) /** Render Tile as ASCII string for debugging purposes. */ @Experimental - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. 
Please use \"snake_case\" variant instead.", "0.8.0") - def renderAscii(col: Column): TypedColumn[Any, String] = delegate.render_ascii(col) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def renderAscii(col: Column): TypedColumn[Any, String] = delegate.rf_render_ascii(col) /** Cellwise less than value comparison between two tiles. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") def localLess(left: Column, right: Column): TypedColumn[Any, Tile] = - delegate.local_less(left, right) + delegate.rf_local_less(left, right) /** Cellwise less than value comparison between a tile and a scalar. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localLessScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_less(tileCol, value) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localLessScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_less(tileCol, value) /** Cellwise less than or equal to value comparison between a tile and a scalar. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localLessEqual(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.local_less_equal(left, right) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. 
Please use \"snake_case\" variant instead.", "0.8.0") + def localLessEqual(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.rf_local_less_equal(left, right) /** Cellwise less than or equal to value comparison between a tile and a scalar. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localLessEqualScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_less_equal(tileCol, value) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localLessEqualScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_less_equal(tileCol, value) /** Cellwise greater than value comparison between two tiles. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") def localGreater(left: Column, right: Column): TypedColumn[Any, Tile] = - delegate.local_greater(left, right) + delegate.rf_local_greater(left, right) /** Cellwise greater than value comparison between a tile and a scalar. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localGreaterScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_greater(tileCol, value) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localGreaterScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_greater(tileCol, value) /** Cellwise greater than or equal to value comparison between two tiles. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. 
Please use \"snake_case\" variant instead.", "0.8.0") - def localGreaterEqual(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.local_greater_equal(left, right) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localGreaterEqual(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.rf_local_greater_equal(left, right) /** Cellwise greater than or equal to value comparison between a tile and a scalar. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localGreaterEqualScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_greater_equal(tileCol, value) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localGreaterEqualScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_greater_equal(tileCol, value) /** Cellwise equal to value comparison between two tiles. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localEqual(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.local_equal(left, right) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localEqual(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.rf_local_equal(left, right) /** Cellwise equal to value comparison between a tile and a scalar. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localEqualScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_equal(tileCol, value) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. 
Please use \"snake_case\" variant instead.", "0.8.0") + def localEqualScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_equal(tileCol, value) /** Cellwise inequality comparison between two tiles. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localUnequal(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.local_unequal(left, right) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localUnequal(left: Column, right: Column): TypedColumn[Any, Tile] = delegate.rf_local_unequal(left, right) /** Cellwise inequality comparison between a tile and a scalar. */ - @deprecated("Part of 0.7.x compatility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") - def localUnequalScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.local_unequal(tileCol, value) + @deprecated("Part of 0.7.x compatibility kit, to be removed after 0.8.x. Please use \"snake_case\" variant instead.", "0.8.0") + def localUnequalScalar[T: Numeric](tileCol: Column, value: T): TypedColumn[Any, Tile] = delegate.rf_local_unequal(tileCol, value) } def register(sqlContext: SQLContext): Unit = { @@ -323,6 +324,8 @@ object ZeroSevenCompatibilityKit { def ub[A, B](f: A => B)(a: Seq[A]): B = f(a.head) /** Binary expression builder builder. */ def bb[A, B](f: (A, A) => B)(a: Seq[A]): B = f(a.head, a.last) + /** Trinary expression builder builder. */ + def tb[A, B](f: (A, A, A) => B)(a: Seq[A]): B = f(a.head, a.tail.head, a.last) // Expression-oriented functions have a different registration scheme // Currently have to register with the `builtin` registry due to Spark data hiding. 
@@ -331,7 +334,7 @@ object ZeroSevenCompatibilityKit { registry.registerFunc("rf_cellType", ub(GetCellType.apply)) registry.registerFunc("rf_convertCellType", bb(SetCellType.apply)) registry.registerFunc("rf_tileDimensions", ub(GetDimensions.apply)) - registry.registerFunc("rf_boundsGeometry", ub(BoundsToGeometry.apply)) + registry.registerFunc("rf_boundsGeometry", ub(ExtentToGeometry.apply)) registry.registerFunc("rf_localAdd", bb(Add.apply)) registry.registerFunc("rf_localSubtract", bb(Subtract.apply)) registry.registerFunc("rf_localMultiply", bb(Multiply.apply)) @@ -360,11 +363,11 @@ object ZeroSevenCompatibilityKit { registry.registerFunc("rf_localAggMin", ub(LocalTileOpAggregate.LocalMinUDAF.apply)) registry.registerFunc("rf_localAggCount", ub(LocalCountAggregate.LocalDataCellsUDAF.apply)) registry.registerFunc("rf_localAggMean", ub(LocalMeanAggregate.apply)) + registry.registerFunc("rf_reprojectGeometry", tb(ReprojectGeometry.apply)) sqlContext.udf.register("rf_makeConstantTile", F.makeConstantTile) sqlContext.udf.register("rf_tileZeros", F.tileZeros) sqlContext.udf.register("rf_tileOnes", F.tileOnes) sqlContext.udf.register("rf_cellTypes", F.cellTypes) - sqlContext.udf.register("rf_reprojectGeometry", F.reprojectGeometryCRSName) } } diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/debug/package.scala b/core/src/main/scala/org/locationtech/rasterframes/util/debug/package.scala similarity index 80% rename from core/src/main/scala/astraea/spark/rasterframes/util/debug/package.scala rename to core/src/main/scala/org/locationtech/rasterframes/util/debug/package.scala index 53b4b6aee..e33529b02 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/util/debug/package.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/util/debug/package.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2018 Astraea. Inc. + * Copyright 2018 Astraea, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -15,12 +15,13 @@ * License for the specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes.util +package org.locationtech.rasterframes.util -import astraea.spark.rasterframes._ +import org.locationtech.rasterframes._ import geotrellis.proj4.LatLng import geotrellis.vector.{Feature, Geometry} import geotrellis.vector.io.json.JsonFeatureCollection @@ -32,21 +33,21 @@ import spray.json.JsValue * @since 4/6/18 */ package object debug { - implicit class RasterFrameWithDebug(val self: RasterFrame) { + implicit class RasterFrameWithDebug(val self: RasterFrameLayer) { /** Renders the whole schema with metadata as a JSON string. */ def describeFullSchema: String = { self.schema.prettyJson } - /** Renders all the extents in this RasterFrame as GeoJSON in EPSG:4326. This does a full + /** Renders all the extents in this RasterFrameLayer as GeoJSON in EPSG:4326. This does a full * table scan and collects **all** the geometry into the driver, and then converts it into a * Spray JSON data structure. Not performant, and for debugging only. 
*/ def geoJsonExtents: JsValue = { import spray.json.DefaultJsonProtocol._ val features = self - .select(BOUNDS_COLUMN, SPATIAL_KEY_COLUMN) + .select(GEOMETRY_COLUMN, SPATIAL_KEY_COLUMN) .collect() .map{ case (p, s) ⇒ Feature(Geometry(p).reproject(self.crs, LatLng), Map("col" -> s.col, "row" -> s.row)) } diff --git a/core/src/main/scala/astraea/spark/rasterframes/util/package.scala b/core/src/main/scala/org/locationtech/rasterframes/util/package.scala similarity index 79% rename from core/src/main/scala/astraea/spark/rasterframes/util/package.scala rename to core/src/main/scala/org/locationtech/rasterframes/util/package.scala index 02a365cea..e94869986 100644 --- a/core/src/main/scala/astraea/spark/rasterframes/util/package.scala +++ b/core/src/main/scala/org/locationtech/rasterframes/util/package.scala @@ -15,28 +15,32 @@ * License for the specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes +package org.locationtech.rasterframes -import geotrellis.proj4.CRS -import geotrellis.raster -import geotrellis.raster.{CellGrid, Tile, isNoData} +import com.typesafe.scalalogging.Logger import geotrellis.raster.crop.TileCropMethods import geotrellis.raster.io.geotiff.reader.GeoTiffReader import geotrellis.raster.mapalgebra.local.LocalTileBinaryOp import geotrellis.raster.mask.TileMaskMethods import geotrellis.raster.merge.TileMergeMethods import geotrellis.raster.prototype.TilePrototypeMethods +import geotrellis.raster.{CellGrid, Tile, isNoData} import geotrellis.spark.Bounds import geotrellis.spark.tiling.TilerKeyMethods -import geotrellis.util.{ByteReader, GetComponent, LazyLogging} -import org.apache.spark.sql.catalyst.expressions.{Expression, NamedExpression} +import geotrellis.util.{ByteReader, GetComponent} +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, NamedExpression} 
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.functions._ import org.apache.spark.sql.rf._ import org.apache.spark.sql.types.StringType -import org.apache.spark.sql.{Column, DataFrame, SQLContext} +import org.apache.spark.sql._ +import org.slf4j.LoggerFactory import spire.syntax.cfor._ import scala.Boolean.box @@ -46,7 +50,10 @@ import scala.Boolean.box * * @since 12/18/17 */ -package object util extends LazyLogging { +package object util { + @transient + protected lazy val logger: Logger = + Logger(LoggerFactory.getLogger("org.locationtech.rasterframes")) import reflect.ClassTag import reflect.runtime.universe._ @@ -71,34 +78,28 @@ package object util extends LazyLogging { type KeyMethodsProvider[K1, K2] = K1 ⇒ TilerKeyMethods[K1, K2] - /** Internal method for slapping the RasterFrame seal of approval on a DataFrame. */ - private[rasterframes] def certifyRasterframe(df: DataFrame): RasterFrame = + /** Internal method for slapping the RasterFrameLayer seal of approval on a DataFrame. */ + private[rasterframes] def certifyRasterframe(df: DataFrame): RasterFrameLayer = shapeless.tag[RasterFrameTag][DataFrame](df) /** Tags output column with a nicer name. */ private[rasterframes] - def withAlias(name: String, inputs: Column*)(output: Column) = { + def withAlias(name: String, inputs: Column*)(output: Column): Column = { val paramNames = inputs.map(_.columnName).mkString(",") output.as(s"$name($paramNames)") } + /** Tags output column with a nicer name, yet strongly typed. */ + private[rasterframes] + def withTypedAlias[T: Encoder](name: String, inputs: Column*)(output: Column): TypedColumn[Any, T] = + withAlias(name, inputs: _*)(output).as[T] + /** Derives and operator name from the implementing object name. 
*/ private[rasterframes] def opName(op: LocalTileBinaryOp) = op.getClass.getSimpleName.replace("$", "").toLowerCase - object CRSParser { - def apply(value: String): CRS = { - value match { - case e if e.toUpperCase().startsWith("EPSG") => CRS.fromName(e) //not case-sensitive - case p if p.startsWith("+proj") => CRS.fromString(p) // case sensitive - case w if w.toUpperCase().startsWith("GEOGCS") => CRS.fromWKT(w) //only case-sensitive inside double quotes - case _ ⇒ throw new IllegalArgumentException("crs string must be either EPSG code, +proj string, or OGC WKT") - } - } - } - implicit class WithCombine[T](left: Option[T]) { def combine[A, R >: A](a: A)(f: (T, A) ⇒ R): R = left.map(f(_, a)).getOrElse(a) def tupleWith[R](right: Option[R]): Option[(T, R)] = left.flatMap(l ⇒ right.map((l, _))) @@ -107,7 +108,9 @@ package object util extends LazyLogging { implicit class ExpressionWithName(val expr: Expression) extends AnyVal { import org.apache.spark.sql.catalyst.expressions.Literal def name: String = expr match { - case n: NamedExpression ⇒ n.name + case n: NamedExpression if n.resolved ⇒ n.name + case UnresolvedAttribute(parts) => parts.mkString("_") + case Alias(_, name) => name case l: Literal if l.dataType == StringType ⇒ String.valueOf(l.value) case o ⇒ o.toString } @@ -181,6 +184,27 @@ package object util extends LazyLogging { } } + implicit class DFWithPrettyPrint(val df: Dataset[_]) extends AnyVal { + def toMarkdown(numRows: Int = 5, truncate: Boolean = false): String = { + import df.sqlContext.implicits._ + val cols = df.columns + val header = cols.mkString("| ", " | ", " |") + "\n" + ("|---" * cols.length) + "|\n" + val stringifiers = cols + .map(c => s"`$c`") + .map(c => df.col(c).cast(StringType)) + .map(c => if (truncate) substring(c, 1, 40) else c) + val cat = concat_ws(" | ", stringifiers: _*) + val body = df + .select(cat).limit(numRows) + .as[String] + .collect() + .map(_.replaceAll("\\[", "\\\\[")) + .map(_.replace('\n', '↩')) + .mkString("| ", " 
|\n| ", " |") + header + body + } + } + object Shims { // GT 1.2.1 to 2.0.0 def toArrayTile[T <: CellGrid](tile: T): T = diff --git a/core/src/test/resources/B01.jp2 b/core/src/test/resources/B01.jp2 new file mode 100644 index 000000000..18de22f54 Binary files /dev/null and b/core/src/test/resources/B01.jp2 differ diff --git a/core/src/test/resources/L8-B4-Elkton-VA-4326.tiff b/core/src/test/resources/L8-B4-Elkton-VA-4326.tiff new file mode 100644 index 000000000..2bc57e255 Binary files /dev/null and b/core/src/test/resources/L8-B4-Elkton-VA-4326.tiff differ diff --git a/core/src/test/resources/L8-archive.zip b/core/src/test/resources/L8-archive.zip new file mode 100644 index 000000000..93afb4db4 Binary files /dev/null and b/core/src/test/resources/L8-archive.zip differ diff --git a/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.idx b/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.idx new file mode 100644 index 000000000..f86df2587 Binary files /dev/null and b/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.idx differ diff --git a/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.lrc b/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.lrc new file mode 100644 index 000000000..75163d4a2 Binary files /dev/null and b/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.lrc differ diff --git a/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.mrf b/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.mrf new file mode 100644 index 000000000..8245c4a7e --- /dev/null +++ b/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.mrf @@ -0,0 +1,12 @@ + + + + + LERC + + + + + PROJCS["unnamed",GEOGCS["Unknown datum based upon the custom spheroid",DATUM["Not specified (based on custom spheroid)",SPHEROID["Custom 
spheroid",6371007.181,0]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]],PROJECTION["Sinusoidal"],PARAMETER["longitude_of_center",0],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["Meter",1]] + + diff --git a/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.mrf.aux.xml b/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.mrf.aux.xml new file mode 100644 index 000000000..5a18f6944 --- /dev/null +++ b/core/src/test/resources/MCD43A4.A2019111.h30v06.006.2019120033434_01.mrf.aux.xml @@ -0,0 +1,92 @@ + + + LERC + PIXEL + + + 06121997 + MODIS + MODIS + Terra + Aqua + MODIS + MODIS + Passed + Passed was set as a default value. More algorithm will be developed + 0 + AMBRALS_V4.0R1 + v1.0500m + 15.0 + 463.312716527778 + volume + 2400 + 2400 + Day + Mandatory QA: + 0 = processed, good quality (full BRDF inversions) + 1 = processed, see other QA (magnitude BRDF inversions) + + 6.1 + 150.120692476232 + N + False + 75.0 + 86400 + 43200 + 19.9448109058663, 30.0666177912155, 29.9990071837477, 19.8789125843729 + 127.31379517564, 138.161359988435, 150.130532080915, 138.321766284772 + 1, 2, 3, 4 + HDFEOS_V2.19 + 30 + 10.5067/MODIS/MCD43A4.006 + 10.5067/MODIS/MCD43A4.006 + http://dx.doi.org + http://dx.doi.org + MYD09GA.A2019113.h30v06.006.2019115025936.hdf, MYD09GA.A2019114.h30v06.006.2019117021858.hdf, MYD09GA.A2019115.h30v06.006.2019117044251.hdf, MYD09GA.A2019116.h30v06.006.2019118031111.hdf, MYD09GA.A2019117.h30v06.006.2019119025916.hdf, MYD09GA.A2019118.h30v06.006.2019120030848.hdf, MOD09GA.A2019113.h30v06.006.2019115032521.hdf, MOD09GA.A2019114.h30v06.006.2019116030646.hdf, MOD09GA.A2019115.h30v06.006.2019117050730.hdf, MOD09GA.A2019116.h30v06.006.2019118032616.hdf, MOD09GA.A2019117.h30v06.006.2019119032020.hdf, MOD09GA.A2019118.h30v06.006.2019120032257.hdf, MCD43DB.A2019110.6.h30v06.hdf + MCD43A4.A2019111.h30v06.006.2019120033434.hdf + 6.1.34 + MODIS/Terra+Aqua BRDF/Albedo Nadir BRDF-Adjusted Ref Daily L3 Global 
- 500m + BRDF_Albedo_Band_Mandatory_Quality_Band1 + 0 + 500m + 29.9999999973059 + 1 + NOT SET + 0 + 0 + 0 + 100 + 0 + 6.0.42 + MODAPS + Linux minion7043 3.10.0-957.5.1.el7.x86_64 #1 SMP Fri Feb 1 14:54:57 UTC 2019 x86_64 x86_64 x86_64 GNU/Linux + 2019-04-30T03:34:48.000Z + 0 + 0 + 99 + 0 + 2019-04-13 + 00:00:00.000000 + 2019-04-28 + 23:59:59.999999 + processed once + further update is anticipated + Not Investigated + See http://landweb.nascom/nasa.gov/cgi-bin/QA_WWW/qaFlagPage.cgi?sat=aqua the product Science Quality status. + 06121997 + MCD43A4 + 19.9999999982039 + 2015 + 51030006 + concatenated flags + 0, 254 + 6 + 6 + 127.701332684185 + 255 + + + BRDF_Albedo_Band_Mandatory_Quality_Band1 + concatenated flags + + diff --git a/core/src/test/resources/README.md b/core/src/test/resources/README.md new file mode 100644 index 000000000..70aa76cd6 --- /dev/null +++ b/core/src/test/resources/README.md @@ -0,0 +1,8 @@ +# Test resources + +## NAIP Virginia UTM overlaps + + 1. `m_3607717_sw_18_1_20160620_subset.tif` the southwest corner of NAIP m_3607717_sw_18_1. It is in its native CRS EPSG:26918 + 2. `m_3607824_se_17_1_20160620_subset.tif` the southeast corner of NAIP m_3607824_se_17_1. Overlaps number 1; It is to the east. It is in its native CRS EPSG:26917. + 3. `m_3607_box.tif` - an aribtrary burned in polygon in EPSG:4326 partially overlapping both of the above NAIP subsets 1 and 2. 
+ diff --git a/core/src/test/resources/application.conf b/core/src/test/resources/application.conf new file mode 100644 index 000000000..b274441dd --- /dev/null +++ b/core/src/test/resources/application.conf @@ -0,0 +1,12 @@ +gdal { + settings { + options { + // See https://trac.osgeo.org/gdal/wiki/ConfigOptions for options + CPL_DEBUG = "OFF" + // TIFF_USE_OVR = "NO" + // GDAL_TIFF_INTERNAL_MASK = "YES" + } + // set this to `false` if CPL_DEBUG is `ON` + useExceptions: true + } +} \ No newline at end of file diff --git a/core/src/test/resources/log4j.properties b/core/src/test/resources/log4j.properties index 378ae8e61..39e791fa3 100644 --- a/core/src/test/resources/log4j.properties +++ b/core/src/test/resources/log4j.properties @@ -30,15 +30,15 @@ log4j.logger.org.apache.spark.repl.Main=WARN log4j.logger.org.apache=ERROR log4j.logger.com.amazonaws=WARN -log4j.logger.geotrellis=INFO +log4j.logger.geotrellis=WARN # Settings to quiet third party logs that are too verbose log4j.logger.org.spark_project.jetty=WARN log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO -log4j.logger.astraea.spark.rasterframes=DEBUG -log4j.logger.astraea.spark.rasterframes.ref=TRACE +log4j.logger.org.locationtech.rasterframes=WARN +log4j.logger.org.locationtech.rasterframes.ref=WARN log4j.logger.org.apache.parquet.hadoop.ParquetRecordReader=OFF # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support diff --git a/core/src/test/resources/m_3607717_sw_18_1_20160620_subset.tif b/core/src/test/resources/m_3607717_sw_18_1_20160620_subset.tif new file mode 100644 index 000000000..862c23645 Binary files /dev/null and b/core/src/test/resources/m_3607717_sw_18_1_20160620_subset.tif differ diff --git a/core/src/test/resources/m_3607824_se_17_1_20160620_subset.tif 
b/core/src/test/resources/m_3607824_se_17_1_20160620_subset.tif new file mode 100644 index 000000000..2b2a7d497 Binary files /dev/null and b/core/src/test/resources/m_3607824_se_17_1_20160620_subset.tif differ diff --git a/core/src/test/resources/m_3607_box.tif b/core/src/test/resources/m_3607_box.tif new file mode 100644 index 000000000..50570db26 Binary files /dev/null and b/core/src/test/resources/m_3607_box.tif differ diff --git a/core/src/test/scala/Scratch.sc b/core/src/test/scala/Scratch.sc deleted file mode 100644 index e69de29bb..000000000 diff --git a/core/src/test/scala/astraea/spark/rasterframes/GeometryOperationsSpec.scala b/core/src/test/scala/astraea/spark/rasterframes/GeometryOperationsSpec.scala deleted file mode 100644 index 28d0bcb94..000000000 --- a/core/src/test/scala/astraea/spark/rasterframes/GeometryOperationsSpec.scala +++ /dev/null @@ -1,76 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2018 Astraea. Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- * - * - */ - -package astraea.spark.rasterframes - -import java.nio.file.{Files, Paths} - -import geotrellis.proj4.LatLng -import geotrellis.vector.io._ -import geotrellis.vector.io.json.JsonFeatureCollection -import spray.json.DefaultJsonProtocol._ -import spray.json._ - -import scala.collection.JavaConversions._ - -/** - * - * - * @since 5/30/18 - */ -class GeometryOperationsSpec extends TestEnvironment with TestData { - val geoJson = { - val p = Paths.get(getClass.getResource("/L8-Labels-Elkton-VA.geojson").toURI) - Files.readAllLines(p).mkString("\n") - } - - describe("Geometery operations") { - import spark.implicits._ - it("should rasterize geometry") { - val rf = l8Sample(1).projectedRaster.toRF.withBounds() - - val features = geoJson.parseGeoJson[JsonFeatureCollection].getAllPolygonFeatures[JsObject]() - val df = features.map(f ⇒ ( - f.geom.reproject(LatLng, rf.crs).jtsGeom, - f.data.fields("id").asInstanceOf[JsNumber].value.intValue() - )).toDF("geom", "id") - - val toRasterize = rf.crossJoin(df) - - val tlm = rf.tileLayerMetadata.merge - - val (cols, rows) = tlm.layout.tileLayout.tileDimensions - - val rasterized = toRasterize.withColumn("rasterized", rasterize($"geom", $"bounds", $"id", cols, rows)) - - assert(rasterized.count() === df.count() * rf.count()) - assert(rasterized.select(tile_dimensions($"rasterized")).distinct().count() === 1) - val pixelCount = rasterized.select(agg_data_cells($"rasterized")).first() - assert(pixelCount < cols * rows) - - - toRasterize.createOrReplaceTempView("stuff") - val viaSQL = sql(s"select rf_rasterize(geom, bounds, id, $cols, $rows) as rasterized from stuff") - assert(viaSQL.select(agg_data_cells($"rasterized")).first === pixelCount) - - //rasterized.select($"rasterized".as[Tile]).foreach(t ⇒ t.renderPng(ColorMaps.IGBP).write("target/" + t.hashCode() + ".png")) - } - } -} diff --git a/core/src/test/scala/astraea/spark/rasterframes/ReprojectGeometrySpec.scala 
b/core/src/test/scala/astraea/spark/rasterframes/ReprojectGeometrySpec.scala deleted file mode 100644 index 39ea3b1c1..000000000 --- a/core/src/test/scala/astraea/spark/rasterframes/ReprojectGeometrySpec.scala +++ /dev/null @@ -1,98 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2019 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - * - * SPDX-License-Identifier: Apache-2.0 - * - */ - -package astraea.spark.rasterframes - -import com.vividsolutions.jts.geom._ -import geotrellis.proj4.{CRS, LatLng, Sinusoidal, WebMercator} -import org.apache.spark.sql.Encoders -import org.scalatest.{FunSpec, Matchers} - -/** - * Test for geometry reprojection. 
- * - * @since 11/29/18 - */ -class ReprojectGeometrySpec extends FunSpec - with TestEnvironment with Matchers { - import spark.implicits._ - - describe("Geometry reprojection") { - it("should allow reprojection geometry") { - // Note: Test data copied from ReprojectSpec in GeoTrellis - val fact = new GeometryFactory() - val latLng: Geometry = fact.createLineString(Array( - new Coordinate(-111.09374999999999, 34.784483415461345), - new Coordinate(-111.09374999999999, 43.29919735147067), - new Coordinate(-75.322265625, 43.29919735147067), - new Coordinate(-75.322265625, 34.784483415461345), - new Coordinate(-111.09374999999999, 34.784483415461345) - )) - - val webMercator: Geometry = fact.createLineString(Array( - new Coordinate(-12366899.680315234, 4134631.734001753), - new Coordinate(-12366899.680315234, 5357624.186564572), - new Coordinate(-8384836.254770693, 5357624.186564572), - new Coordinate(-8384836.254770693, 4134631.734001753), - new Coordinate(-12366899.680315234, 4134631.734001753) - )) - - withClue("both literal crs") { - - val df = Seq((latLng, webMercator)).toDF("ll", "wm") - - val rp = df.select( - reproject_geometry($"ll", LatLng, WebMercator) as "wm2", - reproject_geometry($"wm", WebMercator, LatLng) as "ll2", - reproject_geometry(reproject_geometry($"ll", LatLng, Sinusoidal), Sinusoidal, WebMercator) as "wm3" - ).as[(Geometry, Geometry, Geometry)] - - - val (wm2, ll2, wm3) = rp.first() - - wm2 should matchGeom(webMercator, 0.00001) - ll2 should matchGeom(latLng, 0.00001) - wm3 should matchGeom(webMercator, 0.00001) - } - - withClue("one literal crs") { - implicit val enc = Encoders.tuple(jtsGeometryEncoder, jtsGeometryEncoder, crsEncoder) - - val df = Seq((latLng, webMercator, LatLng: CRS)).toDF("ll", "wm", "llCRS") - - val rp = df.select( - reproject_geometry($"ll", $"llCRS", WebMercator) as "wm2", - reproject_geometry($"wm", WebMercator, $"llCRS") as "ll2", - reproject_geometry(reproject_geometry($"ll", $"llCRS", Sinusoidal), Sinusoidal, 
WebMercator) as "wm3" - ).as[(Geometry, Geometry, Geometry)] - - - val (wm2, ll2, wm3) = rp.first() - - wm2 should matchGeom(webMercator, 0.00001) - ll2 should matchGeom(latLng, 0.00001) - wm3 should matchGeom(webMercator, 0.00001) - - } - } - } - -} diff --git a/core/src/test/scala/astraea/spark/rasterframes/ml/TileExploderSpec.scala b/core/src/test/scala/astraea/spark/rasterframes/ml/TileExploderSpec.scala deleted file mode 100644 index 8883045e1..000000000 --- a/core/src/test/scala/astraea/spark/rasterframes/ml/TileExploderSpec.scala +++ /dev/null @@ -1,26 +0,0 @@ -package astraea.spark.rasterframes.ml - -import astraea.spark.rasterframes.{TestData, TestEnvironment} -import geotrellis.raster.Tile -import org.apache.spark.sql.functions.lit -/** - * - * @since 2/16/18 - */ -class TileExploderSpec extends TestEnvironment with TestData { - describe("Tile explode transformer") { - it("should explode tiles") { - import spark.implicits._ - val df = Seq[(Tile, Tile)]((byteArrayTile, byteArrayTile)).toDF("tile1", "tile2").withColumn("other", lit("stuff")) - - val exploder = new TileExploder() - val newSchema = exploder.transformSchema(df.schema) - - val exploded = exploder.transform(df) - assert(newSchema === exploded.schema) - assert(exploded.columns.length === 5) - assert(exploded.count() === 9) - write(exploded) - } - } -} diff --git a/core/src/test/scala/astraea/spark/rasterframes/ref/RasterSourceSpec.scala b/core/src/test/scala/astraea/spark/rasterframes/ref/RasterSourceSpec.scala deleted file mode 100644 index 1c1fb182a..000000000 --- a/core/src/test/scala/astraea/spark/rasterframes/ref/RasterSourceSpec.scala +++ /dev/null @@ -1,166 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2018 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. 
You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - * - * SPDX-License-Identifier: Apache-2.0 - * - */ - -package astraea.spark.rasterframes.ref - -import java.net.URI - -import astraea.spark.rasterframes.TestEnvironment.ReadMonitor -import astraea.spark.rasterframes.ref.RasterSource.FileGeoTiffRasterSource -import astraea.spark.rasterframes.{TestData, TestEnvironment} -import geotrellis.raster.io.geotiff.GeoTiff -import geotrellis.vector.Extent -import org.apache.spark.sql.rf.RasterSourceUDT - -/** - * - * - * @since 8/22/18 - */ -class RasterSourceSpec extends TestEnvironment with TestData { - def sub(e: Extent) = { - val c = e.center - val w = e.width - val h = e.height - Extent(c.x, c.y, c.x + w * 0.1, c.y + h * 0.1) - } - - describe("General RasterSource") { - it("should identify as UDT") { - assert(new RasterSourceUDT() === new RasterSourceUDT()) - } - } - - describe("HTTP RasterSource") { - it("should support metadata querying over HTTP") { - withClue("remoteCOGSingleband") { - val src = RasterSource(remoteCOGSingleband1) - assert(!src.extent.isEmpty) - } - withClue("remoteCOGMultiband") { - val src = RasterSource(remoteCOGMultiband) - assert(!src.extent.isEmpty) - } - } - it("should read sub-tile") { - withClue("remoteCOGSingleband") { - val src = RasterSource(remoteCOGSingleband1) - val Left(raster) = src.read(sub(src.extent)) - assert(raster.size > 0 && raster.size < src.size) - } - withClue("remoteCOGMultiband") { - val src = RasterSource(remoteCOGMultiband) - //println("CoG size", src.size, src.dimensions) - val Right(raster) = src.read(sub(src.extent)) - //println("Subtile size", 
raster.size, raster.dimensions) - assert(raster.size > 0 && raster.size < src.size) - } - } - it("should Java serialize") { - import java.io._ - val src = RasterSource(remoteCOGSingleband1) - val buf = new java.io.ByteArrayOutputStream() - val out = new ObjectOutputStream(buf) - out.writeObject(src) - out.close() - - val data = buf.toByteArray - val in = new ObjectInputStream(new ByteArrayInputStream(data)) - val recovered = in.readObject().asInstanceOf[RasterSource] - assert(src.toString === recovered.toString) - } - } - describe("File RasterSource") { - it("should support metadata querying of file") { - val localSrc = geotiffDir.resolve("LC08_B7_Memphis_COG.tiff").toUri - val src = RasterSource(localSrc) - assert(!src.extent.isEmpty) - } - } - - describe("Caching") { - val localSrc = geotiffDir.resolve("LC08_B7_Memphis_COG.tiff").toUri - - trait Fixture { - val counter = ReadMonitor(false) - val src = RasterSource(localSrc, Some(counter)) - } - - it("should cache headers")(new Fixture { - val e = src.extent - assert(counter.reads === 1) - - val c = src.crs - val e2 = src.extent - val ct = src.cellType - assert(counter.reads === 1) - }) - - it("should Spark serialize caching")(new Fixture { - - import spark.implicits._ - - assert(src.isInstanceOf[FileGeoTiffRasterSource]) - - val e = src.extent - assert(counter.reads === 1) - - val df = Seq(src, src, src).toDS.repartition(3) - val src2 = df.collect()(1) - - val e2 = src2.extent - val ct = src2.cellType - - src2 match { - case fs: FileGeoTiffRasterSource ⇒ - fs.callback match { - case Some(cb: ReadMonitor) ⇒ assert(cb.reads === 1) - case o ⇒ fail(s"Expected '$o' to be a ReadMonitor") - } - case o ⇒ fail(s"Expected '$o' to be FileGeoTiffRasterSource") - } - }) - } - - describe("RasterSourceToTiles Expression") { - it("should read all tiles") { - val src = RasterSource(remoteMODIS) - - val subrasters = src.readAll().left.get - - val collected = subrasters.map(_.extent).reduceLeft(_.combine(_)) - - 
assert(src.extent.xmin === collected.xmin +- 0.01) - assert(src.extent.ymin === collected.ymin +- 0.01) - assert(src.extent.xmax === collected.xmax +- 0.01) - assert(src.extent.ymax === collected.ymax +- 0.01) - - val totalCells = subrasters.map(_.size).sum - - assert(totalCells === src.size) - - subrasters.zipWithIndex.foreach{case (r, i) ⇒ - // TODO: how to test? - GeoTiff(r, src.crs).write(s"target/$i.tiff") - } - } - } -} diff --git a/core/src/test/scala/examples/Classification.scala b/core/src/test/scala/examples/Classification.scala deleted file mode 100644 index 4aebcf742..000000000 --- a/core/src/test/scala/examples/Classification.scala +++ /dev/null @@ -1,160 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2017 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- * - */ - -package examples - -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.ml.{NoDataFilter, TileExploder} -import geotrellis.raster._ -import geotrellis.raster.io.geotiff.reader.GeoTiffReader -import geotrellis.raster.render.{ColorRamps, IndexedColorMap} -import org.apache.spark.ml.Pipeline -import org.apache.spark.ml.classification.DecisionTreeClassifier -import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator -import org.apache.spark.ml.feature.VectorAssembler -import org.apache.spark.ml.tuning.{CrossValidator, ParamGridBuilder} -import org.apache.spark.sql._ - -object Classification extends App { - -// // Utility for reading imagery from our test data set - def readTiff(name: String) = GeoTiffReader.readSingleband(getClass.getResource(s"/$name").getPath) - - implicit val spark = SparkSession.builder() - .master("local[*]") - .appName(getClass.getName) - .getOrCreate() - .withRasterFrames - - import spark.implicits._ - - // The first step is to load multiple bands of imagery and construct - // a single RasterFrame from them. - val filenamePattern = "L8-%s-Elkton-VA.tiff" - val bandNumbers = 2 to 7 - val bandColNames = bandNumbers.map(b ⇒ s"band_$b").toArray - val tileSize = 10 - - // For each identified band, load the associated image file - val joinedRF = bandNumbers - .map { b ⇒ (b, filenamePattern.format("B" + b)) } - .map { case (b, f) ⇒ (b, readTiff(f)) } - .map { case (b, t) ⇒ t.projectedRaster.toRF(tileSize, tileSize, s"band_$b") } - .reduce(_ spatialJoin _) - - // We should see a single spatial_key column along with 4 columns of tiles. - joinedRF.printSchema() - - // Similarly pull in the target label data. - val targetCol = "target" - - // Load the target label raster. 
We have to convert the cell type to - // Double to meet expectations of SparkML - val target = readTiff(filenamePattern.format("Labels")) - .mapTile(_.convert(DoubleConstantNoDataCellType)) - .projectedRaster - .toRF(tileSize, tileSize, targetCol) - - // Take a peek at what kind of label data we have to work with. - target.select(agg_stats(target(targetCol))).show - - val abt = joinedRF.spatialJoin(target) - - // SparkML requires that each observation be in its own row, and those - // observations be packed into a single `Vector`. The first step is to - // "explode" the tiles into a single row per cell/pixel - val exploder = new TileExploder() - - val noDataFilter = new NoDataFilter() - .setInputCols(bandColNames :+ targetCol) - - // To "vectorize" the the band columns we use the SparkML `VectorAssembler` - val assembler = new VectorAssembler() - .setInputCols(bandColNames) - .setOutputCol("features") - - // Using a decision tree for classification - val classifier = new DecisionTreeClassifier() - .setLabelCol(targetCol) - .setFeaturesCol(assembler.getOutputCol) - - // Assemble the model pipeline - val pipeline = new Pipeline() - .setStages(Array(exploder, noDataFilter, assembler, classifier)) - - // Configure how we're going to evaluate our model's performance. 
- val evaluator = new MulticlassClassificationEvaluator() - .setLabelCol(targetCol) - .setPredictionCol("prediction") - .setMetricName("f1") - - // Use a parameter grid to determine what the optimal max tree depth is for this data - val paramGrid = new ParamGridBuilder() - //.addGrid(classifier.maxDepth, Array(1, 2, 3, 4)) - .build() - - // Configure the cross validator - val trainer = new CrossValidator() - .setEstimator(pipeline) - .setEvaluator(evaluator) - .setEstimatorParamMaps(paramGrid) - .setNumFolds(4) - - // Push the "go" button - val model = trainer.fit(abt) - - // Format the `paramGrid` settings resultant model - val metrics = model.getEstimatorParamMaps - .map(_.toSeq.map(p ⇒ s"${p.param.name} = ${p.value}")) - .map(_.mkString(", ")) - .zip(model.avgMetrics) - - // Render the parameter/performance association - metrics.toSeq.toDF("params", "metric").show(false) - - // Score the original data set, including cells - // without target values. - val scored = model.bestModel.transform(joinedRF) - - // Add up class membership results - scored.groupBy($"prediction" as "class").count().show - - scored.show(10) - - val tlm = joinedRF.tileLayerMetadata.left.get - - val retiled = scored.groupBy($"spatial_key").agg( - assemble_tile( - $"column_index", $"row_index", $"prediction", - tlm.tileCols, tlm.tileRows, IntConstantNoDataCellType - ) - ) - - val rf = retiled.asRF($"spatial_key", tlm) - - val raster = rf.toRaster($"prediction", 186, 169) - - val clusterColors = IndexedColorMap.fromColorMap( - ColorRamps.Viridis.toColorMap((0 until 3).toArray) - ) - - raster.tile.renderPng(clusterColors).write("target/scala-2.11/tut/ml/classified.png") - - spark.stop() -} diff --git a/core/src/test/scala/examples/Clustering.scala b/core/src/test/scala/examples/Clustering.scala deleted file mode 100644 index 2f8d4ce1f..000000000 --- a/core/src/test/scala/examples/Clustering.scala +++ /dev/null @@ -1,108 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted 
below. - * - * Copyright 2017 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - * - */ - -package examples - -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.ml.TileExploder -import geotrellis.raster.ByteConstantNoDataCellType -import geotrellis.raster.io.geotiff.reader.GeoTiffReader -import geotrellis.raster.render.{ColorRamps, IndexedColorMap} -import org.apache.spark.ml.Pipeline -import org.apache.spark.ml.clustering.{KMeans, KMeansModel} -import org.apache.spark.ml.feature.VectorAssembler -import org.apache.spark.sql._ - -object Clustering extends App { - - // Utility for reading imagery from our test data set - def readTiff(name: String) = GeoTiffReader.readSingleband(getClass.getResource(s"/$name").getPath) - - implicit val spark = SparkSession.builder().master("local[*]").appName(getClass.getName).getOrCreate().withRasterFrames - - import spark.implicits._ - - // The first step is to load multiple bands of imagery and construct - // a single RasterFrame from them. 
- val filenamePattern = "L8-B%d-Elkton-VA.tiff" - val bandNumbers = 1 to 7 - val bandColNames = bandNumbers.map(b ⇒ s"band_$b").toArray - - // For each identified band, load the associated image file - val joinedRF = bandNumbers - .map { b ⇒ (b, filenamePattern.format(b)) } - .map { case (b,f) ⇒ (b, readTiff(f)) } - .map { case (b, t) ⇒ t.projectedRaster.toRF(s"band_$b") } - .reduce(_ spatialJoin _) - - // We should see a single spatial_key column along with 4 columns of tiles. - joinedRF.printSchema() - - // SparkML requires that each observation be in its own row, and those - // observations be packed into a single `Vector`. The first step is to - // "explode" the tiles into a single row per cell/pixel - val exploder = new TileExploder() - - // To "vectorize" the the band columns we use the SparkML `VectorAssembler` - val assembler = new VectorAssembler() - .setInputCols(bandColNames) - .setOutputCol("features") - - // Configure our clustering algorithm - val k = 5 - val kmeans = new KMeans().setK(k) - - // Combine the two stages - val pipeline = new Pipeline().setStages(Array(exploder, assembler, kmeans)) - - // Compute clusters - val model = pipeline.fit(joinedRF) - - // Run the data through the model to assign cluster IDs to each - val clustered = model.transform(joinedRF) - clustered.show(8) - - // If we want to inspect the model statistics, the SparkML API requires us to go - // through this unfortunate contortion: - val clusterResults = model.stages.collect{ case km: KMeansModel ⇒ km}.head - - // Compute sum of squared distances of points to their nearest center - val metric = clusterResults.computeCost(clustered) - println("Within set sum of squared errors: " + metric) - - val tlm = joinedRF.tileLayerMetadata.left.get - - val retiled = clustered.groupBy($"spatial_key").agg( - assemble_tile( - $"column_index", $"row_index", $"prediction", - tlm.tileCols, tlm.tileRows, ByteConstantNoDataCellType) - ) - - val rf = retiled.asRF($"spatial_key", tlm) - - val 
raster = rf.toRaster($"prediction", 186, 169) - - val clusterColors = IndexedColorMap.fromColorMap( - ColorRamps.Viridis.toColorMap((0 until k).toArray) - ) - - raster.tile.renderPng(clusterColors).write("clustered.png") - - spark.stop() -} diff --git a/core/src/test/scala/examples/CreatingRasterFrames.scala b/core/src/test/scala/examples/CreatingRasterFrames.scala index f7a69043a..8b5c00c72 100644 --- a/core/src/test/scala/examples/CreatingRasterFrames.scala +++ b/core/src/test/scala/examples/CreatingRasterFrames.scala @@ -27,19 +27,17 @@ package examples object CreatingRasterFrames extends App { // # Creating RasterFrames // -// There are a number of ways to create a `RasterFrame`, as enumerated in the sections below. +// There are a number of ways to create a `RasterFrameLayer`, as enumerated in the sections below. // // ## Initialization // // First, some standard `import`s: - import astraea.spark.rasterframes._ + import org.locationtech.rasterframes._ import geotrellis.raster._ - import geotrellis.raster.render._ - import geotrellis.spark.io._ import geotrellis.raster.io.geotiff.SinglebandGeoTiff + import geotrellis.spark.io._ import org.apache.spark.sql._ - import org.apache.spark.sql.functions._ // Next, initialize the `SparkSession`, and call the `withRasterFrames` method on it: @@ -47,27 +45,25 @@ object CreatingRasterFrames extends App { master("local[*]").appName("RasterFrames"). getOrCreate(). withRasterFrames - - import spark.implicits._ spark.sparkContext.setLogLevel("ERROR") // ## From `ProjectedExtent` // -// The simplest mechanism for getting a RasterFrame is to use the `toRF(tileCols, tileRows)` extension method on `ProjectedRaster`. +// The simplest mechanism for getting a RasterFrameLayer is to use the `toLayer(tileCols, tileRows)` extension method on `ProjectedRaster`. 
val scene = SinglebandGeoTiff("src/test/resources/L8-B8-Robinson-IL.tiff") - val rf = scene.projectedRaster.toRF(128, 128) + val rf = scene.projectedRaster.toLayer(128, 128) rf.show(5, false) // ## From `TileLayerRDD` // -// Another option is to use a GeoTrellis [`LayerReader`](https://docs.geotrellis.io/en/latest/guide/tile-backends.html), to get a `TileLayerRDD` for which there's also a `toRF` extension method. +// Another option is to use a GeoTrellis [`LayerReader`](https://docs.geotrellis.io/en/latest/guide/tile-backends.html), to get a `TileLayerRDD` for which there's also a `toLayer` extension method. // ## Inspecting Structure // -// `RasterFrame` has a number of methods providing access to metadata about the contents of the RasterFrame. +// `RasterFrameLayer` has a number of methods providing access to metadata about the contents of the RasterFrameLayer. // // ### Tile Column Names diff --git a/core/src/test/scala/examples/Exporting.scala b/core/src/test/scala/examples/Exporting.scala index 247e93944..25fa321c1 100644 --- a/core/src/test/scala/examples/Exporting.scala +++ b/core/src/test/scala/examples/Exporting.scala @@ -20,13 +20,11 @@ package examples import java.nio.file.Files -import astraea.spark.rasterframes._ +import org.locationtech.rasterframes._ import geotrellis.raster._ +import geotrellis.raster.io.geotiff.SinglebandGeoTiff import geotrellis.raster.render._ -import geotrellis.raster.io.geotiff.{GeoTiff, SinglebandGeoTiff} import geotrellis.spark.{LayerId, SpatialKey} -import geotrellis.spark.io.LayerWriter -import geotrellis.spark.io.file.{FileAttributeStore, FileLayerWriter} import org.apache.spark.sql._ import org.apache.spark.sql.functions._ import spray.json.JsValue @@ -40,7 +38,7 @@ object Exporting extends App { import spark.implicits._ val scene = SinglebandGeoTiff("src/test/resources/L8-B8-Robinson-IL.tiff") - val rf = scene.projectedRaster.toRF(128, 128).cache() + val rf = scene.projectedRaster.toLayer(128, 128).cache() // While the 
goal of RasterFrames is to make it as easy as possible to do your geospatial analysis with a single // construct, it is helpful to be able to transform it into other representations for various use cases. @@ -54,17 +52,17 @@ object Exporting extends App { // The @scaladoc[`tile_to_array`][tile_to_array] column function requires a type parameter to indicate the array element // type you would like used. The following types may be used: `Int`, `Double`, `Byte`, `Short`, `Float` - val withArrays = rf.withColumn("tileData", tile_to_array_int($"tile")).drop("tile") + val withArrays = rf.withColumn("tileData", rf_tile_to_array_int($"tile")).drop("tile") withArrays.show(5, 40) // You can convert the data back to an array, but you have to specify the target tile dimensions. - val tileBack = withArrays.withColumn("tileAgain", array_to_tile($"tileData", 128, 128)) + val tileBack = withArrays.withColumn("tileAgain", rf_array_to_tile($"tileData", 128, 128)) tileBack.drop("tileData").show(5, 40) // Note that the created tile will not have a `NoData` value associated with it. Here's how you can do that: - val tileBackAgain = withArrays.withColumn("tileAgain", with_no_data(array_to_tile($"tileData", 128, 128), 3)) + val tileBackAgain = withArrays.withColumn("tileAgain", rf_with_no_data(rf_array_to_tile($"tileData", 128, 128), 3)) tileBackAgain.drop("tileData").show(5, 50) // ## Writing to Parquet @@ -75,15 +73,13 @@ object Exporting extends App { // the imagery types. 
// // - // Let's assume we have a RasterFrame we've done some fancy processing on: - - import geotrellis.raster.equalization._ + // Let's assume we have a RasterFrameLayer we've done some fancy processing on: val equalizer = udf((t: Tile) => t.equalize()) - val equalized = rf.withColumn("equalized", equalizer($"tile")).asRF + val equalized = rf.withColumn("equalized", equalizer($"tile")).asLayer equalized.printSchema - equalized.select(agg_stats($"tile")).show(false) - equalized.select(agg_stats($"equalized")).show(false) + equalized.select(rf_agg_stats($"tile")).show(false) + equalized.select(rf_agg_stats($"equalized")).show(false) // We write it out just like any other DataFrame, including the ability to specify partitioning: @@ -102,12 +98,12 @@ object Exporting extends App { val rf2 = spark.read.parquet(filePath) rf2.printSchema - equalized.select(agg_stats($"tile")).show(false) - equalized.select(agg_stats($"equalized")).show(false) + equalized.select(rf_agg_stats($"tile")).show(false) + equalized.select(rf_agg_stats($"equalized")).show(false) // ## Converting to `RDD` and `TileLayerRDD` // - // Since a `RasterFrame` is just a `DataFrame` with extra metadata, the method + // Since a `RasterFrameLayer` is just a `DataFrame` with extra metadata, the method // @scaladoc[`DataFrame.rdd`][rdd] is available for simple conversion back to `RDD` space. The type returned // by `.rdd` is dependent upon how you select it. 
@@ -122,14 +118,14 @@ object Exporting extends App { showType(rf.select(rf.spatialKeyColumn, $"tile").as[(SpatialKey, Tile)].rdd) // If your goal convert a single tile column with its spatial key back to a `TileLayerRDD[K]`, then there's an additional - // extension method on `RasterFrame` called [`toTileLayerRDD`][toTileLayerRDD], which preserves the tile layer metadata, + // extension method on `RasterFrameLayer` called [`toTileLayerRDD`][toTileLayerRDD], which preserves the tile layer metadata, // enhancing interoperation with GeoTrellis RDD extension methods. showType(rf.toTileLayerRDD($"tile".as[Tile])) // ## Exporting a Raster // - // For the purposes of debugging, the RasterFrame tiles can be reassembled back into a raster for viewing. However, + // For the purposes of debugging, the RasterFrameLayer tiles can be reassembled back into a raster for viewing. However, // keep in mind that this will download all the data to the driver, and reassemble it in-memory. So it's not appropriate // for very large coverages. // @@ -151,7 +147,7 @@ object Exporting extends App { // [*Download GeoTIFF*](rf-raster.tiff) // # Exporting to a GeoTrellis Layer - // First, convert the RasterFrame into a TileLayerRDD. The return type is an Either; + // First, convert the RasterFrameLayer into a TileLayerRDD. 
The return type is an Either; // the `left` side is for spatial-only keyed data val tlRDD = equalized.toTileLayerRDD($"equalized").left.get diff --git a/core/src/test/scala/examples/LocalArithmetic.scala b/core/src/test/scala/examples/LocalArithmetic.scala index ddf666e96..428fcc64a 100644 --- a/core/src/test/scala/examples/LocalArithmetic.scala +++ b/core/src/test/scala/examples/LocalArithmetic.scala @@ -19,7 +19,7 @@ package examples -import astraea.spark.rasterframes._ +import org.locationtech.rasterframes._ import geotrellis.raster.io.geotiff.SinglebandGeoTiff import geotrellis.spark.io.kryo.KryoRegistrator import org.apache.spark.serializer.KryoSerializer @@ -49,15 +49,15 @@ object LocalArithmetic extends App { val joinedRF = bandNumbers. map { b ⇒ (b, filenamePattern.format(b)) }. map { case (b, f) ⇒ (b, readTiff(f)) }. - map { case (b, t) ⇒ t.projectedRaster.toRF(s"band_$b") }. + map { case (b, t) ⇒ t.projectedRaster.toLayer(s"band_$b") }. reduce(_ spatialJoin _) - val addRF = joinedRF.withColumn("1+2", local_add(joinedRF("band_1"), joinedRF("band_2"))).asRF - val divideRF = joinedRF.withColumn("1/2", local_divide(joinedRF("band_1"), joinedRF("band_2"))).asRF + val addRF = joinedRF.withColumn("1+2", rf_local_add(joinedRF("band_1"), joinedRF("band_2"))).asLayer + val divideRF = joinedRF.withColumn("1/2", rf_local_divide(joinedRF("band_1"), joinedRF("band_2"))).asLayer addRF.select("1+2").collect().apply(0) .getClass - val raster = divideRF.select(tile_sum(divideRF("1/2")), - tile_sum(joinedRF("band_1")), tile_sum(joinedRF("band_2"))) + val raster = divideRF.select(rf_tile_sum(divideRF("1/2")), + rf_tile_sum(joinedRF("band_1")), rf_tile_sum(joinedRF("band_2"))) raster.show(1) } \ No newline at end of file diff --git a/core/src/test/scala/examples/Masking.scala b/core/src/test/scala/examples/Masking.scala index bc9c59213..6270bcef1 100644 --- a/core/src/test/scala/examples/Masking.scala +++ b/core/src/test/scala/examples/Masking.scala @@ -1,12 +1,11 @@ package 
examples -import astraea.spark.rasterframes._ +import org.locationtech.rasterframes._ import geotrellis.raster.io.geotiff.SinglebandGeoTiff -import org.apache.spark.sql._ -import geotrellis.raster.{mask => _, _} import geotrellis.raster.render._ +import geotrellis.raster.{mask => _, _} +import org.apache.spark.sql._ import org.apache.spark.sql.functions._ -import astraea.spark.rasterframes.stats.{CellHistogram=>CH} object Masking extends App { @@ -25,18 +24,18 @@ object Masking extends App { val joinedRF = bandNumbers. map { b ⇒ (b, filenamePattern.format(b)) }. map { case (b, f) ⇒ (b, readTiff(f)) }. - map { case (b, t) ⇒ t.projectedRaster.toRF(s"band_$b") }. + map { case (b, t) ⇒ t.projectedRaster.toLayer(s"band_$b") }. reduce(_ spatialJoin _) val threshold = udf((t: Tile) => { t.convert(IntConstantNoDataCellType).map(x => if (x > 10500) x else NODATA) } ) - val withMaskedTile = joinedRF.withColumn("maskTile", threshold(joinedRF("band_1"))).asRF + val withMaskedTile = joinedRF.withColumn("maskTile", threshold(joinedRF("band_1"))).asLayer - withMaskedTile.select(no_data_cells(withMaskedTile("maskTile"))).show() + withMaskedTile.select(rf_no_data_cells(withMaskedTile("maskTile"))).show() - val masked = withMaskedTile.withColumn("masked", mask(joinedRF("band_2"), joinedRF("maskTile"))).asRF + val masked = withMaskedTile.withColumn("masked", rf_mask(joinedRF("band_2"), joinedRF("maskTile"))).asLayer val maskRF = masked.toRaster(masked("masked"), 466, 428) val b2 = masked.toRaster(masked("band_2"), 466, 428) diff --git a/core/src/test/scala/examples/MeanValue.scala b/core/src/test/scala/examples/MeanValue.scala index d2190a241..2ee264469 100644 --- a/core/src/test/scala/examples/MeanValue.scala +++ b/core/src/test/scala/examples/MeanValue.scala @@ -19,10 +19,9 @@ package examples -import astraea.spark.rasterframes._ +import org.locationtech.rasterframes._ import geotrellis.raster.io.geotiff.SinglebandGeoTiff import org.apache.spark.sql.SparkSession -import 
org.apache.spark.sql.functions._ /** * Compute the cell mean value of an image. @@ -40,12 +39,12 @@ object MeanValue extends App { val scene = SinglebandGeoTiff("src/test/resources/L8-B8-Robinson-IL.tiff") - val rf = scene.projectedRaster.toRF(128, 128) // <-- tile size + val rf = scene.projectedRaster.toLayer(128, 128) // <-- tile size rf.printSchema val tileCol = rf("tile") - rf.agg(agg_no_data_cells(tileCol), agg_data_cells(tileCol), agg_mean(tileCol)).show(false) + rf.agg(rf_agg_no_data_cells(tileCol), rf_agg_data_cells(tileCol), rf_agg_mean(tileCol)).show(false) spark.stop() } diff --git a/core/src/test/scala/examples/NDVI.scala b/core/src/test/scala/examples/NDVI.scala index 971dfd8d4..48a6f6e51 100644 --- a/core/src/test/scala/examples/NDVI.scala +++ b/core/src/test/scala/examples/NDVI.scala @@ -20,7 +20,7 @@ package examples import java.nio.file.{Files, Paths} -import astraea.spark.rasterframes._ +import org.locationtech.rasterframes._ import geotrellis.raster._ import geotrellis.raster.render._ import geotrellis.raster.io.geotiff.{GeoTiff, SinglebandGeoTiff} @@ -46,8 +46,8 @@ object NDVI extends App { import spark.implicits._ - def redBand = readTiff("L8-B4-Elkton-VA.tiff").projectedRaster.toRF("red_band") - def nirBand = readTiff("L8-B5-Elkton-VA.tiff").projectedRaster.toRF("nir_band") + def redBand = readTiff("L8-B4-Elkton-VA.tiff").projectedRaster.toLayer("red_band") + def nirBand = readTiff("L8-B5-Elkton-VA.tiff").projectedRaster.toLayer("nir_band") val ndvi = udf((red: Tile, nir: Tile) => { val redd = red.convert(DoubleConstantNoDataCellType) @@ -55,7 +55,7 @@ object NDVI extends App { (nird - redd) / (nird + redd) }) - val rf = redBand.spatialJoin(nirBand).withColumn("ndvi", ndvi($"red_band", $"nir_band")).asRF + val rf = redBand.spatialJoin(nirBand).withColumn("ndvi", ndvi($"red_band", $"nir_band")).asLayer rf.printSchema() diff --git a/core/src/test/scala/examples/NaturalColorComposite.scala 
b/core/src/test/scala/examples/NaturalColorComposite.scala index 3dee4092c..1a3e212ac 100644 --- a/core/src/test/scala/examples/NaturalColorComposite.scala +++ b/core/src/test/scala/examples/NaturalColorComposite.scala @@ -20,11 +20,8 @@ package examples -import java.nio.file.{CopyOption, StandardCopyOption} - +import geotrellis.raster.io.geotiff.SinglebandGeoTiff import geotrellis.raster.{MultibandTile, UByteConstantNoDataCellType} -import geotrellis.raster.io.geotiff.{GeoTiff, SinglebandGeoTiff} -import geotrellis.raster.render._ /** * diff --git a/core/src/test/scala/examples/Scratch.scala b/core/src/test/scala/examples/Scratch.scala deleted file mode 100644 index fca8c4785..000000000 --- a/core/src/test/scala/examples/Scratch.scala +++ /dev/null @@ -1,47 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2017 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- * - */ - -package examples - -import astraea.spark.rasterframes._ -import geotrellis.spark.io.kryo.KryoRegistrator -import org.apache.spark.serializer.KryoSerializer -import org.apache.spark.sql._ - -/** - * Boilerplate test run file - * - * @since 10/8/17 - */ -object Scratch extends App { - implicit val spark = SparkSession.builder() - .master("local[*]") - .appName(getClass.getName) - .config("spark.serializer", classOf[KryoSerializer].getName) - .config("spark.kryoserializer.buffer.max", "500m") - .config("spark.kryo.registrationRequired", "false") - .config("spark.kryo.registrator", classOf[KryoRegistrator].getName) - .getOrCreate() - .withRasterFrames - - import spark.implicits._ - - // Your Spark code here..... - -} diff --git a/core/src/test/scala/examples/Tour.scala b/core/src/test/scala/examples/Tour.scala deleted file mode 100644 index d69cb5a1c..000000000 --- a/core/src/test/scala/examples/Tour.scala +++ /dev/null @@ -1,140 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2017 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- * - */ - -package examples - -import org.apache.spark.sql._ -import org.apache.spark.sql.functions._ -import geotrellis.raster.io.geotiff._ -import geotrellis.raster.{ByteConstantNoDataCellType, Tile} -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.ml.TileExploder -import geotrellis.raster.render.{ColorRamps, IndexedColorMap} -import org.apache.spark.ml.Pipeline -import org.apache.spark.ml.clustering.KMeans -import org.apache.spark.ml.feature.VectorAssembler - -/** - * Example tour of some general features in RasterFrames - * - * @since 10/24/17 - */ -object Tour extends App { - implicit val spark = SparkSession.builder() - .master("local[*]") - .appName(getClass.getName) - .getOrCreate() - .withRasterFrames - - import spark.implicits._ - - // Read in a geo-referenced image - val scene = SinglebandGeoTiff("src/test/resources/L8-B8-Robinson-IL.tiff") - - // Convert it to a raster frame, discretizing it into the given tile size. - val rf = scene.projectedRaster.toRF(64, 64) - - // See how many tiles we have after discretization - println("Tile count: " + rf.count()) - - // Take a peek at what we're working with - rf.show(8, false) - - // Confirm we have equally sized tiles - rf.select(tile_dimensions($"tile")).distinct().show() - - // Count the number of no-data cells - rf.select(agg_no_data_cells($"tile")).show(false) - - // Compute per-tile statistics - rf.select(tile_stats($"tile")).show(8, false) - - // Compute some aggregate stats over all cells - rf.select(agg_stats($"tile")).show(false) - - // Create a Spark UDT to perform contrast adjustment via GeoTrellis - val contrast = udf((t: Tile) ⇒ t.sigmoidal(0.2, 10)) - - // Let's contrast adjust the tile column - val withAdjusted = rf.withColumn("adjusted", contrast($"tile")).asRF - - // Show the stats for the adjusted version - withAdjusted.select(agg_stats($"adjusted")).show(false) - - // Reassemble into a raster and save to a file - val raster = withAdjusted.toRaster($"adjusted", 774, 
500) - GeoTiff(raster).write("contrast-adjusted.tiff") - - // Perform some arbitrary local ops between columns and render - val withOp = withAdjusted.withColumn("op", local_subtract($"tile", $"adjusted")).asRF - val raster2 = withOp.toRaster($"op", 774, 500) - GeoTiff(raster2).write("with-op.tiff") - - - // Perform k-means clustering - val k = 4 - - // SparkML doesn't like NoData/NaN values, so we set the no-data value to something less offensive - val forML = rf.select(rf.spatialKeyColumn, with_no_data($"tile", 99999) as "tile").asRF - - // First we instantiate the transformer that converts tile rows into cell rows. - val exploder = new TileExploder() - - // This transformer wraps the pixel values in a vector. - // Could use this with multiple bands - val assembler = new VectorAssembler(). - setInputCols(Array("tile")). - setOutputCol("features") - - // Or clustering algorithm - val kmeans = new KMeans().setK(k) - - // Construct the ML pipeline - val pipeline = new Pipeline().setStages(Array(exploder, assembler, kmeans)) - - // Compute the model - val model = pipeline.fit(forML) - - // Score the data - val clusteredCells = model.transform(forML) - - clusteredCells.show() - - clusteredCells.groupBy("prediction").count().show - - // Reassembling the clustering results takes a number of steps. 
- val tlm = rf.tileLayerMetadata.left.get - - // RasterFrames provides a special aggregation function for assembling tiles from cells with column/row indexes - val retiled = clusteredCells.groupBy(forML.spatialKeyColumn).agg( - assemble_tile($"column_index", $"row_index", $"prediction", tlm.tileCols, tlm.tileRows, ByteConstantNoDataCellType) - ) - - val clusteredRF = retiled.asRF($"spatial_key", tlm) - - val raster3 = clusteredRF.toRaster($"prediction", 774, 500) - - val clusterColors = IndexedColorMap.fromColorMap( - ColorRamps.Viridis.toColorMap((0 until k).toArray) - ) - - GeoTiff(raster3).copy(options = GeoTiffOptions(clusterColors)).write("clustered.tiff") - - spark.stop() -} diff --git a/core/src/test/scala/astraea/spark/rasterframes/ExplodeSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/ExplodeSpec.scala similarity index 62% rename from core/src/test/scala/astraea/spark/rasterframes/ExplodeSpec.scala rename to core/src/test/scala/org/locationtech/rasterframes/ExplodeSpec.scala index a06b6444b..da0af2397 100644 --- a/core/src/test/scala/astraea/spark/rasterframes/ExplodeSpec.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/ExplodeSpec.scala @@ -15,9 +15,11 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes +package org.locationtech.rasterframes import geotrellis.raster._ import geotrellis.raster.resample.NearestNeighbor @@ -43,14 +45,14 @@ class ExplodeSpec extends TestEnvironment with TestData { write(query) assert(query.select("cell_0", "cell_1").as[(Double, Double)].collect().forall(_ == ((1.0, 2.0)))) val query2 = sql( - """|select rf_tile_dimensions(tiles) as dims, rf_explode_tiles(tiles) from ( + """|select rf_dimensions(tiles) as dims, rf_explode_tiles(tiles) from ( |select rf_make_constant_tile(1, 10, 10, 'int8raw') as tiles) |""".stripMargin) write(query2) assert(query2.columns.length === 4) val df = Seq[(Tile, Tile)]((byteArrayTile, byteArrayTile)).toDF("tile1", "tile2") - val exploded = df.select(explode_tiles($"tile1", $"tile2")) + val exploded = df.select(rf_explode_tiles($"tile1", $"tile2")) //exploded.printSchema() assert(exploded.columns.length === 4) assert(exploded.count() === 9) @@ -59,17 +61,17 @@ class ExplodeSpec extends TestEnvironment with TestData { it("should explode tiles with random sampling") { val df = Seq[(Tile, Tile)]((byteArrayTile, byteArrayTile)).toDF("tile1", "tile2") - val exploded = df.select(explode_tiles_sample(0.5, $"tile1", $"tile2")) + val exploded = df.select(rf_explode_tiles_sample(0.5, $"tile1", $"tile2")) assert(exploded.columns.length === 4) assert(exploded.count() < 9) } it("should handle null tiles") { val df = Seq[Tile](null, byteArrayTile, null, byteArrayTile, null).toDF("tile1") - val exploded = df.select(explode_tiles($"tile1")) + val exploded = df.select(rf_explode_tiles($"tile1")) assert(exploded.count === byteArrayTile.size * 2) val df2 = Seq[(Tile, Tile)]((byteArrayTile, null), (null, byteArrayTile), (byteArrayTile, byteArrayTile)).toDF("tile1", "tile2") - val exploded2 = df2.select(explode_tiles($"tile1", $"tile2")) + val exploded2 = df2.select(rf_explode_tiles($"tile1", $"tile2")) assert(exploded2.count === 
byteArrayTile.size * 3) } @@ -77,7 +79,7 @@ class ExplodeSpec extends TestEnvironment with TestData { // Create a tile with a single (wierd) no-data value val tile: Tile = UShortArrayTile(rangeArray(9, _.toShort), 3, 3, 5.toShort) val cells = Seq(tile).toDF("tile") - .select(explode_tiles($"tile")) + .select(rf_explode_tiles($"tile")) .select($"tile".as[Double]) .collect() @@ -87,7 +89,7 @@ class ExplodeSpec extends TestEnvironment with TestData { it("should handle user-defined NoData values in tile sampler") { val tiles = allTileTypes.filter(t ⇒ !t.isInstanceOf[BitArrayTile]).map(_.withNoData(Some(3))) val cells = tiles.toDF("tile") - .select(explode_tiles($"tile")) + .select(rf_explode_tiles($"tile")) .select($"tile".as[Double]) .collect() cells.count(_.isNaN) should be(tiles.size) @@ -103,53 +105,92 @@ class ExplodeSpec extends TestEnvironment with TestData { val tile = FloatConstantTile(1.1f, 10, 10, FloatCellType) val df = Seq[Tile](tile).toDF("tile") - val arrayDF = df.select(tile_to_array_double($"tile").as[Array[Double]]) + val arrayDF = df.select(rf_tile_to_array_double($"tile").as[Array[Double]]) arrayDF.first().sum should be (110.0 +- 0.0001) } it("should convert an array into a tile") { - val tile = FloatConstantTile(1.1f, 10, 10, FloatCellType) + val tile = TestData.randomTile(10, 10, FloatCellType) val df = Seq[Tile](tile, null).toDF("tile") - val arrayDF = df.withColumn("tileArray", tile_to_array_double($"tile")) + val arrayDF = df.withColumn("tileArray", rf_tile_to_array_double($"tile")) - val back = arrayDF.withColumn("backToTile", array_to_tile($"tileArray", 10, 10)) + val back = arrayDF.withColumn("backToTile", rf_array_to_tile($"tileArray", 10, 10)) val result = back.select($"backToTile".as[Tile]).first assert(result.toArrayDouble() === tile.toArrayDouble()) - val hasNoData = back.withColumn("with_no_data", with_no_data($"backToTile", 0)) + // Same round trip, but with SQL expression for rf_array_to_tile + val resultSql = 
arrayDF.selectExpr("rf_array_to_tile(tileArray, 10, 10) as backToTile").as[Tile].first + + assert(resultSql.toArrayDouble() === tile.toArrayDouble()) - val result2 = hasNoData.select($"with_no_data".as[Tile]).first + val hasNoData = back.withColumn("withNoData", rf_with_no_data($"backToTile", 0)) + + val result2 = hasNoData.select($"withNoData".as[Tile]).first assert(result2.cellType.asInstanceOf[UserDefinedNoData[_]].noDataValue === 0) } it("should reassemble single exploded tile") { - val df = Seq[Tile](byteArrayTile).toDF("tile") - .select(explode_tiles($"tile")) + val tile = TestData.randomTile(10, 10, FloatCellType) + val df = Seq[Tile](tile).toDF("tile") + .select(rf_explode_tiles($"tile")) - val assembled = df.agg(assemble_tile( + val assembled = df.agg( + rf_assemble_tile( COLUMN_INDEX_COLUMN, ROW_INDEX_COLUMN, TILE_COLUMN, - 3, 3, byteArrayTile.cellType + 10, 10, tile.cellType )).as[Tile] val result = assembled.first() - assert(result === byteArrayTile) + assert(result === tile) + + val assembledSqlExpr = df.selectExpr("rf_assemble_tile(column_index, row_index, tile, 10, 10)") + + val resultSql = assembledSqlExpr.as[Tile].first() + assert(resultSql === tile) + + checkDocs("rf_assemble_tile") + } + + it("should reassemble single exploded tile with user-defined nodata") { + val ct = FloatUserDefinedNoDataCellType(-99) + val tile = TestData.injectND(3)(TestData.randomTile(5, 5, ct)) + val df = Seq[Tile](tile).toDF("tile") + .select(rf_explode_tiles($"tile")) + + val assembled = df.agg(rf_assemble_tile( + COLUMN_INDEX_COLUMN, + ROW_INDEX_COLUMN, + TILE_COLUMN, + 5, 5, ct + )).as[Tile] + + val result = assembled.first() + assert(result === tile) + + // and with SQL API + logger.info(df.schema.treeString) + + val assembledSqlExpr = df.selectExpr(s"rf_convert_cell_type(rf_assemble_tile(column_index, row_index, tile, 5, 5), '${ct.toString()}') as tile") + + val resultSql = assembledSqlExpr.as[Tile].first() + assert(resultSql === tile) + assert(resultSql.cellType 
=== ct) } it("should reassemble multiple exploded tiles") { val image = sampleSmallGeoTiff - val tinyTiles = image.projectedRaster.toRF(10, 10) + val tinyTiles = image.projectedRaster.toLayer(10, 10) - val exploded = tinyTiles.select(tinyTiles.spatialKeyColumn, explode_tiles(tinyTiles.tileColumns.head)) - - //exploded.printSchema() + val exploded = tinyTiles.select(tinyTiles.spatialKeyColumn, rf_explode_tiles(tinyTiles.tileColumns.head)) val assembled = exploded.groupBy(tinyTiles.spatialKeyColumn) - .agg(assemble_tile( + .agg( + rf_assemble_tile( COLUMN_INDEX_COLUMN, ROW_INDEX_COLUMN, TILE_COLUMN, @@ -158,7 +199,7 @@ class ExplodeSpec extends TestEnvironment with TestData { val tlm = tinyTiles.tileLayerMetadata.left.get - val rf = assembled.asRF(SPATIAL_KEY_COLUMN, tlm) + val rf = assembled.asLayer(SPATIAL_KEY_COLUMN, tlm) val (cols, rows) = image.tile.dimensions diff --git a/core/src/test/scala/astraea/spark/rasterframes/ExtensionMethodSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/ExtensionMethodSpec.scala similarity index 64% rename from core/src/test/scala/astraea/spark/rasterframes/ExtensionMethodSpec.scala rename to core/src/test/scala/org/locationtech/rasterframes/ExtensionMethodSpec.scala index 81c2d9202..eeea68544 100644 --- a/core/src/test/scala/astraea/spark/rasterframes/ExtensionMethodSpec.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/ExtensionMethodSpec.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2018 Astraea. Inc. + * Copyright 2018 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -15,16 +15,18 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes -import astraea.spark.rasterframes.util.SubdivideSupport._ +package org.locationtech.rasterframes + import geotrellis.proj4.LatLng import geotrellis.raster.{ByteCellType, GridBounds, TileLayout} +import geotrellis.spark.tiling.{CRSWorldExtent, LayoutDefinition} import geotrellis.spark.{KeyBounds, SpatialKey, TileLayerMetadata} -import geotrellis.spark.tiling.LayoutDefinition -import geotrellis.spark.tiling.CRSWorldExtent +import org.apache.spark.sql.Encoders +import org.locationtech.rasterframes.util.SubdivideSupport /** * Tests miscellaneous extension methods. @@ -32,20 +34,20 @@ import geotrellis.spark.tiling.CRSWorldExtent * @since 3/20/18 */ //noinspection ScalaUnusedSymbol -class ExtensionMethodSpec extends TestEnvironment with TestData { - lazy val rf = sampleTileLayerRDD.toRF +class ExtensionMethodSpec extends TestEnvironment with TestData with SubdivideSupport { + lazy val rf = sampleTileLayerRDD.toLayer describe("DataFrame exention methods") { it("should maintain original type") { val df = rf.withPrefixedColumnNames("_foo_") - "val rf2: RasterFrame = df" should compile + "val rf2: RasterFrameLayer = df" should compile } it("should provide tagged column access") { val df = rf.drop("tile") "val Some(col) = df.spatialKeyColumn" should compile } } - describe("RasterFrame exention methods") { + describe("RasterFrameLayer exention methods") { it("should provide spatial key column") { noException should be thrownBy { rf.spatialKeyColumn @@ -54,14 +56,28 @@ class ExtensionMethodSpec extends TestEnvironment with TestData { } } describe("Miscellaneous extensions") { + import spark.implicits._ + + it("should find multiple extent columns") { + val df = Seq((extent, "fred", extent, 34.0)).toDF("e1", "s", "e2", "n") + df.extentColumns.size should be(2) + } + + it("should find multiple crs columns") { + // Not sure why implicit resolution isn't handling this properly. 
+ implicit val enc = Encoders.tuple(crsEncoder, Encoders.STRING, crsEncoder, Encoders.scalaDouble) + val df = Seq((pe.crs, "fred", pe.crs, 34.0)).toDF("c1", "s", "c2", "n") + df.crsColumns.size should be(2) + } + it("should split TileLayout") { val tl1 = TileLayout(2, 3, 10, 10) assert(tl1.subdivide(0) === tl1) assert(tl1.subdivide(1) === tl1) assert(tl1.subdivide(2) === TileLayout(4, 6, 5, 5)) assertThrows[IllegalArgumentException](tl1.subdivide(-1)) - } + it("should split KeyBounds[SpatialKey]") { val grid = GridBounds(0, 0, 9, 9) val kb = KeyBounds(grid) @@ -76,10 +92,10 @@ class ExtensionMethodSpec extends TestEnvironment with TestData { it("should split key") { val s1 = SpatialKey(0, 0).subdivide(2) - assert(s1 === Seq(SpatialKey(0,0), SpatialKey(1,0), SpatialKey(0,1), SpatialKey(1,1))) + assert(s1 === Seq(SpatialKey(0, 0), SpatialKey(1, 0), SpatialKey(0, 1), SpatialKey(1, 1))) val s2 = SpatialKey(2, 3).subdivide(3) - assert(s2 === Seq(SpatialKey(6,9), SpatialKey(7,9), SpatialKey(8,9), SpatialKey(6,10), SpatialKey(7,10), SpatialKey(8,10), SpatialKey(6,11), SpatialKey(7,11), SpatialKey(8,11))) + assert(s2 === Seq(SpatialKey(6, 9), SpatialKey(7, 9), SpatialKey(8, 9), SpatialKey(6, 10), SpatialKey(7, 10), SpatialKey(8, 10), SpatialKey(6, 11), SpatialKey(7, 11), SpatialKey(8, 11))) } it("should split TileLayerMetadata[SpatialKey]") { @@ -91,7 +107,12 @@ class ExtensionMethodSpec extends TestEnvironment with TestData { val divided = tlm.subdivide(2) - assert(divided.tileLayout.tileDimensions === (tileSize/2, tileSize/2)) + assert(divided.tileLayout.tileDimensions === (tileSize / 2, tileSize / 2)) + } + + it("should render Markdown") { + import org.locationtech.rasterframes.util._ + rf.toMarkdown().count(_ == '|') shouldBe >=(3 * 5) } } } diff --git a/core/src/test/scala/astraea/spark/rasterframes/JTSSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/GeometryFunctionsSpec.scala similarity index 52% rename from 
core/src/test/scala/astraea/spark/rasterframes/JTSSpec.scala rename to core/src/test/scala/org/locationtech/rasterframes/GeometryFunctionsSpec.scala index 52def8620..54321d0dc 100644 --- a/core/src/test/scala/astraea/spark/rasterframes/JTSSpec.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/GeometryFunctionsSpec.scala @@ -15,22 +15,27 @@ * License for the specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes +package org.locationtech.rasterframes -import com.vividsolutions.jts.geom._ import geotrellis.proj4.{LatLng, Sinusoidal, WebMercator} -import geotrellis.vector.{Point ⇒ GTPoint} +import geotrellis.vector.{Extent, Point => GTPoint} +import org.locationtech.jts.geom._ +import spray.json.JsNumber /** * Test rig for operations providing interop with JTS types. * * @since 12/16/17 */ -class JTSSpec extends TestEnvironment with TestData with StandardColumns { - describe("JTS interop") { - val rf = l8Sample(1).projectedRaster.toRF(10, 10).withBounds() +class GeometryFunctionsSpec extends TestEnvironment with TestData with StandardColumns { + import spark.implicits._ + + describe("Vector geometry operations") { + val rf = l8Sample(1).projectedRaster.toLayer(10, 10).withGeometry() it("should allow joining and filtering of tiles based on points") { import spark.implicits._ @@ -43,32 +48,32 @@ class JTSSpec extends TestEnvironment with TestData with StandardColumns { val locs = coords.toDF("id", "point") withClue("join with point column") { - assert(rf.join(locs, st_contains(BOUNDS_COLUMN, $"point")).count === coords.length) - assert(rf.join(locs, st_intersects(BOUNDS_COLUMN, $"point")).count === coords.length) + assert(rf.join(locs, st_contains(GEOMETRY_COLUMN, $"point")).count === coords.length) + assert(rf.join(locs, st_intersects(GEOMETRY_COLUMN, $"point")).count === coords.length) } withClue("point literal") { val point = coords.head._2 - 
assert(rf.filter(st_contains(BOUNDS_COLUMN, geomLit(point))).count === 1) - assert(rf.filter(st_intersects(BOUNDS_COLUMN, geomLit(point))).count === 1) - assert(rf.filter(BOUNDS_COLUMN intersects point).count === 1) - assert(rf.filter(BOUNDS_COLUMN intersects GTPoint(point)).count === 1) - assert(rf.filter(BOUNDS_COLUMN containsGeom point).count === 1) + assert(rf.filter(st_contains(GEOMETRY_COLUMN, geomLit(point))).count === 1) + assert(rf.filter(st_intersects(GEOMETRY_COLUMN, geomLit(point))).count === 1) + assert(rf.filter(GEOMETRY_COLUMN intersects point).count === 1) + assert(rf.filter(GEOMETRY_COLUMN intersects GTPoint(point)).count === 1) + assert(rf.filter(GEOMETRY_COLUMN containsGeom point).count === 1) } withClue("exercise predicates") { val point = geomLit(coords.head._2) - assert(rf.filter(st_covers(BOUNDS_COLUMN, point)).count === 1) - assert(rf.filter(st_crosses(BOUNDS_COLUMN, point)).count === 0) - assert(rf.filter(st_disjoint(BOUNDS_COLUMN, point)).count === rf.count - 1) - assert(rf.filter(st_overlaps(BOUNDS_COLUMN, point)).count === 0) - assert(rf.filter(st_touches(BOUNDS_COLUMN, point)).count === 0) - assert(rf.filter(st_within(BOUNDS_COLUMN, point)).count === 0) + assert(rf.filter(st_covers(GEOMETRY_COLUMN, point)).count === 1) + assert(rf.filter(st_crosses(GEOMETRY_COLUMN, point)).count === 0) + assert(rf.filter(st_disjoint(GEOMETRY_COLUMN, point)).count === rf.count - 1) + assert(rf.filter(st_overlaps(GEOMETRY_COLUMN, point)).count === 0) + assert(rf.filter(st_touches(GEOMETRY_COLUMN, point)).count === 0) + assert(rf.filter(st_within(GEOMETRY_COLUMN, point)).count === 0) } } it("should allow construction of geometry literals") { - import JTS._ + import GeomData._ assert(dfBlank.select(geomLit(point)).first === point) assert(dfBlank.select(geomLit(line)).first === line) assert(dfBlank.select(geomLit(poly)).first === poly) @@ -80,8 +85,8 @@ class JTSSpec extends TestEnvironment with TestData with StandardColumns { it("should provide a means of 
getting a bounding box") { import spark.implicits._ - val boxed = rf.select(BOUNDS_COLUMN, envelope(BOUNDS_COLUMN) as "env") - assert(boxed.select($"env".as[Envelope]).first.getArea > 0) + val boxed = rf.select(GEOMETRY_COLUMN, st_extent(GEOMETRY_COLUMN) as "extent") + assert(boxed.select($"extent".as[Extent]).first.area > 0) assert(boxed.toDF("bounds", "bbox").select("bbox.*").schema.length === 4) } @@ -108,9 +113,9 @@ class JTSSpec extends TestEnvironment with TestData with StandardColumns { val df = Seq((latLng, webMercator)).toDF("ll", "wm") val rp = df.select( - reproject_geometry($"ll", LatLng, WebMercator) as "wm2", - reproject_geometry($"wm", WebMercator, LatLng) as "ll2", - reproject_geometry(reproject_geometry($"ll", LatLng, Sinusoidal), Sinusoidal, WebMercator) as "wm3" + st_reproject($"ll", LatLng, WebMercator) as "wm2", + st_reproject($"wm", WebMercator, LatLng) as "ll2", + st_reproject(st_reproject($"ll", LatLng, Sinusoidal), Sinusoidal, WebMercator) as "wm3" ).as[(Geometry, Geometry, Geometry)] @@ -123,9 +128,41 @@ class JTSSpec extends TestEnvironment with TestData with StandardColumns { df.createOrReplaceTempView("geom") - val wm4 = sql("SELECT rf_reproject_geometry(ll, '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs', 'EPSG:3857') AS wm4 from geom") + val wm4 = sql("SELECT st_reproject(ll, '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs', 'EPSG:3857') AS wm4 from geom") .as[Geometry].first() wm4 should matchGeom(webMercator, 0.00001) + + // TODO: See comment in `org.locationtech.rasterframes.expressions.register` for + // TODO: what needs to happen to support this. 
+ //checkDocs("st_reproject") } } + + it("should rasterize geometry") { + val rf = l8Sample(1).projectedRaster.toLayer.withGeometry() + val df = GeomData.features.map(f ⇒ ( + f.geom.reproject(LatLng, rf.crs).jtsGeom, + f.data.fields("id").asInstanceOf[JsNumber].value.intValue() + )).toDF("geom", "__fid__") + + val toRasterize = rf.crossJoin(df) + + val tlm = rf.tileLayerMetadata.merge + + val (cols, rows) = tlm.layout.tileLayout.tileDimensions + + val rasterized = toRasterize.withColumn("rasterized", rf_rasterize($"geom", GEOMETRY_COLUMN, $"__fid__", cols, rows)) + + assert(rasterized.count() === df.count() * rf.count()) + assert(rasterized.select(rf_dimensions($"rasterized")).distinct().count() === 1) + val pixelCount = rasterized.select(rf_agg_data_cells($"rasterized")).first() + assert(pixelCount < cols * rows) + + + toRasterize.createOrReplaceTempView("stuff") + val viaSQL = sql(s"select rf_rasterize(geom, geometry, __fid__, $cols, $rows) as rasterized from stuff") + assert(viaSQL.select(rf_agg_data_cells($"rasterized")).first === pixelCount) + + //rasterized.select($"rasterized".as[Tile]).foreach(t ⇒ t.renderPng(ColorMaps.IGBP).write("target/" + t.hashCode() + ".png")) + } } diff --git a/core/src/test/scala/astraea/spark/rasterframes/MetadataSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/MetadataSpec.scala similarity index 60% rename from core/src/test/scala/astraea/spark/rasterframes/MetadataSpec.scala rename to core/src/test/scala/org/locationtech/rasterframes/MetadataSpec.scala index 4960f7e65..0f179937a 100644 --- a/core/src/test/scala/astraea/spark/rasterframes/MetadataSpec.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/MetadataSpec.scala @@ -1,4 +1,25 @@ -package astraea.spark.rasterframes +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2017 Astraea, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes import org.apache.spark.sql.types.MetadataBuilder @@ -14,7 +35,7 @@ class MetadataSpec extends TestEnvironment with TestData { describe("Metadata storage") { it("should serialize and attach metadata") { - //val rf = sampleGeoTiff.projectedRaster.toRF(128, 128) + //val rf = sampleGeoTiff.projectedRaster.toLayer(128, 128) val df = spark.createDataset(Seq((1, "one"), (2, "two"), (3, "three"))).toDF("num", "str") val withmeta = df.mapColumnAttribute($"num", attr ⇒ { attr.withMetadata(sampleMetadata) diff --git a/core/src/test/scala/astraea/spark/rasterframes/RasterFrameSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/RasterFrameSpec.scala similarity index 79% rename from core/src/test/scala/astraea/spark/rasterframes/RasterFrameSpec.scala rename to core/src/test/scala/org/locationtech/rasterframes/RasterFrameSpec.scala index 984da98e7..e77a0fecc 100644 --- a/core/src/test/scala/astraea/spark/rasterframes/RasterFrameSpec.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/RasterFrameSpec.scala @@ -1,11 +1,32 @@ -package astraea.spark.rasterframes +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes import java.sql.Timestamp import java.time.ZonedDateTime -import astraea.spark.rasterframes.util._ +import org.locationtech.rasterframes.util._ import geotrellis.proj4.LatLng import geotrellis.raster.render.{ColorMap, ColorRamp} import geotrellis.raster.{ProjectedRaster, Tile, TileFeature, TileLayout, UByteCellType} @@ -14,11 +35,12 @@ import geotrellis.spark.tiling._ import geotrellis.vector.{Extent, ProjectedExtent} import org.apache.spark.sql.functions._ import org.apache.spark.sql.{SQLContext, SparkSession} +import org.locationtech.rasterframes.model.TileDimensions import scala.util.control.NonFatal /** - * RasterFrame test rig. + * RasterFrameLayer test rig. 
* * @since 7/10/17 */ @@ -52,12 +74,12 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys } } - describe("RasterFrame") { + describe("RasterFrameLayer") { it("should implicitly convert from spatial layer type") { val tileLayerRDD = TestData.randomSpatialTileLayerRDD(20, 20, 2, 2) - val rf = tileLayerRDD.toRF + val rf = tileLayerRDD.toLayer assert(rf.tileColumns.nonEmpty) assert(rf.spatialKeyColumn.columnName == "spatial_key") @@ -66,16 +88,14 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys assert(rf.schema.head.metadata.json.contains("tileLayout")) assert( - rf.select(tile_dimensions($"tile")) - .as[Tuple1[(Int, Int)]] - .map(_._1) + rf.select(rf_dimensions($"tile")) .collect() - .forall(_ == (10, 10)) + .forall(_ == TileDimensions(10, 10)) ) assert(rf.count() === 4) - val cols = tileLayerRDD.toRF("foo").columns + val cols = tileLayerRDD.toLayer("foo").columns assert(!cols.contains("tile")) assert(cols.contains("foo")) } @@ -84,7 +104,7 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys val tileLayerRDD = TestData.randomSpatioTemporalTileLayerRDD(20, 20, 2, 2) - val rf = tileLayerRDD.toRF + val rf = tileLayerRDD.toLayer try { assert(rf.tileColumns.nonEmpty) @@ -93,11 +113,10 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys } catch { case NonFatal(ex) ⇒ - rf.printSchema() println(rf.schema.prettyJson) throw ex } - val cols = tileLayerRDD.toRF("foo").columns + val cols = tileLayerRDD.toLayer("foo").columns assert(!cols.contains("tile")) assert(cols.contains("foo")) } @@ -116,7 +135,7 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys val tileLayerRDD = TileFeatureLayerRDD(tileRDD, metadata) - val rf = tileLayerRDD.toRF + val rf = tileLayerRDD.toLayer assert(rf.columns.toSet === Set(SPATIAL_KEY_COLUMN, TILE_COLUMN, TILE_FEATURE_DATA_COLUMN).map(_.columnName)) } @@ -135,14 +154,14 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys val tileLayerRDD = 
TileFeatureLayerRDD(tileRDD, metadata) - val rf = tileLayerRDD.toRF + val rf = tileLayerRDD.toLayer assert(rf.columns.toSet === Set(SPATIAL_KEY_COLUMN, TEMPORAL_KEY_COLUMN, TILE_COLUMN, TILE_FEATURE_DATA_COLUMN).map(_.columnName)) } it("should support adding a timestamp column") { val now = ZonedDateTime.now() - val rf = sampleGeoTiff.projectedRaster.toRF(256, 256) + val rf = sampleGeoTiff.projectedRaster.toLayer(256, 256) val wt = rf.addTemporalComponent(now) val goodie = wt.withTimestamp() assert(goodie.columns.contains("timestamp")) @@ -153,7 +172,7 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys } it("should support spatial joins") { - val rf = sampleGeoTiff.projectedRaster.toRF(256, 256) + val rf = sampleGeoTiff.projectedRaster.toLayer(256, 256) val wt = rf.addTemporalComponent(TemporalKey(34)) @@ -167,11 +186,11 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys } it("should have correct schema on inner spatial joins") { - val left = sampleGeoTiff.projectedRaster.toRF(256, 256) + val left = sampleGeoTiff.projectedRaster.toLayer(256, 256) .addTemporalComponent(TemporalKey(34)) val right = left.withColumnRenamed(left.tileColumns.head.columnName, "rightTile") - .asRF + .asLayer val joined = left.spatialJoin(right) // since right is a copy of left, should not drop any rows with inner join @@ -180,22 +199,25 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys // Should use left's key column names assert(joined.spatialKeyColumn.columnName === left.spatialKeyColumn.columnName) assert(joined.temporalKeyColumn.map(_.columnName) === left.temporalKeyColumn.map(_.columnName)) - + assert(joined.tileColumns.size === 2) + assert(joined.notTileColumns.size === 2) + assert(joined.tileColumns.toSet === joined.tileColumns.toSet) + assert(joined.tileColumns.toSet !== joined.notTileColumns.toSet) } - it("should convert a GeoTiff to RasterFrame") { + it("should convert a GeoTiff to RasterFrameLayer") { val praster: 
ProjectedRaster[Tile] = sampleGeoTiff.projectedRaster val (cols, rows) = praster.raster.dimensions val layoutCols = math.ceil(cols / 128.0).toInt val layoutRows = math.ceil(rows / 128.0).toInt - assert(praster.toRF.count() === 1) - assert(praster.toRF(128, 128).count() === (layoutCols * layoutRows)) + assert(praster.toLayer.count() === 1) + assert(praster.toLayer(128, 128).count() === (layoutCols * layoutRows)) } it("should provide TileLayerMetadata[SpatialKey]") { - val rf = sampleGeoTiff.projectedRaster.toRF(256, 256) + val rf = sampleGeoTiff.projectedRaster.toLayer(256, 256) val tlm = rf.tileLayerMetadata.merge val bounds = tlm.bounds.get assert(bounds === KeyBounds(SpatialKey(0, 0), SpatialKey(3, 1))) @@ -203,7 +225,7 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys it("should provide TileLayerMetadata[SpaceTimeKey]") { val now = ZonedDateTime.now() - val rf = sampleGeoTiff.projectedRaster.toRF(256, 256, now) + val rf = sampleGeoTiff.projectedRaster.toLayer(256, 256, now) val tlm = rf.tileLayerMetadata.merge val bounds = tlm.bounds.get assert(bounds._1 === SpaceTimeKey(0, 0, now)) @@ -213,7 +235,7 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys // it("should clip TileLayerMetadata extent") { // val tiled = sampleTileLayerRDD // -// val rf = tiled.reproject(LatLng, tiled.metadata.layout)._2.toRF +// val rf = tiled.reproject(LatLng, tiled.metadata.layout)._2.toLayer // // val worldish = Extent(-179, -89, 179, 89) // val areaish = Extent(-90, 30, -81, 40) @@ -240,14 +262,14 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys } it("shouldn't clip already clipped extents") { - val rf = TestData.randomSpatialTileLayerRDD(1024, 1024, 8, 8).toRF + val rf = TestData.randomSpatialTileLayerRDD(1024, 1024, 8, 8).toLayer val expected = rf.tileLayerMetadata.merge.extent val computed = rf.clipLayerExtent.tileLayerMetadata.merge.extent basicallySame(expected, computed) val pr = sampleGeoTiff.projectedRaster - val rf2 = 
pr.toRF(256, 256) + val rf2 = pr.toLayer(256, 256) val expected2 = rf2.tileLayerMetadata.merge.extent val computed2 = rf2.clipLayerExtent.tileLayerMetadata.merge.extent basicallySame(expected2, computed2) @@ -272,7 +294,7 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys } it("should rasterize with a spatiotemporal key") { - val rf = TestData.randomSpatioTemporalTileLayerRDD(20, 20, 2, 2).toRF + val rf = TestData.randomSpatioTemporalTileLayerRDD(20, 20, 2, 2).toLayer noException shouldBe thrownBy { rf.toRaster($"tile", 128, 128) @@ -280,8 +302,8 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys } it("should maintain metadata after all spatial join operations") { - val rf1 = TestData.randomSpatioTemporalTileLayerRDD(20, 20, 2, 2).toRF - val rf2 = TestData.randomSpatioTemporalTileLayerRDD(20, 20, 2, 2).toRF + val rf1 = TestData.randomSpatioTemporalTileLayerRDD(20, 20, 2, 2).toLayer + val rf2 = TestData.randomSpatioTemporalTileLayerRDD(20, 20, 2, 2).toLayer val joinTypes = Seq("inner", "outer", "fullouter", "left_outer", "right_outer", "leftsemi") forEvery(joinTypes) { jt ⇒ @@ -293,9 +315,9 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys it("should rasterize multiband") { withClue("Landsat") { - val blue = TestData.l8Sample(1).projectedRaster.toRF.withRFColumnRenamed("tile", "blue") - val green = TestData.l8Sample(2).projectedRaster.toRF.withRFColumnRenamed("tile", "green") - val red = TestData.l8Sample(3).projectedRaster.toRF.withRFColumnRenamed("tile", "red") + val blue = TestData.l8Sample(1).projectedRaster.toLayer.withRFColumnRenamed("tile", "blue") + val green = TestData.l8Sample(2).projectedRaster.toLayer.withRFColumnRenamed("tile", "green") + val red = TestData.l8Sample(3).projectedRaster.toLayer.withRFColumnRenamed("tile", "red") val joined = blue.spatialJoin(green).spatialJoin(red) @@ -306,9 +328,9 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys } } withClue("NAIP") { - val red = 
TestData.naipSample(1).projectedRaster.toRF.withRFColumnRenamed("tile", "red") - val green = TestData.naipSample(2).projectedRaster.toRF.withRFColumnRenamed("tile", "green") - val blue = TestData.naipSample(3).projectedRaster.toRF.withRFColumnRenamed("tile", "blue") + val red = TestData.naipSample(1).projectedRaster.toLayer.withRFColumnRenamed("tile", "red") + val green = TestData.naipSample(2).projectedRaster.toLayer.withRFColumnRenamed("tile", "green") + val blue = TestData.naipSample(3).projectedRaster.toLayer.withRFColumnRenamed("tile", "blue") val joined = blue.spatialJoin(green).spatialJoin(red) noException shouldBe thrownBy { @@ -323,7 +345,7 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys // 774 × 500 val praster: ProjectedRaster[Tile] = sampleGeoTiff.projectedRaster val (cols, rows) = praster.raster.dimensions - val rf = praster.toRF(64, 64) + val rf = praster.toLayer(64, 64) val raster = rf.toRaster($"tile", cols, rows) render(raster.tile, "normal") @@ -344,7 +366,7 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys it("shouldn't restitch raster that's has derived tiles") { val praster: ProjectedRaster[Tile] = sampleGeoTiff.projectedRaster - val rf = praster.toRF(64, 64) + val rf = praster.toLayer(64, 64) val equalizer = udf((t: Tile) => t.equalize()) @@ -352,13 +374,13 @@ class RasterFrameSpec extends TestEnvironment with MetadataKeys intercept[IllegalArgumentException] { // spatial_key is lost - equalized.asRF.toRaster($"equalized", 128, 128) + equalized.asLayer.toRaster($"equalized", 128, 128) } } it("should fetch CRS") { val praster: ProjectedRaster[Tile] = sampleGeoTiff.projectedRaster - val rf = praster.toRF + val rf = praster.toLayer assert(rf.crs === praster.crs) } diff --git a/core/src/test/scala/astraea/spark/rasterframes/RasterFunctionsSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/RasterFunctionsSpec.scala similarity index 55% rename from 
core/src/test/scala/astraea/spark/rasterframes/RasterFunctionsSpec.scala rename to core/src/test/scala/org/locationtech/rasterframes/RasterFunctionsSpec.scala index da2ab9c56..caccd74ca 100644 --- a/core/src/test/scala/astraea/spark/rasterframes/RasterFunctionsSpec.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/RasterFunctionsSpec.scala @@ -19,22 +19,22 @@ * */ -package astraea.spark.rasterframes -import astraea.spark.rasterframes.TestData.injectND -import astraea.spark.rasterframes.expressions.accessors.ExtractTile -import astraea.spark.rasterframes.stats.{CellHistogram, CellStatistics, LocalCellStatistics} -import astraea.spark.rasterframes.tiles.ProjectedRasterTile +package org.locationtech.rasterframes + import geotrellis.proj4.LatLng import geotrellis.raster import geotrellis.raster.testkit.RasterMatchers -import geotrellis.raster.{ArrayTile, BitCellType, ByteUserDefinedNoDataCellType, DoubleConstantNoDataCellType, ShortConstantNoDataCellType, Tile, UByteConstantNoDataCellType} +import geotrellis.raster._ import geotrellis.vector.Extent -import org.apache.spark.sql.{AnalysisException, Encoders} +import org.apache.spark.sql.Encoders import org.apache.spark.sql.functions._ -import org.scalatest.{FunSpec, Matchers} +import org.locationtech.rasterframes.expressions.accessors.ExtractTile +import org.locationtech.rasterframes.model.TileDimensions +import org.locationtech.rasterframes.ref.{RasterRef, RasterSource} +import org.locationtech.rasterframes.stats._ +import org.locationtech.rasterframes.tiles.ProjectedRasterTile -class RasterFunctionsSpec extends FunSpec - with TestEnvironment with Matchers with RasterMatchers { +class RasterFunctionsSpec extends TestEnvironment with RasterMatchers { import spark.implicits._ val extent = Extent(10, 20, 30, 40) @@ -51,66 +51,95 @@ class RasterFunctionsSpec extends FunSpec lazy val three = TestData.projectedRasterTile(cols, rows, 3, extent, crs, ct) lazy val six = ProjectedRasterTile(three * two, three.extent, 
three.crs) lazy val nd = TestData.projectedRasterTile(cols, rows, -2, extent, crs, ct) - lazy val randTile = TestData.projectedRasterTile(cols, rows, scala.util.Random.nextInt(), extent, crs, ct) - lazy val randNDTile = TestData.injectND(numND)(randTile) + lazy val randPRT = TestData.projectedRasterTile(cols, rows, scala.util.Random.nextInt(), extent, crs, ct) + lazy val randNDPRT = TestData.injectND(numND)(randPRT) lazy val randDoubleTile = TestData.projectedRasterTile(cols, rows, scala.util.Random.nextGaussian(), extent, crs, DoubleConstantNoDataCellType) lazy val randDoubleNDTile = TestData.injectND(numND)(randDoubleTile) lazy val randPositiveDoubleTile = TestData.projectedRasterTile(cols, rows, scala.util.Random.nextDouble() + 1e-6, extent, crs, DoubleConstantNoDataCellType) - val expectedRandNoData: Long = numND * tileCount + val expectedRandNoData: Long = numND * tileCount.toLong val expectedRandData: Long = cols * rows * tileCount - expectedRandNoData - lazy val randNDTilesWithNull = Seq.fill[Tile](tileCount)(injectND(numND)( + lazy val randNDTilesWithNull = Seq.fill[Tile](tileCount)(TestData.injectND(numND)( TestData.randomTile(cols, rows, UByteConstantNoDataCellType) )).map(ProjectedRasterTile(_, extent, crs)) :+ null + def lazyPRT = RasterRef(RasterSource(TestData.l8samplePath), 0, None).tile + implicit val pairEnc = Encoders.tuple(ProjectedRasterTile.prtEncoder, ProjectedRasterTile.prtEncoder) implicit val tripEnc = Encoders.tuple(ProjectedRasterTile.prtEncoder, ProjectedRasterTile.prtEncoder, ProjectedRasterTile.prtEncoder) + describe("constant tile generation operations") { + val dim = 2 + val rows = 2 + + it("should create a ones tile") { + val df = (0 until rows).toDF("id") + .withColumn("const", rf_make_ones_tile(dim, dim, IntConstantNoDataCellType)) + val result = df.select(rf_tile_sum($"const") as "ts").agg(sum("ts")).as[Double].first() + result should be (dim * dim * rows) + } + + it("should create a zeros tile") { + val df = (0 until 
rows).toDF("id") + .withColumn("const", rf_make_zeros_tile(dim, dim, FloatConstantNoDataCellType)) + val result = df.select(rf_tile_sum($"const") as "ts").agg(sum("ts")).as[Double].first() + result should be (0) + } + + it("should create an arbitrary constant tile") { + val value = 4 + val df = (0 until rows).toDF("id") + .withColumn("const", rf_make_constant_tile(value, dim, dim, ByteConstantNoDataCellType)) + val result = df.select(rf_tile_sum($"const") as "ts").agg(sum("ts")).as[Double].first() + result should be (dim * dim * rows * value) + } + } + describe("arithmetic tile operations") { it("should local_add") { val df = Seq((one, two)).toDF("one", "two") - val maybeThree = df.select(local_add($"one", $"two")).as[ProjectedRasterTile] + val maybeThree = df.select(rf_local_add($"one", $"two")).as[ProjectedRasterTile] assertEqual(maybeThree.first(), three) assertEqual(df.selectExpr("rf_local_add(one, two)").as[ProjectedRasterTile].first(), three) - val maybeThreeTile = df.select(local_add(ExtractTile($"one"), ExtractTile($"two"))).as[Tile] + val maybeThreeTile = df.select(rf_local_add(ExtractTile($"one"), ExtractTile($"two"))).as[Tile] assertEqual(maybeThreeTile.first(), three.toArrayTile()) checkDocs("rf_local_add") } - it("should local_subtract") { + it("should rf_local_subtract") { val df = Seq((three, two)).toDF("three", "two") - val maybeOne = df.select(local_subtract($"three", $"two")).as[ProjectedRasterTile] + val maybeOne = df.select(rf_local_subtract($"three", $"two")).as[ProjectedRasterTile] assertEqual(maybeOne.first(), one) assertEqual(df.selectExpr("rf_local_subtract(three, two)").as[ProjectedRasterTile].first(), one) val maybeOneTile = - df.select(local_subtract(ExtractTile($"three"), ExtractTile($"two"))).as[Tile] + df.select(rf_local_subtract(ExtractTile($"three"), ExtractTile($"two"))).as[Tile] assertEqual(maybeOneTile.first(), one.toArrayTile()) checkDocs("rf_local_subtract") } - it("should local_multiply") { + it("should rf_local_multiply") { 
val df = Seq((three, two)).toDF("three", "two") - val maybeSix = df.select(local_multiply($"three", $"two")).as[ProjectedRasterTile] + val maybeSix = df.select(rf_local_multiply($"three", $"two")).as[ProjectedRasterTile] assertEqual(maybeSix.first(), six) assertEqual(df.selectExpr("rf_local_multiply(three, two)").as[ProjectedRasterTile].first(), six) val maybeSixTile = - df.select(local_multiply(ExtractTile($"three"), ExtractTile($"two"))).as[Tile] + df.select(rf_local_multiply(ExtractTile($"three"), ExtractTile($"two"))).as[Tile] assertEqual(maybeSixTile.first(), six.toArrayTile()) checkDocs("rf_local_multiply") } - it("should local_divide") { + it("should rf_local_divide") { val df = Seq((six, two)).toDF("six", "two") - val maybeThree = df.select(local_divide($"six", $"two")).as[ProjectedRasterTile] + val maybeThree = df.select(rf_local_divide($"six", $"two")).as[ProjectedRasterTile] assertEqual(maybeThree.first(), three) assertEqual(df.selectExpr("rf_local_divide(six, two)").as[ProjectedRasterTile].first(), three) @@ -119,151 +148,176 @@ class RasterFunctionsSpec extends FunSpec .as[ProjectedRasterTile].first(), six) val maybeThreeTile = - df.select(local_divide(ExtractTile($"six"), ExtractTile($"two"))).as[Tile] + df.select(rf_local_divide(ExtractTile($"six"), ExtractTile($"two"))).as[Tile] assertEqual(maybeThreeTile.first(), three.toArrayTile()) checkDocs("rf_local_divide") } } describe("scalar tile operations") { - it("should local_add") { + it("should rf_local_add") { val df = Seq(one).toDF("one") - val maybeThree = df.select(local_add($"one", 2)).as[ProjectedRasterTile] + val maybeThree = df.select(rf_local_add($"one", 2)).as[ProjectedRasterTile] assertEqual(maybeThree.first(), three) - val maybeThreeD = df.select(local_add($"one", 2.1)).as[ProjectedRasterTile] + val maybeThreeD = df.select(rf_local_add($"one", 2.1)).as[ProjectedRasterTile] assertEqual(maybeThreeD.first(), three.convert(DoubleConstantNoDataCellType).localAdd(0.1)) - val maybeThreeTile = 
df.select(local_add(ExtractTile($"one"), 2)).as[Tile] + val maybeThreeTile = df.select(rf_local_add(ExtractTile($"one"), 2)).as[Tile] assertEqual(maybeThreeTile.first(), three.toArrayTile()) } - it("should local_subtract") { + it("should rf_local_subtract") { val df = Seq(three).toDF("three") - val maybeOne = df.select(local_subtract($"three", 2)).as[ProjectedRasterTile] + val maybeOne = df.select(rf_local_subtract($"three", 2)).as[ProjectedRasterTile] assertEqual(maybeOne.first(), one) - val maybeOneD = df.select(local_subtract($"three", 2.0)).as[ProjectedRasterTile] + val maybeOneD = df.select(rf_local_subtract($"three", 2.0)).as[ProjectedRasterTile] assertEqual(maybeOneD.first(), one) - val maybeOneTile = df.select(local_subtract(ExtractTile($"three"), 2)).as[Tile] + val maybeOneTile = df.select(rf_local_subtract(ExtractTile($"three"), 2)).as[Tile] assertEqual(maybeOneTile.first(), one.toArrayTile()) } - it("should local_multiply") { + it("should rf_local_multiply") { val df = Seq(three).toDF("three") - val maybeSix = df.select(local_multiply($"three", 2)).as[ProjectedRasterTile] + val maybeSix = df.select(rf_local_multiply($"three", 2)).as[ProjectedRasterTile] assertEqual(maybeSix.first(), six) - val maybeSixD = df.select(local_multiply($"three", 2.0)).as[ProjectedRasterTile] + val maybeSixD = df.select(rf_local_multiply($"three", 2.0)).as[ProjectedRasterTile] assertEqual(maybeSixD.first(), six) - val maybeSixTile = df.select(local_multiply(ExtractTile($"three"), 2)).as[Tile] + val maybeSixTile = df.select(rf_local_multiply(ExtractTile($"three"), 2)).as[Tile] assertEqual(maybeSixTile.first(), six.toArrayTile()) } - it("should local_divide") { + it("should rf_local_divide") { val df = Seq(six).toDF("six") - val maybeThree = df.select(local_divide($"six", 2)).as[ProjectedRasterTile] + val maybeThree = df.select(rf_local_divide($"six", 2)).as[ProjectedRasterTile] assertEqual(maybeThree.first(), three) - val maybeThreeD = df.select(local_divide($"six", 
2.0)).as[ProjectedRasterTile] + val maybeThreeD = df.select(rf_local_divide($"six", 2.0)).as[ProjectedRasterTile] assertEqual(maybeThreeD.first(), three) - val maybeThreeTile = df.select(local_divide(ExtractTile($"six"), 2)).as[Tile] + val maybeThreeTile = df.select(rf_local_divide(ExtractTile($"six"), 2)).as[Tile] assertEqual(maybeThreeTile.first(), three.toArrayTile()) } } describe("tile comparison relations") { - it("should evaluate local_less") { + it("should evaluate rf_local_less") { val df = Seq((two, three, six)).toDF("two", "three", "six") - df.select(tile_sum(local_less($"two", 6))).first() should be(100.0) - df.select(tile_sum(local_less($"two", 1.9))).first() should be(0.0) - df.select(tile_sum(local_less($"two", 2))).first() should be(0.0) - df.select(tile_sum(local_less($"three", $"two"))).first() should be(0.0) - df.select(tile_sum(local_less($"three", $"three"))).first() should be(0.0) - df.select(tile_sum(local_less($"three", $"six"))).first() should be(100.0) + df.select(rf_tile_sum(rf_local_less($"two", 6))).first() should be(100.0) + df.select(rf_tile_sum(rf_local_less($"two", 1.9))).first() should be(0.0) + df.select(rf_tile_sum(rf_local_less($"two", 2))).first() should be(0.0) + df.select(rf_tile_sum(rf_local_less($"three", $"two"))).first() should be(0.0) + df.select(rf_tile_sum(rf_local_less($"three", $"three"))).first() should be(0.0) + df.select(rf_tile_sum(rf_local_less($"three", $"six"))).first() should be(100.0) df.selectExpr("rf_tile_sum(rf_local_less(two, 6))").as[Double].first() should be(100.0) df.selectExpr("rf_tile_sum(rf_local_less(three, three))").as[Double].first() should be(0.0) checkDocs("rf_local_less") } - it("should evaluate local_less_equal") { + it("should evaluate rf_local_less_equal") { val df = Seq((two, three, six)).toDF("two", "three", "six") - df.select(tile_sum(local_less_equal($"two", 6))).first() should be(100.0) - df.select(tile_sum(local_less_equal($"two", 1.9))).first() should be(0.0) - 
df.select(tile_sum(local_less_equal($"two", 2))).first() should be(100.0) - df.select(tile_sum(local_less_equal($"three", $"two"))).first() should be(0.0) - df.select(tile_sum(local_less_equal($"three", $"three"))).first() should be(100.0) - df.select(tile_sum(local_less_equal($"three", $"six"))).first() should be(100.0) + df.select(rf_tile_sum(rf_local_less_equal($"two", 6))).first() should be(100.0) + df.select(rf_tile_sum(rf_local_less_equal($"two", 1.9))).first() should be(0.0) + df.select(rf_tile_sum(rf_local_less_equal($"two", 2))).first() should be(100.0) + df.select(rf_tile_sum(rf_local_less_equal($"three", $"two"))).first() should be(0.0) + df.select(rf_tile_sum(rf_local_less_equal($"three", $"three"))).first() should be(100.0) + df.select(rf_tile_sum(rf_local_less_equal($"three", $"six"))).first() should be(100.0) df.selectExpr("rf_tile_sum(rf_local_less_equal(two, 6))").as[Double].first() should be(100.0) df.selectExpr("rf_tile_sum(rf_local_less_equal(three, three))").as[Double].first() should be(100.0) checkDocs("rf_local_less_equal") } - it("should evaluate local_greater") { + it("should evaluate rf_local_greater") { val df = Seq((two, three, six)).toDF("two", "three", "six") - df.select(tile_sum(local_greater($"two", 6))).first() should be(0.0) - df.select(tile_sum(local_greater($"two", 1.9))).first() should be(100.0) - df.select(tile_sum(local_greater($"two", 2))).first() should be(0.0) - df.select(tile_sum(local_greater($"three", $"two"))).first() should be(100.0) - df.select(tile_sum(local_greater($"three", $"three"))).first() should be(0.0) - df.select(tile_sum(local_greater($"three", $"six"))).first() should be(0.0) + df.select(rf_tile_sum(rf_local_greater($"two", 6))).first() should be(0.0) + df.select(rf_tile_sum(rf_local_greater($"two", 1.9))).first() should be(100.0) + df.select(rf_tile_sum(rf_local_greater($"two", 2))).first() should be(0.0) + df.select(rf_tile_sum(rf_local_greater($"three", $"two"))).first() should be(100.0) + 
df.select(rf_tile_sum(rf_local_greater($"three", $"three"))).first() should be(0.0) + df.select(rf_tile_sum(rf_local_greater($"three", $"six"))).first() should be(0.0) df.selectExpr("rf_tile_sum(rf_local_greater(two, 1.9))").as[Double].first() should be(100.0) df.selectExpr("rf_tile_sum(rf_local_greater(three, three))").as[Double].first() should be(0.0) checkDocs("rf_local_greater") } - it("should evaluate local_greater_equal") { + it("should evaluate rf_local_greater_equal") { val df = Seq((two, three, six)).toDF("two", "three", "six") - df.select(tile_sum(local_greater_equal($"two", 6))).first() should be(0.0) - df.select(tile_sum(local_greater_equal($"two", 1.9))).first() should be(100.0) - df.select(tile_sum(local_greater_equal($"two", 2))).first() should be(100.0) - df.select(tile_sum(local_greater_equal($"three", $"two"))).first() should be(100.0) - df.select(tile_sum(local_greater_equal($"three", $"three"))).first() should be(100.0) - df.select(tile_sum(local_greater_equal($"three", $"six"))).first() should be(0.0) + df.select(rf_tile_sum(rf_local_greater_equal($"two", 6))).first() should be(0.0) + df.select(rf_tile_sum(rf_local_greater_equal($"two", 1.9))).first() should be(100.0) + df.select(rf_tile_sum(rf_local_greater_equal($"two", 2))).first() should be(100.0) + df.select(rf_tile_sum(rf_local_greater_equal($"three", $"two"))).first() should be(100.0) + df.select(rf_tile_sum(rf_local_greater_equal($"three", $"three"))).first() should be(100.0) + df.select(rf_tile_sum(rf_local_greater_equal($"three", $"six"))).first() should be(0.0) df.selectExpr("rf_tile_sum(rf_local_greater_equal(two, 1.9))").as[Double].first() should be(100.0) df.selectExpr("rf_tile_sum(rf_local_greater_equal(three, three))").as[Double].first() should be(100.0) checkDocs("rf_local_greater_equal") } - it("should evaluate local_equal") { + it("should evaluate rf_local_equal") { val df = Seq((two, three, three)).toDF("two", "threeA", "threeB") - df.select(tile_sum(local_equal($"two", 
2))).first() should be(100.0) - df.select(tile_sum(local_equal($"two", 2.1))).first() should be(0.0) - df.select(tile_sum(local_equal($"two", $"threeA"))).first() should be(0.0) - df.select(tile_sum(local_equal($"threeA", $"threeB"))).first() should be(100.0) + df.select(rf_tile_sum(rf_local_equal($"two", 2))).first() should be(100.0) + df.select(rf_tile_sum(rf_local_equal($"two", 2.1))).first() should be(0.0) + df.select(rf_tile_sum(rf_local_equal($"two", $"threeA"))).first() should be(0.0) + df.select(rf_tile_sum(rf_local_equal($"threeA", $"threeB"))).first() should be(100.0) df.selectExpr("rf_tile_sum(rf_local_equal(two, 1.9))").as[Double].first() should be(0.0) df.selectExpr("rf_tile_sum(rf_local_equal(threeA, threeB))").as[Double].first() should be(100.0) checkDocs("rf_local_equal") } - it("should evaluate local_unequal") { + it("should evaluate rf_local_unequal") { val df = Seq((two, three, three)).toDF("two", "threeA", "threeB") - df.select(tile_sum(local_unequal($"two", 2))).first() should be(0.0) - df.select(tile_sum(local_unequal($"two", 2.1))).first() should be(100.0) - df.select(tile_sum(local_unequal($"two", $"threeA"))).first() should be(100.0) - df.select(tile_sum(local_unequal($"threeA", $"threeB"))).first() should be(0.0) + df.select(rf_tile_sum(rf_local_unequal($"two", 2))).first() should be(0.0) + df.select(rf_tile_sum(rf_local_unequal($"two", 2.1))).first() should be(100.0) + df.select(rf_tile_sum(rf_local_unequal($"two", $"threeA"))).first() should be(100.0) + df.select(rf_tile_sum(rf_local_unequal($"threeA", $"threeB"))).first() should be(0.0) df.selectExpr("rf_tile_sum(rf_local_unequal(two, 1.9))").as[Double].first() should be(100.0) df.selectExpr("rf_tile_sum(rf_local_unequal(threeA, threeB))").as[Double].first() should be(0.0) checkDocs("rf_local_unequal") } } + describe("raster metadata") { + it("should get the TileDimensions of a Tile") { + val t = Seq(randPRT).toDF("tile").select(rf_dimensions($"tile")).first() + t should be 
(TileDimensions(randPRT.dimensions)) + checkDocs("rf_dimensions") + } + it("should get the Extent of a ProjectedRasterTile") { + val e = Seq(randPRT).toDF("tile").select(rf_extent($"tile")).first() + e should be (extent) + checkDocs("rf_extent") + } + + it("should get the CRS of a ProjectedRasterTile") { + val e = Seq(randPRT).toDF("tile").select(rf_crs($"tile")).first() + e should be (crs) + checkDocs("rf_crs") + } + + it("should get the Geometry of a ProjectedRasterTile") { + val g = Seq(randPRT).toDF("tile").select(rf_geometry($"tile")).first() + g should be (extent.jtsGeom) + checkDocs("rf_geometry") + } + } + describe("per-tile stats") { it("should compute data cell counts") { val df = Seq(TestData.injectND(numND)(two)).toDF("two") - df.select(data_cells($"two")).first() shouldBe (cols * rows - numND).toLong + df.select(rf_data_cells($"two")).first() shouldBe (cols * rows - numND).toLong val df2 = randNDTilesWithNull.toDF("tile") - df2.select(data_cells($"tile") as "cells") + df2.select(rf_data_cells($"tile") as "cells") .agg(sum("cells")) .as[Long] .first() should be (expectedRandData) @@ -272,10 +326,10 @@ class RasterFunctionsSpec extends FunSpec } it("should compute no-data cell counts") { val df = Seq(TestData.injectND(numND)(two)).toDF("two") - df.select(no_data_cells($"two")).first() should be(numND) + df.select(rf_no_data_cells($"two")).first() should be(numND) val df2 = randNDTilesWithNull.toDF("tile") - df2.select(no_data_cells($"tile") as "cells") + df2.select(rf_no_data_cells($"tile") as "cells") .agg(sum("cells")) .as[Long] .first() should be (expectedRandNoData) @@ -284,40 +338,56 @@ class RasterFunctionsSpec extends FunSpec } it("should detect no-data tiles") { val df = Seq(nd).toDF("nd") - df.select(is_no_data_tile($"nd")).first() should be(true) + df.select(rf_is_no_data_tile($"nd")).first() should be(true) val df2 = Seq(two).toDF("not_nd") - df2.select(is_no_data_tile($"not_nd")).first() should be(false) + 
df2.select(rf_is_no_data_tile($"not_nd")).first() should be(false) checkDocs("rf_is_no_data_tile") } + + it("should evaluate exists and for_all") { + val df0 = Seq(zero).toDF("tile") + df0.select(rf_exists($"tile")).first() should be(false) + df0.select(rf_for_all($"tile")).first() should be(false) + + Seq(one).toDF("tile").select(rf_exists($"tile")).first() should be(true) + Seq(one).toDF("tile").select(rf_for_all($"tile")).first() should be(true) + + val dfNd = Seq(TestData.injectND(1)(one)).toDF("tile") + dfNd.select(rf_exists($"tile")).first() should be(true) + dfNd.select(rf_for_all($"tile")).first() should be(false) + + checkDocs("rf_exists") + checkDocs("rf_for_all") + } it("should find the minimum cell value") { - val min = randNDTile.toArray().filter(c => raster.isData(c)).min.toDouble - val df = Seq(randNDTile).toDF("rand") - df.select(tile_min($"rand")).first() should be(min) + val min = randNDPRT.toArray().filter(c => raster.isData(c)).min.toDouble + val df = Seq(randNDPRT).toDF("rand") + df.select(rf_tile_min($"rand")).first() should be(min) df.selectExpr("rf_tile_min(rand)").as[Double].first() should be(min) checkDocs("rf_tile_min") } it("should find the maximum cell value") { - val max = randNDTile.toArray().filter(c => raster.isData(c)).max.toDouble - val df = Seq(randNDTile).toDF("rand") - df.select(tile_max($"rand")).first() should be(max) + val max = randNDPRT.toArray().filter(c => raster.isData(c)).max.toDouble + val df = Seq(randNDPRT).toDF("rand") + df.select(rf_tile_max($"rand")).first() should be(max) df.selectExpr("rf_tile_max(rand)").as[Double].first() should be(max) checkDocs("rf_tile_max") } it("should compute the tile mean cell value") { - val values = randNDTile.toArray().filter(c => raster.isData(c)) + val values = randNDPRT.toArray().filter(c => raster.isData(c)) val mean = values.sum.toDouble / values.length - val df = Seq(randNDTile).toDF("rand") - df.select(tile_mean($"rand")).first() should be(mean) + val df = 
Seq(randNDPRT).toDF("rand") + df.select(rf_tile_mean($"rand")).first() should be(mean) df.selectExpr("rf_tile_mean(rand)").as[Double].first() should be(mean) checkDocs("rf_tile_mean") } it("should compute the tile summary statistics") { - val values = randNDTile.toArray().filter(c => raster.isData(c)) + val values = randNDPRT.toArray().filter(c => raster.isData(c)) val mean = values.sum.toDouble / values.length - val df = Seq(randNDTile).toDF("rand") - val stats = df.select(tile_stats($"rand")).first() + val df = Seq(randNDPRT).toDF("rand") + val stats = df.select(rf_tile_stats($"rand")).first() stats.mean should be (mean +- 0.00001) val stats2 = df.selectExpr("rf_tile_stats(rand) as stats") @@ -325,7 +395,7 @@ class RasterFunctionsSpec extends FunSpec .first() stats2 should be (stats) - df.select(tile_stats($"rand") as "stats") + df.select(rf_tile_stats($"rand") as "stats") .select($"stats.mean").as[Double] .first() should be(mean +- 0.00001) df.selectExpr("rf_tile_stats(rand) as stats") @@ -334,7 +404,7 @@ class RasterFunctionsSpec extends FunSpec val df2 = randNDTilesWithNull.toDF("tile") df2 - .select(tile_stats($"tile")("data_cells") as "cells") + .select(rf_tile_stats($"tile")("data_cells") as "cells") .agg(sum("cells")) .as[Long] .first() should be (expectedRandData) @@ -343,8 +413,8 @@ class RasterFunctionsSpec extends FunSpec } it("should compute the tile histogram") { - val df = Seq(randNDTile).toDF("rand") - val h1 = df.select(tile_histogram($"rand")).first() + val df = Seq(randNDPRT).toDF("rand") + val h1 = df.select(rf_tile_histogram($"rand")).first() val h2 = df.selectExpr("rf_tile_histogram(rand) as hist") .select($"hist".as[CellHistogram]) @@ -359,14 +429,14 @@ class RasterFunctionsSpec extends FunSpec describe("aggregate statistics") { it("should count data cells") { val df = randNDTilesWithNull.filter(_ != null).toDF("tile") - df.select(agg_data_cells($"tile")).first() should be (expectedRandData) + df.select(rf_agg_data_cells($"tile")).first() 
should be (expectedRandData) df.selectExpr("rf_agg_data_cells(tile)").as[Long].first() should be (expectedRandData) checkDocs("rf_agg_data_cells") } it("should count no-data cells") { val df = randNDTilesWithNull.toDF("tile") - df.select(agg_no_data_cells($"tile")).first() should be (expectedRandNoData) + df.select(rf_agg_no_data_cells($"tile")).first() should be (expectedRandNoData) df.selectExpr("rf_agg_no_data_cells(tile)").as[Long].first() should be (expectedRandNoData) checkDocs("rf_agg_no_data_cells") } @@ -375,7 +445,7 @@ class RasterFunctionsSpec extends FunSpec val df = randNDTilesWithNull.toDF("tile") df - .select(agg_stats($"tile") as "stats") + .select(rf_agg_stats($"tile") as "stats") .select("stats.data_cells", "stats.no_data_cells") .as[(Long, Long)] .first() should be ((expectedRandData, expectedRandNoData)) @@ -389,7 +459,7 @@ class RasterFunctionsSpec extends FunSpec it("should compute a aggregate histogram") { val df = randNDTilesWithNull.toDF("tile") - val hist1 = df.select(agg_approx_histogram($"tile")).first() + val hist1 = df.select(rf_agg_approx_histogram($"tile")).first() val hist2 = df.selectExpr("rf_agg_approx_histogram(tile) as hist") .select($"hist".as[CellHistogram]) .first() @@ -399,7 +469,7 @@ class RasterFunctionsSpec extends FunSpec it("should compute local statistics") { val df = randNDTilesWithNull.toDF("tile") - val stats1 = df.select(agg_local_stats($"tile")) + val stats1 = df.select(rf_agg_local_stats($"tile")) .first() val stats2 = df.selectExpr("rf_agg_local_stats(tile) as stats") .select($"stats".as[LocalCellStatistics]) @@ -411,42 +481,42 @@ class RasterFunctionsSpec extends FunSpec it("should compute local min") { val df = Seq(two, three, one, six).toDF("tile") - df.select(agg_local_min($"tile")).first() should be(one.toArrayTile()) + df.select(rf_agg_local_min($"tile")).first() should be(one.toArrayTile()) df.selectExpr("rf_agg_local_min(tile)").as[Tile].first() should be(one.toArrayTile()) checkDocs("rf_agg_local_min") 
} it("should compute local max") { val df = Seq(two, three, one, six).toDF("tile") - df.select(agg_local_max($"tile")).first() should be(six.toArrayTile()) + df.select(rf_agg_local_max($"tile")).first() should be(six.toArrayTile()) df.selectExpr("rf_agg_local_max(tile)").as[Tile].first() should be(six.toArrayTile()) checkDocs("rf_agg_local_max") } it("should compute local data cell counts") { - val df = Seq(two, randNDTile, nd).toDF("tile") - val t1 = df.select(agg_local_data_cells($"tile")).first() + val df = Seq(two, randNDPRT, nd).toDF("tile") + val t1 = df.select(rf_agg_local_data_cells($"tile")).first() val t2 = df.selectExpr("rf_agg_local_data_cells(tile) as cnt").select($"cnt".as[Tile]).first() t1 should be (t2) checkDocs("rf_agg_local_data_cells") } it("should compute local no-data cell counts") { - val df = Seq(two, randNDTile, nd).toDF("tile") - val t1 = df.select(agg_local_no_data_cells($"tile")).first() + val df = Seq(two, randNDPRT, nd).toDF("tile") + val t1 = df.select(rf_agg_local_no_data_cells($"tile")).first() val t2 = df.selectExpr("rf_agg_local_no_data_cells(tile) as cnt").select($"cnt".as[Tile]).first() t1 should be (t2) - val t3 = df.select(local_add(agg_local_data_cells($"tile"), agg_local_no_data_cells($"tile"))).first() + val t3 = df.select(rf_local_add(rf_agg_local_data_cells($"tile"), rf_agg_local_no_data_cells($"tile"))).first() t3 should be(three.toArrayTile()) checkDocs("rf_agg_local_no_data_cells") } } describe("analytical transformations") { - it("should compute normalized_difference") { + it("should compute rf_normalized_difference") { val df = Seq((three, two)).toDF("three", "two") - df.select(tile_to_array_double(normalized_difference($"three", $"two"))) + df.select(rf_tile_to_array_double(rf_normalized_difference($"three", $"two"))) .first() .forall(_ == 0.2) shouldBe true @@ -459,18 +529,18 @@ class RasterFunctionsSpec extends FunSpec } it("should mask one tile against another") { - val df = Seq[Tile](randTile).toDF("tile") + val 
df = Seq[Tile](randPRT).toDF("tile") val withMask = df.withColumn("mask", - convert_cell_type( - local_greater($"tile", 50), + rf_convert_cell_type( + rf_local_greater($"tile", 50), "uint8") ) val withMasked = withMask.withColumn("masked", - mask($"tile", $"mask")) + rf_mask($"tile", $"mask")) - val result = withMasked.agg(agg_no_data_cells($"tile") < agg_no_data_cells($"masked")).as[Boolean] + val result = withMasked.agg(rf_agg_no_data_cells($"tile") < rf_agg_no_data_cells($"masked")).as[Boolean] result.first() should be(true) @@ -478,22 +548,22 @@ class RasterFunctionsSpec extends FunSpec } it("should inverse mask one tile against another") { - val df = Seq[Tile](randTile).toDF("tile") + val df = Seq[Tile](randPRT).toDF("tile") - val baseND = df.select(agg_no_data_cells($"tile")).first() + val baseND = df.select(rf_agg_no_data_cells($"tile")).first() val withMask = df.withColumn("mask", - convert_cell_type( - local_greater($"tile", 50), + rf_convert_cell_type( + rf_local_greater($"tile", 50), "uint8" ) ) val withMasked = withMask - .withColumn("masked", mask($"tile", $"mask")) - .withColumn("inv_masked", inverse_mask($"tile", $"mask")) + .withColumn("masked", rf_mask($"tile", $"mask")) + .withColumn("inv_masked", rf_inverse_mask($"tile", $"mask")) - val result = withMasked.agg(agg_no_data_cells($"masked") + agg_no_data_cells($"inv_masked")).as[Long] + val result = withMasked.agg(rf_agg_no_data_cells($"masked") + rf_agg_no_data_cells($"inv_masked")).as[Long] result.first() should be(tileSize + baseND) @@ -501,29 +571,50 @@ class RasterFunctionsSpec extends FunSpec } it("should mask tile by another identified by specified value") { - val df = Seq[Tile](randTile).toDF("tile") + val df = Seq[Tile](randPRT).toDF("tile") val mask_value = 4 val withMask = df.withColumn("mask", - local_multiply(convert_cell_type( - local_greater($"tile", 50), + rf_local_multiply(rf_convert_cell_type( + rf_local_greater($"tile", 50), "uint8"), lit(mask_value) ) ) val withMasked = 
withMask.withColumn("masked", - mask_by_value($"tile", $"mask", lit(mask_value))) + rf_mask_by_value($"tile", $"mask", lit(mask_value))) - val result = withMasked.agg(agg_no_data_cells($"tile") < agg_no_data_cells($"masked")).as[Boolean] + val result = withMasked.agg(rf_agg_no_data_cells($"tile") < rf_agg_no_data_cells($"masked")).as[Boolean] result.first() should be(true) checkDocs("rf_mask_by_value") } + it("should inverse mask tile by another identified by specified value") { + val df = Seq[Tile](randPRT).toDF("tile") + val mask_value = 4 + + val withMask = df.withColumn("mask", + rf_local_multiply(rf_convert_cell_type( + rf_local_greater($"tile", 50), + "uint8"), + lit(mask_value) + ) + ) + + val withMasked = withMask.withColumn("masked", + rf_inverse_mask_by_value($"tile", $"mask", lit(mask_value))) + + val result = withMasked.agg(rf_agg_no_data_cells($"tile") < rf_agg_no_data_cells($"masked")).as[Boolean] + + result.first() should be(true) + checkDocs("rf_inverse_mask_by_value") + } + it("should render ascii art") { val df = Seq[Tile](ProjectedRasterTile(TestData.l8Labels)).toDF("tile") - val r1 = df.select(render_ascii($"tile")) + val r1 = df.select(rf_render_ascii($"tile")) val r2 = df.selectExpr("rf_render_ascii(tile)").as[String] r1.first() should be(r2.first()) checkDocs("rf_render_ascii") @@ -531,7 +622,7 @@ class RasterFunctionsSpec extends FunSpec it("should render cells as matrix") { val df = Seq(randDoubleNDTile).toDF("tile") - val r1 = df.select(render_matrix($"tile")) + val r1 = df.select(rf_render_matrix($"tile")) val r2 = df.selectExpr("rf_render_matrix(tile)").as[String] r1.first() should be(r2.first()) checkDocs("rf_render_matrix") @@ -545,9 +636,9 @@ class RasterFunctionsSpec extends FunSpec val df = Seq((three_plus, three_less, three)).toDF("three_plus", "three_less", "three") - assertEqual(df.select(round($"three")).as[ProjectedRasterTile].first(), three) - assertEqual(df.select(round($"three_plus")).as[ProjectedRasterTile].first(), 
three_double) - assertEqual(df.select(round($"three_less")).as[ProjectedRasterTile].first(), three_double) + assertEqual(df.select(rf_round($"three")).as[ProjectedRasterTile].first(), three) + assertEqual(df.select(rf_round($"three_plus")).as[ProjectedRasterTile].first(), three_double) + assertEqual(df.select(rf_round($"three_less")).as[ProjectedRasterTile].first(), three_double) assertEqual(df.selectExpr("rf_round(three)").as[ProjectedRasterTile].first(), three) assertEqual(df.selectExpr("rf_round(three_plus)").as[ProjectedRasterTile].first(), three_double) @@ -556,27 +647,36 @@ class RasterFunctionsSpec extends FunSpec checkDocs("rf_round") } + it("should abs cell values") { + val minus = one.mapTile(t => t.convert(IntConstantNoDataCellType) * -1) + val df = Seq((minus, one)).toDF("minus", "one") + + assertEqual(df.select(rf_abs($"minus").as[ProjectedRasterTile]).first(), one) + + checkDocs("rf_abs") + } + it("should take logarithms positive cell values"){ - // log10 1000 == 3 + // rf_log10 1000 == 3 val thousand = TestData.projectedRasterTile(cols, rows, 1000, extent, crs, ShortConstantNoDataCellType) val threesDouble = TestData.projectedRasterTile(cols, rows, 3.0, extent, crs, DoubleConstantNoDataCellType) val zerosDouble = TestData.projectedRasterTile(cols, rows, 0.0, extent, crs, DoubleConstantNoDataCellType) val df1 = Seq(thousand).toDF("tile") - assertEqual(df1.select(log10($"tile")).as[ProjectedRasterTile].first(), threesDouble) + assertEqual(df1.select(rf_log10($"tile")).as[ProjectedRasterTile].first(), threesDouble) - // ln random tile == log10 random tile / log10(e); random tile square to ensure all positive cell values + // ln random tile == rf_log10 random tile / rf_log10(e); random tile square to ensure all positive cell values val df2 = Seq(randPositiveDoubleTile).toDF("tile") val log10e = math.log10(math.E) - assertEqual(df2.select(log($"tile")).as[ProjectedRasterTile].first(), - df2.select(log10($"tile")).as[ProjectedRasterTile].first() / log10e) 
+ assertEqual(df2.select(rf_log($"tile")).as[ProjectedRasterTile].first(), + df2.select(rf_log10($"tile")).as[ProjectedRasterTile].first() / log10e) lazy val maybeZeros = df2 .selectExpr(s"rf_local_subtract(rf_log(tile), rf_local_divide(rf_log10(tile), ${log10e}))") .as[ProjectedRasterTile].first() assertEqual(maybeZeros, zerosDouble) - // log1p for zeros should be ln(1) + // rf_log1p for zeros should be ln(1) val ln1 = math.log1p(0.0) val df3 = Seq(zero).toDF("tile") val maybeLn1 = df3.selectExpr(s"rf_log1p(tile)").as[ProjectedRasterTile].first() @@ -594,42 +694,42 @@ class RasterFunctionsSpec extends FunSpec // tile zeros ==> -Infinity val df_0 = Seq(zero).toDF("tile") - assertEqual(df_0.select(log($"tile")).as[ProjectedRasterTile].first(), ni_float) - assertEqual(df_0.select(log10($"tile")).as[ProjectedRasterTile].first(), ni_float) - assertEqual(df_0.select(log2($"tile")).as[ProjectedRasterTile].first(), ni_float) - // log1p of zeros should be 0. - assertEqual(df_0.select(log1p($"tile")).as[ProjectedRasterTile].first(), zero_float) + assertEqual(df_0.select(rf_log($"tile")).as[ProjectedRasterTile].first(), ni_float) + assertEqual(df_0.select(rf_log10($"tile")).as[ProjectedRasterTile].first(), ni_float) + assertEqual(df_0.select(rf_log2($"tile")).as[ProjectedRasterTile].first(), ni_float) + // rf_log1p of zeros should be 0. 
+ assertEqual(df_0.select(rf_log1p($"tile")).as[ProjectedRasterTile].first(), zero_float) // tile negative values ==> NaN assert(df_0.selectExpr("rf_log(rf_local_subtract(tile, 42))").as[ProjectedRasterTile].first().isNoDataTile) assert(df_0.selectExpr("rf_log2(rf_local_subtract(tile, 42))").as[ProjectedRasterTile].first().isNoDataTile) - assert(df_0.select(log1p(local_subtract($"tile", 42))).as[ProjectedRasterTile].first().isNoDataTile) - assert(df_0.select(log10(local_subtract($"tile", lit(0.01)))).as[ProjectedRasterTile].first().isNoDataTile) + assert(df_0.select(rf_log1p(rf_local_subtract($"tile", 42))).as[ProjectedRasterTile].first().isNoDataTile) + assert(df_0.select(rf_log10(rf_local_subtract($"tile", lit(0.01)))).as[ProjectedRasterTile].first().isNoDataTile) } it("should take exponential") { val df = Seq(six).toDF("tile") - // exp inverses log + // rf_exp inverses rf_log assertEqual( - df.select(exp(log($"tile"))).as[ProjectedRasterTile].first(), + df.select(rf_exp(rf_log($"tile"))).as[ProjectedRasterTile].first(), six ) // base 2 assertEqual( - df.select(exp2(log2($"tile"))).as[ProjectedRasterTile].first(), + df.select(rf_exp2(rf_log2($"tile"))).as[ProjectedRasterTile].first(), six) // base 10 assertEqual( - df.select(exp10(log10($"tile"))).as[ProjectedRasterTile].first(), + df.select(rf_exp10(rf_log10($"tile"))).as[ProjectedRasterTile].first(), six) // plus/minus 1 assertEqual( - df.select(expm1(log1p($"tile"))).as[ProjectedRasterTile].first(), + df.select(rf_expm1(rf_log1p($"tile"))).as[ProjectedRasterTile].first(), six) // SQL @@ -647,7 +747,7 @@ class RasterFunctionsSpec extends FunSpec df.selectExpr("rf_exp2(rf_log2(tile))").as[ProjectedRasterTile].first(), six) - // SQL expm1 + // SQL rf_expm1 assertEqual( df.selectExpr("rf_expm1(rf_log1p(tile))").as[ProjectedRasterTile].first(), six) @@ -678,11 +778,11 @@ class RasterFunctionsSpec extends FunSpec def df = Seq(lowRes).toDF("tile") - val maybeUp = df.select(resample($"tile", 
lit(2))).as[ProjectedRasterTile].first() + val maybeUp = df.select(rf_resample($"tile", lit(2))).as[ProjectedRasterTile].first() assertEqual(maybeUp, upsampled) def df2 = Seq((lowRes, fourByFour)).toDF("tile1", "tile2") - val maybeUpShape = df2.select(resample($"tile1", $"tile2")).as[ProjectedRasterTile].first() + val maybeUpShape = df2.select(rf_resample($"tile1", $"tile2")).as[ProjectedRasterTile].first() assertEqual(maybeUpShape, upsampled) // Downsample by double argument < 1 diff --git a/core/src/test/scala/org/locationtech/rasterframes/RasterJoinSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/RasterJoinSpec.scala new file mode 100644 index 000000000..b2cd5d8ce --- /dev/null +++ b/core/src/test/scala/org/locationtech/rasterframes/RasterJoinSpec.scala @@ -0,0 +1,168 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes + +import geotrellis.raster.resample.Bilinear +import geotrellis.raster.testkit.RasterMatchers +import geotrellis.raster.{IntConstantNoDataCellType, Raster, Tile} +import org.apache.spark.sql.functions._ +import org.locationtech.rasterframes.expressions.aggregates.TileRasterizerAggregate +import org.locationtech.rasterframes.expressions.aggregates.TileRasterizerAggregate.ProjectedRasterDefinition +import org.locationtech.rasterframes.model.TileDimensions + + +class RasterJoinSpec extends TestEnvironment with TestData with RasterMatchers { + import spark.implicits._ + describe("Raster join between two DataFrames") { + val b4nativeTif = readSingleband("L8-B4-Elkton-VA.tiff") + // Same data, reprojected to EPSG:4326 + val b4warpedTif = readSingleband("L8-B4-Elkton-VA-4326.tiff") + + val b4nativeRf = b4nativeTif.toDF(TileDimensions(10, 10)) + val b4warpedRf = b4warpedTif.toDF(TileDimensions(10, 10)) + .withColumnRenamed("tile", "tile2") + + it("should join the same scene correctly") { + + val b4nativeRfPrime = b4nativeTif.toDF(TileDimensions(10, 10)) + .withColumnRenamed("tile", "tile2") + val joined = b4nativeRf.rasterJoin(b4nativeRfPrime) + + joined.count() should be (b4nativeRf.count()) + + val measure = joined.select( + rf_tile_mean(rf_local_subtract($"tile", $"tile2")) as "diff_mean", + rf_tile_stats(rf_local_subtract($"tile", $"tile2")).getField("variance") as "diff_var") + .as[(Double, Double)] + .collect() + all (measure) should be ((0.0, 0.0)) + } + + it("should join same scene in different tile sizes"){ + val r1prime = b4nativeTif.toDF(TileDimensions(25, 25)).withColumnRenamed("tile", "tile2") + r1prime.select(rf_dimensions($"tile2").getField("rows")).as[Int].first() should be (25) + val joined = b4nativeRf.rasterJoin(r1prime) + + joined.count() should be (b4nativeRf.count()) + + val measure = joined.select( + rf_tile_mean(rf_local_subtract($"tile", $"tile2")) as 
"diff_mean", + rf_tile_stats(rf_local_subtract($"tile", $"tile2")).getField("variance") as "diff_var") + .as[(Double, Double)] + .collect() + all (measure) should be ((0.0, 0.0)) + + } + + it("should join same scene in two projections, same tile size") { + + // b4warpedRf source data is gdal warped b4nativeRf data; join them together. + val joined = b4nativeRf.rasterJoin(b4warpedRf) + // create a Raster from tile2 which should be almost equal to b4nativeTif + val result = joined.agg(TileRasterizerAggregate( + ProjectedRasterDefinition(b4nativeTif.cols, b4nativeTif.rows, b4nativeTif.cellType, b4nativeTif.crs, b4nativeTif.extent, Bilinear), + $"crs", $"extent", $"tile2") as "raster" + ).select(col("raster").as[Raster[Tile]]).first() + + result.extent shouldBe b4nativeTif.extent + + // Test the overall local difference of the `result` versus the original + import geotrellis.raster.mapalgebra.local._ + val sub = b4nativeTif.extent.buffer(-b4nativeTif.extent.width * 0.01) + val diff = Abs( + Subtract( + result.crop(sub).tile.convert(IntConstantNoDataCellType), + b4nativeTif.raster.crop(sub).tile.convert(IntConstantNoDataCellType) + ) + ) + // DN's within arbitrary threshold. N.B. 
the range of values in the source raster is (6396, 27835) + diff.statisticsDouble.get.mean should be (0.0 +- 200) + // Overall signal is preserved + val b4nativeStddev = b4nativeTif.tile.statisticsDouble.get.stddev + val rel_diff = diff.statisticsDouble.get.mean / b4nativeStddev + rel_diff should be (0.0 +- 0.15) + + // Use the tile structure of the `joined` dataframe to argue that the structure of the image is similar between `b4nativeTif` and `joined.tile2` + val tile_diffs = joined.select((abs(rf_tile_mean($"tile") - rf_tile_mean($"tile2")) / lit( b4nativeStddev)).alias("z")) + + // Check the 90%-ile z score; recognize there will be some localized areas of larger error + tile_diffs.selectExpr("percentile(z, 0.90)").as[Double].first() should be < 0.10 + // Check the median z score; it is pretty close to zero + tile_diffs.selectExpr("percentile(z, 0.50)").as[Double].first() should be < 0.025 + } + + it("should join multiple RHS tile columns"){ + // join multiple native CRS bands to the EPSG 4326 RF + + val multibandRf = b4nativeRf + .withColumn("t_plus", rf_local_add($"tile", $"tile")) + .withColumn("t_mult", rf_local_multiply($"tile", $"tile")) + multibandRf.tileColumns.length should be (3) + + val multibandJoin = multibandRf.rasterJoin(b4warpedRf) + + multibandJoin.tileColumns.length should be (4) + multibandJoin.count() should be (multibandRf.count()) + } + + it("should join with heterogeneous LHS CRS and coverages"){ + + val df17 = readSingleband("m_3607824_se_17_1_20160620_subset.tif") + .toDF(TileDimensions(50, 50)) + .withColumn("utm", lit(17)) + // neighboring and slightly overlapping NAIP scene + val df18 = readSingleband("m_3607717_sw_18_1_20160620_subset.tif") + .toDF(TileDimensions(60, 60)) + .withColumn("utm", lit(18)) + + df17.count() should be (6 * 6) // file is 300 x 300 + df18.count() should be (5 * 5) // file is 300 x 300 + + val df = df17.union(df18) + df.count() should be (6 * 6 + 5 * 5) + val expectCrs = Array("+proj=utm +zone=17 +datum=NAD83 
+units=m +no_defs ", "+proj=utm +zone=18 +datum=NAD83 +units=m +no_defs ") + df.select($"crs".getField("crsProj4")).distinct().as[String].collect() should contain theSameElementsAs expectCrs + + // read a third source to join. burned in box that intersects both above subsets; but more so on the df17 + val box = readSingleband("m_3607_box.tif").toDF(TileDimensions(4,4)).withColumnRenamed("tile", "burned") + val joined = df.rasterJoin(box) + + joined.count() should be (df.count) + + val totals = joined.groupBy($"utm").agg(sum(rf_tile_sum($"burned")).alias("burned_total")) + val total18 = totals.where($"utm" === 18).select($"burned_total").as[Double].first() + val total17 = totals.where($"utm" === 17).select($"burned_total").as[Double].first() + + total18 should be > 0.0 + total18 should be < total17 + + + } + + it("should pass through ancillary columns") { + val left = b4nativeRf.withColumn("left_id", monotonically_increasing_id()) + val right = b4warpedRf.withColumn("right_id", monotonically_increasing_id()) + val joined = left.rasterJoin(right) + joined.columns should contain allElementsOf Seq("left_id", "right_id_agg") + } + } +} diff --git a/core/src/test/scala/org/locationtech/rasterframes/ReprojectGeometrySpec.scala b/core/src/test/scala/org/locationtech/rasterframes/ReprojectGeometrySpec.scala new file mode 100644 index 000000000..a58294287 --- /dev/null +++ b/core/src/test/scala/org/locationtech/rasterframes/ReprojectGeometrySpec.scala @@ -0,0 +1,122 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes + +import geotrellis.proj4.{CRS, LatLng, Sinusoidal, WebMercator} +import org.apache.spark.sql.Encoders +import org.locationtech.jts.geom._ + +/** + * Test for geometry reprojection. + * + * @since 11/29/18 + */ +class ReprojectGeometrySpec extends TestEnvironment { + // Note: Test data copied from ReprojectSpec in GeoTrellis + val fact = new GeometryFactory() + val llLineString: Geometry = fact.createLineString(Array( + new Coordinate(-111.09374999999999, 34.784483415461345), + new Coordinate(-111.09374999999999, 43.29919735147067), + new Coordinate(-75.322265625, 43.29919735147067), + new Coordinate(-75.322265625, 34.784483415461345), + new Coordinate(-111.09374999999999, 34.784483415461345) + )) + + val wmLineString: Geometry = fact.createLineString(Array( + new Coordinate(-12366899.680315234, 4134631.734001753), + new Coordinate(-12366899.680315234, 5357624.186564572), + new Coordinate(-8384836.254770693, 5357624.186564572), + new Coordinate(-8384836.254770693, 4134631.734001753), + new Coordinate(-12366899.680315234, 4134631.734001753) + )) + + describe("Geometry reprojection") { + import spark.implicits._ + + it("should handle two literal CRSs") { + + val df = Seq((llLineString, wmLineString)).toDF("ll", "wm") + + val rp = df.select( + st_reproject($"ll", LatLng, WebMercator) as "wm2", + st_reproject($"wm", WebMercator, LatLng) as "ll2", + st_reproject(st_reproject($"ll", LatLng, Sinusoidal), Sinusoidal, WebMercator) as "wm3" + ).as[(Geometry, 
Geometry, Geometry)] + + + val (wm2, ll2, wm3) = rp.first() + + wm2 should matchGeom(wmLineString, 0.00001) + ll2 should matchGeom(llLineString, 0.00001) + wm3 should matchGeom(wmLineString, 0.00001) + } + + it("should handle one literal crs") { + implicit val enc = Encoders.tuple(jtsGeometryEncoder, jtsGeometryEncoder, crsEncoder) + val df = Seq((llLineString, wmLineString, LatLng: CRS)).toDF("ll", "wm", "llCRS") + + val rp = df.select( + st_reproject($"ll", $"llCRS", WebMercator) as "wm2", + st_reproject($"wm", WebMercator, $"llCRS") as "ll2", + st_reproject(st_reproject($"ll", $"llCRS", Sinusoidal), Sinusoidal, WebMercator) as "wm3" + ).as[(Geometry, Geometry, Geometry)] + + + val (wm2, ll2, wm3) = rp.first() + + wm2 should matchGeom(wmLineString, 0.00001) + ll2 should matchGeom(llLineString, 0.00001) + wm3 should matchGeom(wmLineString, 0.00001) + } + + it("should accept other geometry types") { + val df = Seq(1, 2, 3).toDF("id") + + noException shouldBe thrownBy { + df.select(st_reproject(st_makePoint($"id", $"id"), WebMercator, Sinusoidal)).count() + } + } + + it("should work in SQL") { + implicit val enc = Encoders.tuple(jtsGeometryEncoder, jtsGeometryEncoder, crsEncoder) + val df = Seq((llLineString, wmLineString, LatLng: CRS)).toDF("ll", "wm", "llCRS") + df.createOrReplaceTempView("geom") + + val rp = spark.sql( + """ + | SELECT st_reproject(ll, llCRS, 'EPSG:3857') as wm2, + | st_reproject(wm, 'EPSG:3857', llCRS) as ll2, + | st_reproject(st_reproject(ll, llCRS, '+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs'), + | '+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs', 'EPSG:3857') as wm3 + | FROM geom + """.stripMargin).as[(Geometry, Geometry, Geometry)] + + val (wm2, ll2, wm3) = rp.first() + + wm2 should matchGeom(wmLineString, 0.00001) + ll2 should matchGeom(llLineString, 0.00001) + wm3 should matchGeom(wmLineString, 0.00001) + + checkDocs("st_reproject") + } + } +} diff --git 
a/core/src/test/scala/astraea/spark/rasterframes/SpatialKeySpec.scala b/core/src/test/scala/org/locationtech/rasterframes/SpatialKeySpec.scala similarity index 87% rename from core/src/test/scala/astraea/spark/rasterframes/SpatialKeySpec.scala rename to core/src/test/scala/org/locationtech/rasterframes/SpatialKeySpec.scala index 065e9a5ed..b99b5c48e 100644 --- a/core/src/test/scala/astraea/spark/rasterframes/SpatialKeySpec.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/SpatialKeySpec.scala @@ -15,11 +15,12 @@ * License for the specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes +package org.locationtech.rasterframes -import com.vividsolutions.jts.geom.Polygon import geotrellis.proj4.LatLng import geotrellis.vector.Point import org.locationtech.geomesa.curve.Z2SFC @@ -37,11 +38,11 @@ class SpatialKeySpec extends TestEnvironment with TestData { describe("Spatial key conversions") { val raster = sampleGeoTiff.projectedRaster // Create a raster frame with a single row - val rf = raster.toRF(raster.tile.cols, raster.tile.rows) + val rf = raster.toLayer(raster.tile.cols, raster.tile.rows) it("should add an extent column") { val expected = raster.extent.jtsGeom - val result = rf.withBounds().select($"bounds".as[Polygon]).first + val result = rf.withGeometry().select(GEOMETRY_COLUMN).first assert(result === expected) } @@ -64,6 +65,4 @@ class SpatialKeySpec extends TestEnvironment with TestData { assert(result === expected) } } - // This is to avoid an IntelliJ error - protected def withFixture(test: Any) = ??? 
} diff --git a/core/src/test/scala/astraea/spark/rasterframes/TestData.scala b/core/src/test/scala/org/locationtech/rasterframes/TestData.scala similarity index 76% rename from core/src/test/scala/astraea/spark/rasterframes/TestData.scala rename to core/src/test/scala/org/locationtech/rasterframes/TestData.scala index 29c06849e..1b6b373e9 100644 --- a/core/src/test/scala/astraea/spark/rasterframes/TestData.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/TestData.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright (c) 2017. Astraea, Inc. + * Copyright 2017 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -14,19 +14,17 @@ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes +package org.locationtech.rasterframes import java.net.URI -import java.nio.file.Paths +import java.nio.file.{Files, Paths} import java.time.ZonedDateTime -import astraea.spark.rasterframes.expressions.tilestats.NoDataCells -import astraea.spark.rasterframes.model.TileContext -import astraea.spark.rasterframes.tiles.ProjectedRasterTile -import astraea.spark.rasterframes.{functions => F} -import com.vividsolutions.jts.geom.{Coordinate, GeometryFactory} import geotrellis.proj4.{CRS, LatLng} import geotrellis.raster import geotrellis.raster._ @@ -38,6 +36,10 @@ import geotrellis.vector.{Extent, ProjectedExtent} import org.apache.commons.io.IOUtils import org.apache.spark.SparkContext import org.apache.spark.sql.SparkSession +import org.locationtech.jts.geom.{Coordinate, GeometryFactory} +import org.locationtech.rasterframes.expressions.tilestats.NoDataCells +import 
org.locationtech.rasterframes.tiles.ProjectedRasterTile +import spray.json.JsObject import scala.reflect.ClassTag @@ -109,6 +111,12 @@ trait TestData { require((1 to 11).contains(band), "Invalid band number") readSingleband(s"L8-B$band-Elkton-VA.tiff") } + + def l8SamplePath(band: Int) = { + require((1 to 11).contains(band), "Invalid band number") + getClass.getResource(s"/L8-B$band-Elkton-VA.tiff").toURI + } + def l8Labels = readSingleband("L8-Labels-Elkton-VA.tiff") def naipSample(band: Int) = { @@ -116,22 +124,36 @@ trait TestData { readSingleband(s"NAIP-VA-b$band.tiff") } - def rgbCogSample = readMultiband("LC08_RGB_Norfolk_COG.tiff") + def rgbCogSample = readMultiband("LC08_RGB_Norfolk_COG.tiff") + + def rgbCogSamplePath = getClass.getResource("/LC08_RGB_Norfolk_COG.tiff").toURI def sampleTileLayerRDD(implicit spark: SparkSession): TileLayerRDD[SpatialKey] = { - val rf = sampleGeoTiff.projectedRaster.toRF(128, 128) + val rf = sampleGeoTiff.projectedRaster.toLayer(128, 128) rf.toTileLayerRDD(rf.tileColumns.head).left.get } private val baseCOG = "https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/149/039/LC08_L1TP_149039_20170411_20170415_01_T1/LC08_L1TP_149039_20170411_20170415_01_T1_%s.TIF" - lazy val remoteCOGSingleband1 = URI.create(baseCOG.format("B1")) - lazy val remoteCOGSingleband2 = URI.create(baseCOG.format("B2")) + lazy val remoteCOGSingleband1: URI = URI.create(baseCOG.format("B1")) + lazy val remoteCOGSingleband2: URI = URI.create(baseCOG.format("B2")) + + lazy val remoteCOGMultiband: URI = URI.create("https://s3-us-west-2.amazonaws.com/radiant-nasa-iserv/2014/02/14/IP0201402141023382027S03100E/IP0201402141023382027S03100E-COG.tif") + + lazy val remoteMODIS: URI = URI.create("https://modis-pds.s3.amazonaws.com/MCD43A4.006/31/11/2017158/MCD43A4.A2017158.h31v11.006.2017171203421_B01.TIF") + lazy val remoteL8: URI = 
URI.create("https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/017/033/LC08_L1TP_017033_20181010_20181030_01_T1/LC08_L1TP_017033_20181010_20181030_01_T1_B4.TIF") + lazy val remoteHttpMrfPath: URI = URI.create("https://s3.amazonaws.com/s22s-rasterframes-integration-tests/m_3607526_sw_18_1_20160708.mrf") + lazy val remoteS3MrfPath: URI = URI.create("s3://naip-analytic/va/2016/100cm/rgbir/37077/m_3707764_sw_18_1_20160708.mrf") - lazy val remoteCOGMultiband = URI.create("https://s3-us-west-2.amazonaws.com/radiant-nasa-iserv/2014/02/14/IP0201402141023382027S03100E/IP0201402141023382027S03100E-COG.tif") + lazy val localSentinel: URI = getClass.getResource("/B01.jp2").toURI + lazy val cogPath: URI = getClass.getResource("/LC08_RGB_Norfolk_COG.tiff").toURI + lazy val singlebandCogPath: URI = getClass.getResource("/LC08_B7_Memphis_COG.tiff").toURI + lazy val nonCogPath: URI = getClass.getResource("/L8-B8-Robinson-IL.tiff").toURI - lazy val remoteMODIS = URI.create("https://modis-pds.s3.amazonaws.com/MCD43A4.006/31/11/2017158/MCD43A4.A2017158.h31v11.006.2017171203421_B01.TIF") + lazy val l8B1SamplePath: URI = l8SamplePath(1) + lazy val l8samplePath: URI = getClass.getResource("/L8-B1-Elkton-VA.tiff").toURI + lazy val modisConvertedMrfPath: URI = getClass.getResource("/MCD43A4.A2019111.h30v06.006.2019120033434_01.mrf").toURI - object JTS { + object GeomData { val fact = new GeometryFactory() val c1 = new Coordinate(1, 2) val c2 = new Coordinate(3, 4) @@ -144,6 +166,19 @@ trait TestData { val mpoly = fact.createMultiPolygon(Array(poly, poly, poly)) val coll = fact.createGeometryCollection(Array(point, line, poly, mpoint, mline, mpoly)) val all = Seq(point, line, poly, mpoint, mline, mpoly, coll) + lazy val geoJson = { + import scala.collection.JavaConversions._ + val p = Paths.get(TestData.getClass + .getResource("/L8-Labels-Elkton-VA.geojson").toURI) + Files.readAllLines(p).mkString("\n") + } + lazy val features = { + import geotrellis.vector.io._ + import 
geotrellis.vector.io.json.JsonFeatureCollection + import spray.json.DefaultJsonProtocol._ + import spray.json._ + GeomData.geoJson.parseGeoJson[JsonFeatureCollection].getAllPolygonFeatures[JsObject]() + } } } diff --git a/core/src/test/scala/astraea/spark/rasterframes/TestEnvironment.scala b/core/src/test/scala/org/locationtech/rasterframes/TestEnvironment.scala similarity index 74% rename from core/src/test/scala/astraea/spark/rasterframes/TestEnvironment.scala rename to core/src/test/scala/org/locationtech/rasterframes/TestEnvironment.scala index aaf173014..87ab2559d 100644 --- a/core/src/test/scala/astraea/spark/rasterframes/TestEnvironment.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/TestEnvironment.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright (c) 2017. Astraea, Inc. + * Copyright 2017 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -14,25 +14,25 @@ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes +package org.locationtech.rasterframes import java.nio.file.{Files, Paths} -import astraea.spark.rasterframes.encoders.StandardEncoders.PrimitiveEncoders.stringEnc -import astraea.spark.rasterframes.ref.RasterSource -import astraea.spark.rasterframes.ref.RasterSource.ReadCallback -import astraea.spark.rasterframes.util.toParquetFriendlyColumnName -import com.vividsolutions.jts.geom.Geometry +import com.typesafe.scalalogging.LazyLogging import geotrellis.spark.testkit.{TestEnvironment => GeoTrellisTestEnvironment} -import geotrellis.util.LazyLogging import org.apache.spark.SparkContext import org.apache.spark.sql._ import org.apache.spark.sql.functions.col import org.apache.spark.sql.types.StructType +import org.locationtech.jts.geom.Geometry import org.scalactic.Tolerance import org.scalatest._ import org.scalatest.matchers.{MatchResult, Matcher} +import org.locationtech.rasterframes.util._ trait TestEnvironment extends FunSpec with GeoTrellisTestEnvironment with Matchers with Inspectors with Tolerance with LazyLogging { @@ -40,11 +40,14 @@ trait TestEnvironment extends FunSpec with GeoTrellisTestEnvironment override def sparkMaster: String = "local[*]" override implicit def sc: SparkContext = { _sc.setLogLevel("ERROR"); _sc } - //p.setProperty(“spark.driver.allowMultipleContexts”, “true”) lazy val sqlContext: SQLContext = { - val session = SparkSession.builder.config(_sc.getConf).getOrCreate() - astraea.spark.rasterframes.WithSQLContextMethods(session.sqlContext).withRasterFrames + val session = SparkSession.builder + .config(_sc.getConf) + .config("spark.sql.crossJoin.enabled", true) + .withKryoSerialization + .getOrCreate() + session.sqlContext.withRasterFrames } lazy val sql: String ⇒ DataFrame = sqlContext.sql @@ -86,6 +89,7 @@ trait TestEnvironment extends FunSpec with GeoTrellisTestEnvironment def matchGeom(g: Geometry, tolerance: Double) = new GeometryMatcher(g, 
tolerance) def checkDocs(name: String): Unit = { + import spark.implicits._ val docs = sql(s"DESCRIBE FUNCTION EXTENDED $name").as[String].collect().mkString("\n") docs should include(name) docs shouldNot include("not found") @@ -95,16 +99,5 @@ trait TestEnvironment extends FunSpec with GeoTrellisTestEnvironment } object TestEnvironment { - case class ReadMonitor(ignoreHeader: Boolean = true) extends ReadCallback with LazyLogging { - var reads: Int = 0 - var total: Long = 0 - override def readRange(source: RasterSource, start: Long, length: Int): Unit = { - logger.trace(s"Reading $length at $start from $source") - // Ignore header reads - if(!ignoreHeader || start > 0) reads += 1 - total += length - } - override def toString: String = s"$productPrefix(reads=$reads, total=$total)" - } } \ No newline at end of file diff --git a/core/src/test/scala/astraea/spark/rasterframes/TileAssemblerSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/TileAssemblerSpec.scala similarity index 78% rename from core/src/test/scala/astraea/spark/rasterframes/TileAssemblerSpec.scala rename to core/src/test/scala/org/locationtech/rasterframes/TileAssemblerSpec.scala index 29eff421f..757231595 100644 --- a/core/src/test/scala/astraea/spark/rasterframes/TileAssemblerSpec.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/TileAssemblerSpec.scala @@ -19,16 +19,15 @@ * */ -package astraea.spark.rasterframes -import astraea.spark.rasterframes.ref.RasterSource -import astraea.spark.rasterframes.ref.RasterSource.InMemoryRasterSource +package org.locationtech.rasterframes + import com.typesafe.scalalogging.LazyLogging import geotrellis.proj4.LatLng import geotrellis.raster._ import geotrellis.raster.render.ColorRamps import geotrellis.vector.Extent -import org.apache.spark.sql._ -import org.apache.spark.sql.{functions => F} +import org.apache.spark.sql.{functions => F, _} +import org.locationtech.rasterframes.ref.{InMemoryRasterSource, RasterSource} /** * @@ -42,15 +41,15 @@ 
class TileAssemblerSpec extends TestEnvironment { it("should reassemble a small scene") { val raster = TestData.l8Sample(8).projectedRaster - val rf = raster.toRF(16, 16) + val rf = raster.toLayer(16, 16) val ct = rf.tileLayerMetadata.merge.cellType val (tileCols, tileRows) = rf.tileLayerMetadata.merge.tileLayout.tileDimensions - val exploded = rf.select($"spatial_key", explode_tiles($"tile")) + val exploded = rf.select($"spatial_key", rf_explode_tiles($"tile")) val assembled = exploded .groupBy($"spatial_key") - .agg(assemble_tile(COLUMN_INDEX_COLUMN, ROW_INDEX_COLUMN, $"tile", tileCols, tileRows, ct)) + .agg(rf_assemble_tile(COLUMN_INDEX_COLUMN, ROW_INDEX_COLUMN, $"tile", tileCols, tileRows, ct)) assert( @@ -65,12 +64,13 @@ class TileAssemblerSpec extends TestEnvironment { val sceneSize = (260, 257) val rs = InMemoryRasterSource(TestData.randomTile(sceneSize._1, sceneSize._2, ByteConstantNoDataCellType), Extent(10, 20, 30, 40), LatLng) val df = rs.toDF - val exploded = df.select($"spatial_index", $"extent", tile_dimensions($"tile") as "tile_dimensions", explode_tiles($"tile")) + val exploded = df.select($"spatial_index", $"extent", rf_dimensions($"tile") as "tile_dimensions", rf_explode_tiles($"tile")) val assembled = exploded .groupBy($"spatial_index", $"extent", $"tile_dimensions") .agg( - convert_cell_type(assemble_tile(COLUMN_INDEX_COLUMN, ROW_INDEX_COLUMN, + rf_convert_cell_type( + rf_assemble_tile(COLUMN_INDEX_COLUMN, ROW_INDEX_COLUMN, $"tile", $"tile_dimensions.cols", $"tile_dimensions.rows"), rs.cellType) as "tile" ) @@ -89,7 +89,7 @@ class TileAssemblerSpec extends TestEnvironment { val exploded = util.time("exploded") { df - .select($"spatial_index", explode_tiles($"tile")) + .select($"spatial_index", rf_explode_tiles($"tile")) .forceCache } @@ -98,7 +98,7 @@ class TileAssemblerSpec extends TestEnvironment { val assembled = util.time("assembled") { exploded .groupBy($"spatial_index") - .agg(assemble_tile(COLUMN_INDEX_COLUMN, ROW_INDEX_COLUMN, + 
.agg(rf_assemble_tile(COLUMN_INDEX_COLUMN, ROW_INDEX_COLUMN, $"tile", 256, 256, UShortUserDefinedNoDataCellType(32767))) .forceCache @@ -111,8 +111,8 @@ class TileAssemblerSpec extends TestEnvironment { assert(assembled.count() === df.count()) - val expected = df.select(agg_stats($"tile")).first() - val result = assembled.select(agg_stats($"tile")).first() + val expected = df.select(rf_agg_stats($"tile")).first() + val result = assembled.select(rf_agg_stats($"tile")).first() assert(result.copy(no_data_cells = expected.no_data_cells) === expected) } @@ -134,9 +134,9 @@ object TileAssemblerSpec extends LazyLogging { implicit class WithToDF(val rs: RasterSource) { def toDF(implicit spark: SparkSession): DataFrame = { import spark.implicits._ - rs.readAll().left.get + rs.readAll() .zipWithIndex - .map { case (r, i) ⇒ (i, r.extent, r.tile) } + .map { case (r, i) ⇒ (i, r.extent, r.tile.band(0)) } .toDF("spatial_index", "extent", "tile") .repartition($"spatial_index") .forceCache diff --git a/core/src/test/scala/astraea/spark/rasterframes/TileStatsSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/TileStatsSpec.scala similarity index 78% rename from core/src/test/scala/astraea/spark/rasterframes/TileStatsSpec.scala rename to core/src/test/scala/org/locationtech/rasterframes/TileStatsSpec.scala index 781b8290d..50920ab1c 100644 --- a/core/src/test/scala/astraea/spark/rasterframes/TileStatsSpec.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/TileStatsSpec.scala @@ -15,18 +15,18 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes +package org.locationtech.rasterframes -import astraea.spark.rasterframes.TestData.randomTile -import astraea.spark.rasterframes.TestData.fracTile -import astraea.spark.rasterframes.expressions.aggstats.LocalMeanAggregate -import astraea.spark.rasterframes.stats.CellHistogram import geotrellis.raster._ -import geotrellis.spark._ import geotrellis.raster.mapalgebra.local.{Max, Min} +import geotrellis.spark._ import org.apache.spark.sql.functions._ +import org.locationtech.rasterframes.TestData.randomTile +import org.locationtech.rasterframes.stats.CellHistogram /** * Test rig associated with computing statistics and other descriptive @@ -35,21 +35,21 @@ import org.apache.spark.sql.functions._ * @since 9/18/17 */ class TileStatsSpec extends TestEnvironment with TestData { - import sqlContext.implicits._ import TestData.injectND + import sqlContext.implicits._ describe("computing statistics over tiles") { //import org.apache.spark.sql.execution.debug._ it("should report dimensions") { val df = Seq[(Tile, Tile)]((byteArrayTile, byteArrayTile)).toDF("tile1", "tile2") - val dims = df.select(tile_dimensions($"tile1") as "dims").select("dims.*") + val dims = df.select(rf_dimensions($"tile1") as "dims").select("dims.*") assert(dims.as[(Int, Int)].first() === (3, 3)) assert(dims.schema.head.name === "cols") val query = sql("""|select dims.* from ( - |select rf_tile_dimensions(tiles) as dims from ( + |select rf_dimensions(tiles) as dims from ( |select rf_make_constant_tile(1, 10, 10, 'int8raw') as tiles)) |""".stripMargin) write(query) @@ -57,7 +57,7 @@ class TileStatsSpec extends TestEnvironment with TestData { df.repartition(4).createOrReplaceTempView("tmp") assert( - sql("select dims.* from (select rf_tile_dimensions(tile2) as dims from tmp)") + sql("select dims.* from (select rf_dimensions(tile2) as dims from tmp)") .as[(Int, Int)] .first() === (3, 3)) } @@ -67,20 +67,20 @@ class 
TileStatsSpec extends TestEnvironment with TestData { forEvery(ct) { c => val expected = CellType.fromName(c) val tile = randomTile(5, 5, expected) - val result = Seq(tile).toDF("tile").select(cell_type($"tile")).first() + val result = Seq(tile).toDF("tile").select(rf_cell_type($"tile")).first() result should be(expected) } } // tiles defined for the next few tests - val tile1 = fracTile(10, 10, 5) + val tile1 = TestData.fracTile(10, 10, 5) val tile2 = ArrayTile(Array(-5, -4, -3, -2, -1, 0, 1, 2, 3), 3, 3) val tile3 = randomTile(255, 255, IntCellType) it("should compute accurate item counts") { val ds = Seq[Tile](tile1, tile2, tile3).toDF("tiles") val checkedValues = Seq[Double](0, 4, 7, 13, 26) - val result = checkedValues.map(x => ds.select(tile_histogram($"tiles")).first().itemCount(x)) + val result = checkedValues.map(x => ds.select(rf_tile_histogram($"tiles")).first().itemCount(x)) forEvery(checkedValues) { x => assert((x == 0 && result.head == 4) || result.contains(x - 1)) } @@ -89,7 +89,7 @@ class TileStatsSpec extends TestEnvironment with TestData { it("Should compute quantiles") { val ds = Seq[Tile](tile1, tile2, tile3).toDF("tiles") val numBreaks = 5 - val breaks = ds.select(tile_histogram($"tiles")).map(_.quantileBreaks(numBreaks)).collect() + val breaks = ds.select(rf_tile_histogram($"tiles")).map(_.quantileBreaks(numBreaks)).collect() assert(breaks(1).length === numBreaks) assert(breaks(0).apply(2) == 25) assert(breaks(1).max <= 3 && breaks.apply(1).min >= -5) @@ -101,7 +101,7 @@ class TileStatsSpec extends TestEnvironment with TestData { ds.createOrReplaceTempView("tmp") withClue("max") { - val max = ds.agg(agg_local_max($"tiles")) + val max = ds.agg(rf_agg_local_max($"tiles")) val expected = Max(byteArrayTile, byteConstantTile) write(max) assert(max.as[Tile].first() === expected) @@ -112,7 +112,7 @@ class TileStatsSpec extends TestEnvironment with TestData { } withClue("min") { - val min = ds.agg(agg_local_min($"tiles")) + val min = 
ds.agg(rf_agg_local_min($"tiles")) val expected = Min(byteArrayTile, byteConstantTile) write(min) assert(min.as[Tile].first() === Min(byteArrayTile, byteConstantTile)) @@ -127,19 +127,19 @@ class TileStatsSpec extends TestEnvironment with TestData { withClue("mean") { val ds = Seq.fill[Tile](3)(randomTile(5, 5, FloatConstantNoDataCellType)).toDS() - val means1 = ds.select(tile_stats($"value")).map(_.mean).collect - val means2 = ds.select(tile_mean($"value")).collect + val means1 = ds.select(rf_tile_stats($"value")).map(_.mean).collect + val means2 = ds.select(rf_tile_mean($"value")).collect // Compute the mean manually, knowing we're not dealing with no-data values. val means = - ds.select(tile_to_array_double($"value")).map(a => a.sum / a.length).collect + ds.select(rf_tile_to_array_double($"value")).map(a => a.sum / a.length).collect forAll(means.zip(means1)) { case (l, r) => assert(l === r +- 1e-6) } forAll(means.zip(means2)) { case (l, r) => assert(l === r +- 1e-6) } } withClue("sum") { - val rf = l8Sample(1).projectedRaster.toRF + val rf = l8Sample(1).toDF() val expected = 309149454 // computed with rasterio - val result = rf.agg(sum(tile_sum($"tile"))).collect().head.getDouble(0) + val result = rf.agg(sum(rf_tile_sum($"tile"))).collect().head.getDouble(0) logger.info(s"L8 sample band 1 grand total: ${result}") assert(result === expected) } @@ -149,7 +149,7 @@ class TileStatsSpec extends TestEnvironment with TestData { val ds = Seq.fill[Tile](3)(randomTile(5, 5, FloatCellType)).toDF("tiles") ds.createOrReplaceTempView("tmp") - val r1 = ds.select(tile_histogram($"tiles")) + val r1 = ds.select(rf_tile_histogram($"tiles")) assert(r1.first.totalCount === 5 * 5) write(r1) val r2 = sql("select hist.* from (select rf_tile_histogram(tiles) as hist from tmp)").as[CellHistogram] @@ -179,7 +179,7 @@ class TileStatsSpec extends TestEnvironment with TestData { .fill[Tile](rows)(randomTile(tileSize, tileSize, FloatConstantNoDataCellType)) .toDF("tiles") 
ds.createOrReplaceTempView("tmp") - val agg = ds.select(agg_approx_histogram($"tiles")) + val agg = ds.select(rf_agg_approx_histogram($"tiles")) val histArray = agg.collect() histArray.length should be (1) @@ -198,21 +198,21 @@ class TileStatsSpec extends TestEnvironment with TestData { it("should compute aggregate mean") { val ds = (Seq.fill[Tile](10)(randomTile(5, 5, FloatCellType)) :+ null).toDF("tiles") - val agg = ds.select(agg_mean($"tiles")) - val stats = ds.select(agg_stats($"tiles") as "stats").select($"stats.mean".as[Double]) + val agg = ds.select(rf_agg_mean($"tiles")) + val stats = ds.select(rf_agg_stats($"tiles") as "stats").select($"stats.mean".as[Double]) assert(agg.first() === stats.first()) } it("should compute aggregate statistics") { val ds = Seq.fill[Tile](10)(randomTile(5, 5, FloatConstantNoDataCellType)).toDF("tiles") - val exploded = ds.select(explode_tiles($"tiles")) + val exploded = ds.select(rf_explode_tiles($"tiles")) val (mean, vrnc) = exploded.agg(avg($"tiles"), var_pop($"tiles")).as[(Double, Double)].first - val stats = ds.select(agg_stats($"tiles") as "stats") ///.as[(Long, Double, Double, Double, Double)] + val stats = ds.select(rf_agg_stats($"tiles") as "stats") ///.as[(Long, Double, Double, Double, Double)] //stats.printSchema() noException shouldBe thrownBy { - ds.select(agg_stats($"tiles")).collect() + ds.select(rf_agg_stats($"tiles")).collect() } val agg = stats.select($"stats.variance".as[Double]) @@ -223,7 +223,7 @@ class TileStatsSpec extends TestEnvironment with TestData { val agg2 = sql("select stats.* from (select rf_agg_stats(tiles) as stats from tmp)") assert(agg2.first().getAs[Long]("data_cells") === 250L) - val agg3 = ds.agg(agg_stats($"tiles") as "stats").select($"stats.mean".as[Double]) + val agg3 = ds.agg(rf_agg_stats($"tiles") as "stats").select($"stats.mean".as[Double]) assert(mean === agg3.first()) } @@ -236,7 +236,7 @@ class TileStatsSpec extends TestEnvironment with TestData { .map(injectND(2)) :+ 
null).toDF("tiles") ds.createOrReplaceTempView("tmp") - val agg = ds.select(agg_local_stats($"tiles") as "stats") + val agg = ds.select(rf_agg_local_stats($"tiles") as "stats") val stats = agg.select("stats.*") //printStatsRows(stats) @@ -269,25 +269,25 @@ class TileStatsSpec extends TestEnvironment with TestData { val dsNd = (Seq.fill(20)(completeTile) :+ incompleteTile :+ null).toDF("tiles") // counted everything properly - val countTile = ds.select(agg_local_data_cells($"tiles")).first() + val countTile = ds.select(rf_agg_local_data_cells($"tiles")).first() forAll(countTile.toArray())(i => assert(i === 20)) - val countArray = dsNd.select(agg_local_data_cells($"tiles")).first().toArray() + val countArray = dsNd.select(rf_agg_local_data_cells($"tiles")).first().toArray() val expectedCount = (completeTile.localDefined().toArray zip incompleteTile.localDefined().toArray()).toSeq.map( pr => pr._1 * 20 + pr._2) assert(countArray === expectedCount) - val countNodataArray = dsNd.select(agg_local_no_data_cells($"tiles")).first().toArray + val countNodataArray = dsNd.select(rf_agg_local_no_data_cells($"tiles")).first().toArray assert(countNodataArray === incompleteTile.localUndefined().toArray) - val minTile = dsNd.select(agg_local_min($"tiles")).first() + val minTile = dsNd.select(rf_agg_local_min($"tiles")).first() assert(minTile.toArray() === completeTile.toArray()) - val maxTile = dsNd.select(agg_local_max($"tiles")).first() + val maxTile = dsNd.select(rf_agg_local_max($"tiles")).first() assert(maxTile.toArray() === completeTile.toArray()) - val meanTile = dsNd.select(agg_local_mean($"tiles")).first() + val meanTile = dsNd.select(rf_agg_local_mean($"tiles")).first() assert(meanTile.toArray() === completeTile.toArray()) } } @@ -300,20 +300,20 @@ class TileStatsSpec extends TestEnvironment with TestData { .map(injectND(nds)) :+ null).toDF("tiles") it("should count cells by NoData state") { - val counts = tiles.select(no_data_cells($"tiles")).collect().dropRight(1) + val 
counts = tiles.select(rf_no_data_cells($"tiles")).collect().dropRight(1) forEvery(counts)(c => assert(c === nds)) - val counts2 = tiles.select(data_cells($"tiles")).collect().dropRight(1) + val counts2 = tiles.select(rf_data_cells($"tiles")).collect().dropRight(1) forEvery(counts2)(c => assert(c === tsize * tsize - nds)) } it("should detect all NoData tiles") { - val ndCount = tiles.select("*").where(is_no_data_tile($"tiles")).count() + val ndCount = tiles.select("*").where(rf_is_no_data_tile($"tiles")).count() ndCount should be(1) val ndTiles = (Seq.fill[Tile](count)(ArrayTile.empty(UByteConstantNoDataCellType, tsize, tsize)) :+ null) .toDF("tiles") - val ndCount2 = ndTiles.select("*").where(is_no_data_tile($"tiles")).count() + val ndCount2 = ndTiles.select("*").where(rf_is_no_data_tile($"tiles")).count() ndCount2 should be(count + 1) } } diff --git a/core/src/test/scala/astraea/spark/rasterframes/TileUDTSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/TileUDTSpec.scala similarity index 80% rename from core/src/test/scala/astraea/spark/rasterframes/TileUDTSpec.scala rename to core/src/test/scala/org/locationtech/rasterframes/TileUDTSpec.scala index b83b94486..3081b2f64 100644 --- a/core/src/test/scala/astraea/spark/rasterframes/TileUDTSpec.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/TileUDTSpec.scala @@ -1,5 +1,3 @@ - - /* * This software is licensed under the Apache 2 license, quoted below. * @@ -17,19 +15,21 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes - -import astraea.spark.rasterframes.encoders.CatalystSerializer._ -import astraea.spark.rasterframes.functions.cellTypes +package org.locationtech.rasterframes import geotrellis.raster.{CellType, Tile} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.rf._ +import org.apache.spark.sql.types.StringType +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.tiles.ShowableTile import org.scalatest.Inspectors /** - * RasterFrame test rig. + * RasterFrameLayer test rig. * * @since 7/10/17 */ @@ -38,12 +38,11 @@ class TileUDTSpec extends TestEnvironment with TestData with Inspectors { spark.version val tileEncoder: ExpressionEncoder[Tile] = ExpressionEncoder() - val TileType = new TileUDT() implicit val ser = TileUDT.tileSerializer describe("TileUDT") { - val tileSizes = Seq(2, 64, 128, 222, 511) - val ct = cellTypes().filter(_ != "bool") + val tileSizes = Seq(2, 7, 64, 128, 511) + val ct = functions.cellTypes().filter(_ != "bool") def forEveryConfig(test: Tile ⇒ Unit): Unit = { forEvery(tileSizes.combinations(2).toSeq) { case Seq(cols, rows) ⇒ @@ -93,5 +92,13 @@ class TileUDTSpec extends TestEnvironment with TestData with Inspectors { } } } + + it("should provide a pretty-print tile") { + import spark.implicits._ + forEveryConfig { tile => + val stringified = Seq(tile).toDF("tile").select($"tile".cast(StringType)).as[String].first() + stringified should be(ShowableTile.show(tile)) + } + } } } diff --git a/core/src/test/scala/astraea/spark/rasterframes/encoders/CatalystSerializerSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/encoders/CatalystSerializerSpec.scala similarity index 86% rename from core/src/test/scala/astraea/spark/rasterframes/encoders/CatalystSerializerSpec.scala rename to core/src/test/scala/org/locationtech/rasterframes/encoders/CatalystSerializerSpec.scala index 
c489b8d7b..4e8bfdfcc 100644 --- a/core/src/test/scala/astraea/spark/rasterframes/encoders/CatalystSerializerSpec.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/encoders/CatalystSerializerSpec.scala @@ -19,19 +19,20 @@ * */ -package astraea.spark.rasterframes.encoders +package org.locationtech.rasterframes.encoders + import java.time.ZonedDateTime -import astraea.spark.rasterframes.encoders.StandardEncoders._ -import astraea.spark.rasterframes.model.{CellContext, TileContext, TileDataContext, TileDimensions} -import astraea.spark.rasterframes.ref.{RasterRef, RasterSource} -import astraea.spark.rasterframes.{TestData, TestEnvironment} import geotrellis.proj4._ import geotrellis.raster.{CellSize, CellType, TileLayout, UShortUserDefinedNoDataCellType} import geotrellis.spark.tiling.LayoutDefinition -import geotrellis.spark.{Bounds, KeyBounds, SpaceTimeKey, SpatialKey, TileLayerMetadata} +import geotrellis.spark.{KeyBounds, SpaceTimeKey, SpatialKey, TileLayerMetadata} import geotrellis.vector.{Extent, ProjectedExtent} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.locationtech.rasterframes.{TestData, TestEnvironment} +import org.locationtech.rasterframes.encoders.StandardEncoders._ +import org.locationtech.rasterframes.model.{CellContext, TileContext, TileDataContext, TileDimensions} +import org.locationtech.rasterframes.ref.{RasterRef, RasterSource} import org.scalatest.Assertion class CatalystSerializerSpec extends TestEnvironment with TestData { @@ -102,8 +103,9 @@ class CatalystSerializerSpec extends TestEnvironment with TestData { } it("should serialize RasterRef") { + // TODO: Decide if RasterRef should be encoded 'flat', non-'flat', or depends val src = RasterSource(remoteCOGSingleband1) - val value = RasterRef(src, Some(src.extent.buffer(-3.0))) + val value = RasterRef(src, 0, Some(src.extent.buffer(-3.0))) assertConsistent(value) assertInvertable(value) } @@ -116,17 +118,17 @@ class CatalystSerializerSpec extends 
TestEnvironment with TestData { assertContract(ext) } - it("should eserialize ProjectedExtent") { + it("should serialize ProjectedExtent") { val pe = ProjectedExtent(ext, ConusAlbers) assertContract(pe) } - it("should eserialize SpatialKey") { + it("should serialize SpatialKey") { val v = SpatialKey(2, 3) assertContract(v) } - it("should eserialize SpaceTimeKey") { + it("should serialize SpaceTimeKey") { val v = SpaceTimeKey(2, 3, ZonedDateTime.now()) assertContract(v) } diff --git a/core/src/test/scala/astraea/spark/rasterframes/encoders/EncodingSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/encoders/EncodingSpec.scala similarity index 90% rename from core/src/test/scala/astraea/spark/rasterframes/encoders/EncodingSpec.scala rename to core/src/test/scala/org/locationtech/rasterframes/encoders/EncodingSpec.scala index a0c0bad0e..b27d5cccd 100644 --- a/core/src/test/scala/astraea/spark/rasterframes/encoders/EncodingSpec.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/encoders/EncodingSpec.scala @@ -19,14 +19,13 @@ * */ -package astraea.spark.rasterframes.encoders +package org.locationtech.rasterframes.encoders import java.io.File import java.net.URI -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.tiles.ProjectedRasterTile -import com.vividsolutions.jts.geom.Envelope +import org.locationtech.rasterframes._ +import org.locationtech.jts.geom.Envelope import geotrellis.proj4._ import geotrellis.raster.{CellType, Tile, TileFeature} import geotrellis.spark.{SpaceTimeKey, SpatialKey, TemporalProjectedExtent, TileLayerMetadata} @@ -34,6 +33,8 @@ import geotrellis.vector.{Extent, ProjectedExtent} import org.apache.spark.sql.Row import org.apache.spark.sql.functions._ import org.apache.spark.sql.rf.TileUDT +import org.locationtech.rasterframes.TestEnvironment +import org.locationtech.rasterframes.tiles.ProjectedRasterTile /** * Test rig for encoding GT types into Catalyst types. 
@@ -151,5 +152,13 @@ class EncodingSpec extends TestEnvironment with TestData { assert(ds.first === env) } } + describe("Dataframe encoding ops on spatial types") { + it("should code RDD[Point]") { + val points = Seq(null, extent.center.jtsGeom, null) + val ds = points.toDS + write(ds) + assert(ds.collect().toSeq === points) + } + } } diff --git a/core/src/test/scala/org/locationtech/rasterframes/expressions/ProjectedLayerMetadataAggregateTest.scala b/core/src/test/scala/org/locationtech/rasterframes/expressions/ProjectedLayerMetadataAggregateTest.scala new file mode 100644 index 000000000..4d4949357 --- /dev/null +++ b/core/src/test/scala/org/locationtech/rasterframes/expressions/ProjectedLayerMetadataAggregateTest.scala @@ -0,0 +1,59 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.expressions + +import geotrellis.raster.Tile +import geotrellis.spark._ +import geotrellis.spark.tiling.FloatingLayoutScheme +import geotrellis.vector.{Extent, ProjectedExtent} +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.encoders.serialized_literal +import org.locationtech.rasterframes.expressions.aggregates.ProjectedLayerMetadataAggregate +import org.locationtech.rasterframes.model.TileDimensions + +class ProjectedLayerMetadataAggregateTest extends TestEnvironment { + + import spark.implicits._ + + describe("ProjectedLayerMetadataAggregate") { + it("should collect metadata from RasterFrame") { + val image = TestData.sampleGeoTiff + val rf = image.projectedRaster.toLayer(60, 65) + val crs = rf.crs + + val df = rf.withExtent() + .select($"extent", $"tile").as[(Extent, Tile)] + + val tileDims = rf.tileLayerMetadata.merge.tileLayout.tileDimensions + + val (_, tlm) = df + .map { case (ext, tile) => (ProjectedExtent(ext, crs), tile) } + .rdd.collectMetadata[SpatialKey](FloatingLayoutScheme(tileDims._1, tileDims._2)) + + val md = df.select(ProjectedLayerMetadataAggregate(crs, TileDimensions(tileDims), $"extent", + serialized_literal(crs), rf_cell_type($"tile"), rf_dimensions($"tile"))) + val tlm2 = md.first() + + tlm2 should be(tlm) + } + } +} diff --git a/core/src/test/scala/astraea/spark/rasterframes/ml/NoDataFilterSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/ml/NoDataFilterSpec.scala similarity index 88% rename from core/src/test/scala/astraea/spark/rasterframes/ml/NoDataFilterSpec.scala rename to core/src/test/scala/org/locationtech/rasterframes/ml/NoDataFilterSpec.scala index 17a0f25d4..1d4dbc4f6 100644 --- a/core/src/test/scala/astraea/spark/rasterframes/ml/NoDataFilterSpec.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/ml/NoDataFilterSpec.scala @@ -15,13 +15,16 @@ * License for the specific language 
governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.ml +package org.locationtech.rasterframes.ml import java.nio.file.Files -import astraea.spark.rasterframes._ +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.TestEnvironment import org.scalatest.BeforeAndAfter /** diff --git a/core/src/test/scala/org/locationtech/rasterframes/ml/TileExploderSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/ml/TileExploderSpec.scala new file mode 100644 index 000000000..2d9e2d04c --- /dev/null +++ b/core/src/test/scala/org/locationtech/rasterframes/ml/TileExploderSpec.scala @@ -0,0 +1,48 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2017 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.ml + +import org.locationtech.rasterframes.TestData +import geotrellis.raster.Tile +import org.apache.spark.sql.functions.lit +import org.locationtech.rasterframes.TestEnvironment +/** + * + * @since 2/16/18 + */ +class TileExploderSpec extends TestEnvironment with TestData { + describe("Tile explode transformer") { + it("should explode tiles") { + import spark.implicits._ + val df = Seq[(Tile, Tile)]((byteArrayTile, byteArrayTile)).toDF("tile1", "tile2").withColumn("other", lit("stuff")) + + val exploder = new TileExploder() + val newSchema = exploder.transformSchema(df.schema) + + val exploded = exploder.transform(df) + assert(newSchema === exploded.schema) + assert(exploded.columns.length === 5) + assert(exploded.count() === 9) + write(exploded) + } + } +} diff --git a/core/src/test/scala/org/locationtech/rasterframes/model/LazyCRSSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/model/LazyCRSSpec.scala new file mode 100644 index 000000000..1762c402e --- /dev/null +++ b/core/src/test/scala/org/locationtech/rasterframes/model/LazyCRSSpec.scala @@ -0,0 +1,43 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.model + +import geotrellis.proj4.{CRS, LatLng, Sinusoidal, WebMercator} +import org.scalatest._ + +class LazyCRSSpec extends FunSpec with Matchers { + val sinPrj = "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs" + val llPrj = "epsg:4326" + describe("LazyCRS") { + it("should implement equals") { + LazyCRS(WebMercator) should be(LazyCRS(WebMercator)) + LazyCRS(WebMercator) should be(WebMercator) + WebMercator should be(LazyCRS(WebMercator)) + LazyCRS(sinPrj) should be (Sinusoidal) + CRS.fromString(sinPrj) should be (LazyCRS(Sinusoidal)) + LazyCRS(llPrj) should be(LatLng) + LazyCRS(LatLng) should be(LatLng) + LatLng should be(LazyCRS(llPrj)) + LatLng should be(LazyCRS(LatLng)) + } + } +} diff --git a/core/src/test/scala/astraea/spark/rasterframes/ref/RasterRefSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/ref/RasterRefSpec.scala similarity index 69% rename from core/src/test/scala/astraea/spark/rasterframes/ref/RasterRefSpec.scala rename to core/src/test/scala/org/locationtech/rasterframes/ref/RasterRefSpec.scala index 4efe2b474..f34c89859 100644 --- a/core/src/test/scala/astraea/spark/rasterframes/ref/RasterRefSpec.scala +++ b/core/src/test/scala/org/locationtech/rasterframes/ref/RasterRefSpec.scala @@ -19,15 +19,16 @@ * */ -package astraea.spark.rasterframes.ref +package org.locationtech.rasterframes.ref -import astraea.spark.rasterframes.TestEnvironment.ReadMonitor -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.expressions.transformers._ -import astraea.spark.rasterframes.expressions.accessors._ -import astraea.spark.rasterframes.ref.RasterRef.RasterRefTile +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.expressions.accessors._ +import org.locationtech.rasterframes.expressions.generators._ +import RasterRef.RasterRefTile import geotrellis.raster.Tile import 
geotrellis.vector.Extent +import org.apache.spark.sql.Encoders +import org.locationtech.rasterframes.TestEnvironment /** * @@ -36,6 +37,7 @@ import geotrellis.vector.Extent */ //noinspection TypeAnnotation class RasterRefSpec extends TestEnvironment with TestData { + def sub(e: Extent) = { val c = e.center val w = e.width @@ -44,15 +46,15 @@ class RasterRefSpec extends TestEnvironment with TestData { } trait Fixture { - val counter = new ReadMonitor - val src = RasterSource(remoteCOGSingleband1, Some(counter)) - val fullRaster = RasterRef(src) + val src = RasterSource(remoteCOGSingleband1) + val fullRaster = RasterRef(src, 0, None) val subExtent = sub(src.extent) - val subRaster = RasterRef(src, Option(subExtent)) + val subRaster = RasterRef(src, 0, Some(subExtent)) } import spark.implicits._ + implicit val enc = Encoders.tuple(Encoders.scalaInt, RasterRef.rrEncoder) describe("GetCRS Expression") { it("should read from RasterRef") { new Fixture { @@ -94,7 +96,6 @@ class RasterRefSpec extends TestEnvironment with TestData { new Fixture { val ds = Seq((1, RasterRefTile(fullRaster): Tile)).toDF("index", "ref") val dims = ds.select(GetDimensions($"ref")) - println(counter) assert(dims.count() === 1) assert(dims.first() !== null) } @@ -103,19 +104,18 @@ class RasterRefSpec extends TestEnvironment with TestData { new Fixture { val ds = Seq((1, RasterRefTile(subRaster): Tile)).toDF("index", "ref") val dims = ds.select(GetDimensions($"ref")) - println(counter) assert(dims.count() === 1) assert(dims.first() !== null) } } } - describe("GetExtent Expression") { + describe("GetExtent") { it("should read from RasterRef") { import spark.implicits._ new Fixture { val ds = Seq((1, fullRaster)).toDF("index", "ref") - val extent = ds.select(GetExtent($"ref")) + val extent = ds.select(rf_extent($"ref")) assert(extent.count() === 1) assert(extent.first() !== null) } @@ -124,7 +124,7 @@ class RasterRefSpec extends TestEnvironment with TestData { import spark.implicits._ new Fixture { 
val ds = Seq((1, subRaster)).toDF("index", "ref") - val extent = ds.select(GetExtent($"ref")) + val extent = ds.select(rf_extent($"ref")) assert(extent.count() === 1) assert(extent.first() !== null) } @@ -135,23 +135,18 @@ class RasterRefSpec extends TestEnvironment with TestData { it("should delay reading") { new Fixture { assert(subRaster.cellType === src.cellType) - assert(counter.reads === 0) } } it("should support subextents") { new Fixture { assert(subRaster.cols.toDouble === src.cols * 0.01 +- 2.0) assert(subRaster.rows.toDouble === src.rows * 0.01 +- 2.0) - assert(counter.reads === 0) //subRaster.tile.rescale(0, 255).renderPng().write("target/foo1.png") } } it("should be realizable") { new Fixture { - assert(counter.reads === 0) assert(subRaster.tile.statistics.map(_.dataCells) === Some(subRaster.cols * subRaster.rows)) - assert(counter.reads > 0) - println(counter) } } @@ -166,29 +161,52 @@ class RasterRefSpec extends TestEnvironment with TestData { val data = buf.toByteArray val in = new ObjectInputStream(new ByteArrayInputStream(data)) val recovered = in.readObject() - assert(subRaster === recovered) + subRaster should be (recovered) } } } - describe("CreateRasterRefs") { - it("should convert and expand RasterSource") { - new Fixture { - import spark.implicits._ - val df = Seq(src).toDF("src") - val refs = df.select(RasterSourceToRasterRefs($"src")) - assert(refs.count() > 1) + describe("RasterRef creation") { + it("should realize subiles of proper size") { + val src = RasterSource(remoteMODIS) + val dims = src + .layoutExtents(NOMINAL_TILE_DIMS) + .map(e => RasterRef(src, 0, Some(e))) + .map(_.dimensions) + .distinct + + forEvery(dims) { d => + d._1 should be <= NOMINAL_TILE_SIZE + d._2 should be <= NOMINAL_TILE_SIZE } } + } - it("should work with tile realization") { - new Fixture { - import spark.implicits._ - val df = Seq(src).toDF("src") - val refs = df.select(RasterSourceToRasterRefs(true, $"src")) - assert(refs.count() > 1) - } + 
describe("RasterSourceToRasterRefs") { + it("should convert and expand RasterSource") { + val src = RasterSource(remoteMODIS) + import spark.implicits._ + val df = Seq(src).toDF("src") + val refs = df.select(RasterSourceToRasterRefs(None, Seq(0), $"src")) + refs.count() should be (1) } + it("should properly realize subtiles") { + val src = RasterSource(remoteMODIS) + import spark.implicits._ + val df = Seq(src).toDF("src") + val refs = df.select(RasterSourceToRasterRefs(Some(NOMINAL_TILE_DIMS), Seq(0), $"src") as "proj_raster") + + refs.count() shouldBe > (1L) + + + val dims = refs.select(rf_dimensions($"proj_raster")).distinct().collect() + forEvery(dims) { r => + r.cols should be <=NOMINAL_TILE_SIZE + r.rows should be <=NOMINAL_TILE_SIZE + } + + dims.foreach(println) + } } } diff --git a/core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala b/core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala new file mode 100644 index 000000000..1e62b95f5 --- /dev/null +++ b/core/src/test/scala/org/locationtech/rasterframes/ref/RasterSourceSpec.scala @@ -0,0 +1,182 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2018 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.ref + +import java.net.URI + +import org.locationtech.rasterframes._ +import geotrellis.vector.Extent +import org.apache.spark.sql.rf.RasterSourceUDT +import org.locationtech.rasterframes.model.TileDimensions + + +class RasterSourceSpec extends TestEnvironment with TestData { + def sub(e: Extent) = { + val c = e.center + val w = e.width + val h = e.height + Extent(c.x, c.y, c.x + w * 0.1, c.y + h * 0.1) + } + + describe("General RasterSource") { + it("should identify as UDT") { + assert(new RasterSourceUDT() === new RasterSourceUDT()) + } + val rs = RasterSource(getClass.getResource("/L8-B8-Robinson-IL.tiff").toURI) + it("should compute nominal tile layout bounds") { + val bounds = rs.layoutBounds(TileDimensions(65, 60)) + val agg = bounds.reduce(_ combine _) + agg should be (rs.gridBounds) + } + it("should compute nominal tile layout extents") { + val extents = rs.layoutExtents(TileDimensions(63, 63)) + val agg = extents.reduce(_ combine _) + agg should be (rs.extent) + } + it("should reassemble correct grid from extents") { + val dims = TileDimensions(63, 63) + val ext = rs.layoutExtents(dims).head + val bounds = rs.layoutBounds(dims).head + rs.rasterExtent.gridBoundsFor(ext) should be (bounds) + } + it("should compute layout extents from scene with fractional gsd") { + + val rs = RasterSource(remoteMODIS) + + val dims = rs.layoutExtents(NOMINAL_TILE_DIMS) + .map(e => rs.rasterExtent.gridBoundsFor(e, false)) + .map(b => (b.width, b.height)) + .distinct + forEvery(dims) { d => + d._1 should be <= NOMINAL_TILE_SIZE + d._2 should be <= NOMINAL_TILE_SIZE + } + } + } + + describe("HTTP RasterSource") { + it("should support metadata querying over HTTP") { + withClue("remoteCOGSingleband") { + val src = RasterSource(remoteCOGSingleband1) + assert(!src.extent.isEmpty) + } + withClue("remoteCOGMultiband") { + val src = RasterSource(remoteCOGMultiband) + 
assert(!src.extent.isEmpty) + } + } + it("should read sub-tile") { + withClue("remoteCOGSingleband") { + val src = RasterSource(remoteCOGSingleband1) + val raster = src.read(sub(src.extent)) + assert(raster.size > 0 && raster.size < src.size) + } + withClue("remoteCOGMultiband") { + val src = RasterSource(remoteCOGMultiband) + val raster = src.read(sub(src.extent)) + assert(raster.size > 0 && raster.size < src.size) + } + } + it("should Java serialize") { + import java.io._ + val src = RasterSource(remoteCOGSingleband1) + val buf = new java.io.ByteArrayOutputStream() + val out = new ObjectOutputStream(buf) + out.writeObject(src) + out.close() + + val data = buf.toByteArray + val in = new ObjectInputStream(new ByteArrayInputStream(data)) + val recovered = in.readObject().asInstanceOf[RasterSource] + assert(src.toString === recovered.toString) + } + } + describe("File RasterSource") { + it("should support metadata querying of file") { + val localSrc = geotiffDir.resolve("LC08_B7_Memphis_COG.tiff").toUri + val src = RasterSource(localSrc) + assert(!src.extent.isEmpty) + } + it("should interpret no scheme as file://"){ + val localSrc = geotiffDir.resolve("LC08_B7_Memphis_COG.tiff").toString + val schemelessUri = new URI(localSrc) + schemelessUri.getScheme should be (null) + val src = RasterSource(schemelessUri) + assert(!src.extent.isEmpty) + } + } + + if(GDALRasterSource.hasGDAL) { + describe("GDAL Rastersource") { + val gdal = GDALRasterSource(cogPath) + val jvm = JVMGeoTiffRasterSource(cogPath) + it("should compute the same metadata as JVM RasterSource") { + gdal.cellType should be(jvm.cellType) + } + it("should compute the same dimensions as JVM RasterSource") { + val dims = TileDimensions(128, 128) + gdal.extent should be(jvm.extent) + gdal.rasterExtent should be(jvm.rasterExtent) + gdal.cellSize should be(jvm.cellSize) + gdal.layoutBounds(dims) should contain allElementsOf jvm.layoutBounds(dims) + gdal.layoutExtents(dims) should contain allElementsOf 
jvm.layoutExtents(dims) + } + + + it("should support vsi file paths") { + val archivePath = geotiffDir.resolve("L8-archive.zip") + val archiveURI = URI.create("gdal://vsizip/" + archivePath.toString + "/L8-RGB-VA.tiff") + val gdal = GDALRasterSource(archiveURI) + + gdal.bandCount should be (3) + } + + it("should interpret no scheme as file://") { + val localSrc = geotiffDir.resolve("LC08_B7_Memphis_COG.tiff").toString + val schemelessUri = new URI(localSrc) + val gdal = GDALRasterSource(schemelessUri) + val jvm = JVMGeoTiffRasterSource(schemelessUri) + gdal.extent should be (jvm.extent) + gdal.cellSize should be(jvm.cellSize) + } + } + } + + describe("RasterSource tile construction") { + it("should read all tiles") { + val src = RasterSource(remoteMODIS) + + val subrasters = src.readAll() + + val collected = subrasters.map(_.extent).reduceLeft(_.combine(_)) + + assert(src.extent.xmin === collected.xmin +- 0.01) + assert(src.extent.ymin === collected.ymin +- 0.01) + assert(src.extent.xmax === collected.xmax +- 0.01) + assert(src.extent.ymax === collected.ymax +- 0.01) + + val totalCells = subrasters.map(_.size).sum + + assert(totalCells === src.size) + } + } +} diff --git a/datasource/build.sbt b/datasource/build.sbt deleted file mode 100644 index b42fe3d3f..000000000 --- a/datasource/build.sbt +++ /dev/null @@ -1,35 +0,0 @@ -moduleName := "rasterframes-datasource" - -libraryDependencies ++= Seq( - geotrellis("s3").value, - spark("core").value % Provided, - spark("mllib").value % Provided, - spark("sql").value % Provided -) - -initialCommands in console := """ - |import astraea.spark.rasterframes._ - |import geotrellis.raster._ - |import geotrellis.spark.io.kryo.KryoRegistrator - |import org.apache.spark.serializer.KryoSerializer - |import org.apache.spark.sql._ - |import org.apache.spark.sql.functions._ - |import astraea.spark.rasterframes.datasource.geotrellis._ - |import astraea.spark.rasterframes.datasource.geotiff._ - |implicit val spark = 
SparkSession.builder() - | .master("local[*]") - | .appName(getClass.getName) - | .config("spark.serializer", classOf[KryoSerializer].getName) - | .config("spark.kryoserializer.buffer.max", "500m") - | .config("spark.kryo.registrationRequired", "false") - | .config("spark.kryo.registrator", classOf[KryoRegistrator].getName) - | .getOrCreate() - | .withRasterFrames - |spark.sparkContext.setLogLevel("ERROR") - |import spark.implicits._ - | - |""".stripMargin - -cleanupCommands in console := """ - |spark.stop() - |""".stripMargin \ No newline at end of file diff --git a/datasource/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/datasource/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister index 26a271f13..a44f6fccd 100644 --- a/datasource/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister +++ b/datasource/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -1,3 +1,5 @@ -astraea.spark.rasterframes.datasource.geotiff.DefaultSource -astraea.spark.rasterframes.datasource.geotrellis.DefaultSource -astraea.spark.rasterframes.datasource.geotrellis.GeoTrellisCatalog +org.locationtech.rasterframes.datasource.geotiff.GeoTiffDataSource +org.locationtech.rasterframes.datasource.geotrellis.GeoTrellisLayerDataSource +org.locationtech.rasterframes.datasource.geotrellis.GeoTrellisCatalog +org.locationtech.rasterframes.datasource.raster.RasterSourceDataSource +org.locationtech.rasterframes.datasource.geojson.GeoJsonDataSource diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/DefaultSource.scala b/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/DefaultSource.scala deleted file mode 100644 index 74acbbc98..000000000 --- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/DefaultSource.scala +++ /dev/null @@ -1,132 +0,0 @@ -/* - * This software is licensed under the 
Apache 2 license, quoted below. - * - * Copyright 2018 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - * - */ - -package astraea.spark.rasterframes.datasource.geotiff - -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.util._ -import astraea.spark.rasterframes.datasource._ -import com.typesafe.scalalogging.LazyLogging -import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, DataSourceRegister, RelationProvider} -import org.apache.spark.sql.types.LongType -import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode, functions ⇒ F} -import _root_.geotrellis.raster.io.geotiff.{GeoTiffOptions, MultibandGeoTiff, Tags, Tiled} -import _root_.geotrellis.raster.io.geotiff.compression._ -import _root_.geotrellis.raster.io.geotiff.tags.codes.ColorSpace - -/** - * Spark SQL data source over GeoTIFF files. 
- * @since 1/14/18 - */ -class DefaultSource extends DataSourceRegister - with RelationProvider with CreatableRelationProvider - with DataSourceOptions with LazyLogging { - def shortName() = DefaultSource.SHORT_NAME - - def path(parameters: Map[String, String]) = - uriParam(PATH_PARAM, parameters) - - def createRelation(sqlContext: SQLContext, parameters: Map[String, String]) = { - val pathO = path(parameters) - require(pathO.isDefined, "Valid URI 'path' parameter required.") - sqlContext.withRasterFrames - - val p = pathO.get - - if(p.getPath.contains("*")) { - val bandCount = parameters.get(DefaultSource.BAND_COUNT_PARAM).map(_.toInt).getOrElse(1) - GeoTiffCollectionRelation(sqlContext, p, bandCount) - } - else GeoTiffRelation(sqlContext, p) - } - - override def createRelation(sqlContext: SQLContext, mode: SaveMode, parameters: Map[String, String], data: DataFrame): BaseRelation = { - val pathO = path(parameters) - require(pathO.isDefined, "Valid URI 'path' parameter required.") - require(pathO.get.getScheme == "file" || pathO.get.getScheme == null, "Currently only 'file://' destinations are supported") - sqlContext.withRasterFrames - - require(data.isRF, "GeoTIFF can only be constructed from a RasterFrame") - val rf = data.certify - - // If no desired image size is given, write at full size. 
- lazy val (fullResCols, fullResRows) = { - // get the layout size given that the tiles may be heterogenously sized - // first get any valid row and column in the spatial key structure - val sk = rf.select(SPATIAL_KEY_COLUMN).first() - - val tc = rf.tileColumns.head - - val c = rf - .where(SPATIAL_KEY_COLUMN("row") === sk.row) - .agg( - F.sum(tile_dimensions(tc)("cols") cast(LongType)) - ).first() - .getLong(0) - - val r = rf - .where(SPATIAL_KEY_COLUMN("col") === sk.col) - .agg( - F.sum(tile_dimensions(tc)("rows") cast(LongType)) - ).first() - .getLong(0) - - (c, r) - } - - val cols = numParam(DefaultSource.IMAGE_WIDTH_PARAM, parameters).getOrElse(fullResCols) - val rows = numParam(DefaultSource.IMAGE_HEIGHT_PARAM, parameters).getOrElse(fullResRows) - - require(cols <= Int.MaxValue && rows <= Int.MaxValue, s"Can't construct a GeoTIFF of size $cols x $rows. (Too big!)") - - // Should we really play traffic cop here? - if(cols.toDouble * rows * 64.0 > Runtime.getRuntime.totalMemory() * 0.5) - logger.warn(s"You've asked for the construction of a very large image ($cols x $rows), destined for ${pathO.get}. Out of memory error likely.") - - val tcols = rf.tileColumns - val raster = rf.toMultibandRaster(tcols, cols.toInt, rows.toInt) - - // We make some assumptions here.... eventually have column metadata encode this. 
- val colorSpace = tcols.size match { - case 3 | 4 ⇒ ColorSpace.RGB - case _ ⇒ ColorSpace.BlackIsZero - } - - val compress = parameters.get(DefaultSource.COMPRESS_PARAM).map(_.toBoolean).getOrElse(false) - val options = GeoTiffOptions(Tiled, if (compress) DeflateCompression else NoCompression, colorSpace) - val tags = Tags( - RFBuildInfo.toMap.filter(_._1.startsWith("rf")).mapValues(_.toString), - tcols.map(c ⇒ Map("RF_COL" -> c.columnName)).toList - ) - val geotiff = new MultibandGeoTiff(raster.tile, raster.extent, raster.crs, tags, options) - - logger.debug(s"Writing DataFrame to GeoTIFF ($cols x $rows) at ${pathO.get}") - geotiff.write(pathO.get.getPath) - GeoTiffRelation(sqlContext, pathO.get) - } -} - -object DefaultSource { - final val SHORT_NAME = "geotiff" - final val PATH_PARAM = "path" - final val IMAGE_WIDTH_PARAM = "imageWidth" - final val IMAGE_HEIGHT_PARAM = "imageWidth" - final val COMPRESS_PARAM = "compress" - final val BAND_COUNT_PARAM = "bandCount" -} diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/package.scala b/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/package.scala deleted file mode 100644 index 6e607c3b4..000000000 --- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/package.scala +++ /dev/null @@ -1,54 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2018 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the - * License for the specific language governing permissions and limitations under - * the License. - * - */ - -package astraea.spark.rasterframes.datasource - -import java.net.URI - -import astraea.spark.rasterframes._ -import org.apache.spark.sql.{DataFrameReader, DataFrameWriter} -import shapeless.tag -import shapeless.tag.@@ - -/** - * Extension methods enabled by this module. - * - * @since 1/16/18 - */ -package object geotiff { - /** Tagged type construction for enabling type-safe extension methods for loading - * a RasterFrame in expected form. */ - type GeoTiffRasterFrameReader = DataFrameReader @@ GeoTiffRasterFrameReaderTag - trait GeoTiffRasterFrameReaderTag - - /** Adds `geotiff` format specifier to `DataFrameReader`. */ - implicit class DataFrameReaderHasGeoTiffFormat(val reader: DataFrameReader) { - def geotiff: GeoTiffRasterFrameReader = - tag[GeoTiffRasterFrameReaderTag][DataFrameReader](reader.format(DefaultSource.SHORT_NAME)) - } - - implicit class DataFrameWriterHasGeoTiffFormat[T](val writer: DataFrameWriter[T]) { - def geotiff: DataFrameWriter[T] = writer.format(DefaultSource.SHORT_NAME) - } - - /** Adds `loadRF` to appropriately tagged `DataFrameReader` */ - implicit class GeoTiffReaderWithRF(val reader: GeoTiffRasterFrameReader) { - def loadRF(path: URI): RasterFrame = reader.load(path.toASCIIString).asRF - } -} diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/DataSourceOptions.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/DataSourceOptions.scala similarity index 90% rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/DataSourceOptions.scala rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/DataSourceOptions.scala index 5baa8d67f..d620dd4fd 100644 --- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/DataSourceOptions.scala +++ 
b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/DataSourceOptions.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2018 Astraea. Inc. + * Copyright 2018 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -15,10 +15,11 @@ * License for the specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes.datasource +package org.locationtech.rasterframes.datasource /** * Key constants associated with DataFrameReader options for certain DataSource implementations. diff --git a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/geojson/DOM.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geojson/DOM.scala similarity index 92% rename from experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/geojson/DOM.scala rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geojson/DOM.scala index 2dbcb7f0c..dfbbb92f3 100644 --- a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/geojson/DOM.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geojson/DOM.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2018 Astraea. Inc. + * Copyright 2019 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -15,16 +15,17 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes.experimental.datasource.geojson +package org.locationtech.rasterframes.datasource.geojson -import com.vividsolutions.jts.geom.{Envelope, Geometry} -import com.vividsolutions.jts.io.geojson.{GeoJsonReader, GeoJsonWriter} import geotrellis.vector.Extent -import spray.json._ +import org.locationtech.jts.geom.{Envelope, Geometry} +import org.locationtech.jts.io.geojson.{GeoJsonReader, GeoJsonWriter} import spray.json.DefaultJsonProtocol._ +import spray.json._ /** * Lightweight DOM for parsing GeoJSON feature sets. diff --git a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/geojson/GeoJsonDataSource.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geojson/GeoJsonDataSource.scala similarity index 94% rename from experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/geojson/GeoJsonDataSource.scala rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geojson/GeoJsonDataSource.scala index f042fbd1c..1bda41cd7 100644 --- a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/geojson/GeoJsonDataSource.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geojson/GeoJsonDataSource.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2018 Astraea. Inc. + * Copyright 2019 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -15,13 +15,12 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes.experimental.datasource.geojson +package org.locationtech.rasterframes.datasource.geojson -import astraea.spark.rasterframes.experimental.datasource.geojson.DOM._ -import com.vividsolutions.jts.geom.Geometry import org.apache.spark.annotation.Experimental import org.apache.spark.rdd.RDD import org.apache.spark.sql.jts.JTSTypes @@ -29,6 +28,8 @@ import org.apache.spark.sql.sources.{BaseRelation, DataSourceRegister, RelationP import org.apache.spark.sql.types.{DataTypes, StringType, StructField, StructType} import org.apache.spark.sql.{DataFrame, Row, SQLContext} import org.locationtech.geomesa.spark.jts._ +import org.locationtech.jts.geom.Geometry +import org.locationtech.rasterframes.datasource.geojson.DOM._ import spray.json.DefaultJsonProtocol._ import spray.json._ diff --git a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/geojson/package.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geojson/package.scala similarity index 94% rename from experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/geojson/package.scala rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geojson/package.scala index 262c255d1..6c49d75bc 100644 --- a/experimental/src/main/scala/astraea/spark/rasterframes/experimental/datasource/geojson/package.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geojson/package.scala @@ -19,7 +19,8 @@ * */ -package astraea.spark.rasterframes.experimental.datasource +package org.locationtech.rasterframes.datasource + import org.apache.spark.sql.DataFrameReader /** diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffCollectionRelation.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffCollectionRelation.scala similarity index 73% rename from 
datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffCollectionRelation.scala rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffCollectionRelation.scala index 2f69d4425..3148a67d0 100644 --- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffCollectionRelation.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffCollectionRelation.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2018 Astraea. Inc. + * Copyright 2018 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -15,42 +15,35 @@ * License for the specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes.datasource.geotiff +package org.locationtech.rasterframes.datasource.geotiff import java.net.URI -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.datasource.geotiff.GeoTiffCollectionRelation.Cols -import astraea.spark.rasterframes.encoders.CatalystSerializer -import astraea.spark.rasterframes.util._ import geotrellis.proj4.CRS import geotrellis.spark.io.hadoop.HadoopGeoTiffRDD import geotrellis.vector.{Extent, ProjectedExtent} import org.apache.hadoop.fs.Path import org.apache.spark.rdd.RDD -import org.apache.spark.sql.jts.JTSTypes import org.apache.spark.sql.rf.TileUDT import org.apache.spark.sql.sources.{BaseRelation, PrunedScan} import org.apache.spark.sql.types.{StringType, StructField, StructType} import org.apache.spark.sql.{Row, SQLContext} +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.datasource.geotiff.GeoTiffCollectionRelation.Cols +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import 
org.locationtech.rasterframes.util._ -/** - * - * - * @since 7/31/18 - */ +private[geotiff] case class GeoTiffCollectionRelation(sqlContext: SQLContext, uri: URI, bandCount: Int) extends BaseRelation with PrunedScan { override def schema: StructType = StructType(Seq( StructField(Cols.PATH, StringType, false), - StructField(EXTENT_COLUMN.columnName, CatalystSerializer[Extent].schema, nullable = true), - StructField(CRS_COLUMN.columnName, CatalystSerializer[CRS].schema, false) -// StructField(METADATA_COLUMN.columnName, -// DataTypes.createMapType(StringType, StringType, false) -// ) + StructField(EXTENT_COLUMN.columnName, schemaOf[Extent], nullable = true), + StructField(CRS_COLUMN.columnName, schemaOf[CRS], false) ) ++ ( if(bandCount == 1) Seq(StructField(Cols.TL, new TileUDT, false)) else for(b ← 1 to bandCount) yield StructField(Cols.TL + "_" + b, new TileUDT, nullable = true) @@ -63,14 +56,12 @@ case class GeoTiffCollectionRelation(sqlContext: SQLContext, uri: URI, bandCount val columnIndexes = requiredColumns.map(schema.fieldIndex) - - HadoopGeoTiffRDD.multiband(new Path(uri.toASCIIString), keyer, HadoopGeoTiffRDD.Options.DEFAULT) .map { case ((path, pe), mbt) ⇒ val entries = columnIndexes.map { case 0 ⇒ path - case 1 ⇒ CatalystSerializer[Extent].toRow(pe.extent) - case 2 ⇒ CatalystSerializer[CRS].toRow(pe.crs) + case 1 ⇒ pe.extent.toRow + case 2 ⇒ pe.crs.toRow case i if i > 2 ⇒ { if(bandCount == 1 && mbt.bandCount > 2) mbt.color() else mbt.band(i - 3) @@ -78,7 +69,6 @@ case class GeoTiffCollectionRelation(sqlContext: SQLContext, uri: URI, bandCount } Row(entries: _*) } - } } @@ -86,7 +76,7 @@ object GeoTiffCollectionRelation { object Cols { lazy val PATH = "path" lazy val CRS = "crs" - lazy val EX = BOUNDS_COLUMN.columnName + lazy val EX = GEOMETRY_COLUMN.columnName lazy val TL = TILE_COLUMN.columnName } } diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffDataSource.scala 
b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffDataSource.scala new file mode 100644 index 000000000..77781a781 --- /dev/null +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffDataSource.scala @@ -0,0 +1,191 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2018 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.datasource.geotiff + +import java.net.URI + +import _root_.geotrellis.proj4.CRS +import _root_.geotrellis.raster._ +import _root_.geotrellis.raster.io.geotiff.compression._ +import _root_.geotrellis.raster.io.geotiff.tags.codes.ColorSpace +import _root_.geotrellis.raster.io.geotiff.{GeoTiffOptions, MultibandGeoTiff, Tags, Tiled} +import _root_.geotrellis.spark._ +import com.typesafe.scalalogging.LazyLogging +import org.apache.spark.sql._ +import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, DataSourceRegister, RelationProvider} +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.datasource._ +import org.locationtech.rasterframes.expressions.aggregates.TileRasterizerAggregate.ProjectedRasterDefinition +import org.locationtech.rasterframes.expressions.aggregates.{ProjectedLayerMetadataAggregate, TileRasterizerAggregate} +import org.locationtech.rasterframes.model.{LazyCRS, 
TileDimensions} +import org.locationtech.rasterframes.util._ + +/** + * Spark SQL data source over GeoTIFF files. + */ +class GeoTiffDataSource + extends DataSourceRegister with RelationProvider with CreatableRelationProvider with DataSourceOptions with LazyLogging { + import GeoTiffDataSource._ + + def shortName() = GeoTiffDataSource.SHORT_NAME + + def createRelation(sqlContext: SQLContext, parameters: Map[String, String]) = { + require(parameters.path.isDefined, "Valid URI 'path' parameter required.") + sqlContext.withRasterFrames + + val p = parameters.path.get + + if (p.getPath.contains("*")) { + val bandCount = parameters.get(GeoTiffDataSource.BAND_COUNT_PARAM).map(_.toInt).getOrElse(1) + GeoTiffCollectionRelation(sqlContext, p, bandCount) + } else GeoTiffRelation(sqlContext, p) + } + + override def createRelation(sqlContext: SQLContext, mode: SaveMode, parameters: Map[String, String], df: DataFrame): BaseRelation = { + require(parameters.path.isDefined, "Valid URI 'path' parameter required.") + val path = parameters.path.get + require(path.getScheme == "file" || path.getScheme == null, "Currently only 'file://' destinations are supported") + sqlContext.withRasterFrames + + val tileCols = df.tileColumns + + require(tileCols.nonEmpty, "Could not find any tile columns.") + + val raster = if (df.isAlreadyLayer) { + val layer = df.certify + val tlm = layer.tileLayerMetadata.merge + + // If no desired image size is given, write at full size. + val TileDimensions(cols, rows) = parameters.rasterDimensions + .getOrElse { + val actualSize = tlm.layout.toRasterExtent().gridBoundsFor(tlm.extent) + TileDimensions(actualSize.width, actualSize.height) + } + + // Should we really play traffic cop here? + if (cols.toDouble * rows * 64.0 > Runtime.getRuntime.totalMemory() * 0.5) + logger.warn( + s"You've asked for the construction of a very large image ($cols x $rows), destined for ${path}. 
Out of memory error likely.") + + layer.toMultibandRaster(tileCols, cols.toInt, rows.toInt) + } else { + require(parameters.crs.nonEmpty, "A destination CRS must be provided") + require(tileCols.nonEmpty, "need at least one tile column") + + // Grab CRS to project into + val destCRS = parameters.crs.get + + // Select the anchoring Tile, Extent and CRS columns + val (extCol, crsCol, tileCol) = { + // Favor "ProjectedRaster" columns + val prCols = df.projRasterColumns + if (prCols.nonEmpty) { + (rf_extent(prCols.head), rf_crs(prCols.head), rf_tile(prCols.head)) + } else { + // If no "ProjectedRaster" column, look for single Extent and CRS columns. + val crsCols = df.crsColumns + require(crsCols.size == 1, "Exactly one CRS column must be in DataFrame") + val extentCols = df.extentColumns + require(extentCols.size == 1, "Exactly one Extent column must be in DataFrame") + (extentCols.head, crsCols.head, tileCols.head) + } + } + + // Scan table and constuct what the TileLayerMetadata would be in the specified destination CRS. 
+ val tlm: TileLayerMetadata[SpatialKey] = df + .select( + ProjectedLayerMetadataAggregate( + destCRS, + extCol, + crsCol, + rf_cell_type(tileCol), + rf_dimensions(tileCol) + )) + .first() + logger.debug(s"Contructed TileLayerMetadata: ${tlm.toString}") + + val c = ProjectedRasterDefinition(tlm) + + val config = parameters.rasterDimensions + .map { dims => + c.copy(totalCols = dims.cols, totalRows = dims.rows) + } + .getOrElse(c) + + val aggs = tileCols + .map(t => TileRasterizerAggregate(config, crsCol, extCol, rf_tile(t))("tile").as(t.columnName)) + + val agg = df.select(aggs: _*) + + val row = agg.first() + + val bands = for (i <- 0 until row.size) yield row.getAs[Tile](i) + + ProjectedRaster(MultibandTile(bands), tlm.extent, tlm.crs) + } + + val tags = Tags( + RFBuildInfo.toMap.filter(_._1.toLowerCase().contains("version")).mapValues(_.toString), + tileCols.map(c => Map("RF_COL" -> c.columnName)).toList + ) + + // We make some assumptions here.... eventually have column metadata encode this. 
+ val colorSpace = tileCols.size match { + case 3 | 4 => ColorSpace.RGB + case _ => ColorSpace.BlackIsZero + } + + val tiffOptions = GeoTiffOptions(Tiled, if (parameters.compress) DeflateCompression else NoCompression, colorSpace) + + val geotiff = new MultibandGeoTiff(raster.tile, raster.extent, raster.crs, tags, tiffOptions) + + logger.debug(s"Writing DataFrame to GeoTIFF (${geotiff.cols} x ${geotiff.rows}) at ${path}") + geotiff.write(path.getPath) + GeoTiffRelation(sqlContext, path) + } +} + +object GeoTiffDataSource { + final val SHORT_NAME = "geotiff" + final val PATH_PARAM = "path" + final val IMAGE_WIDTH_PARAM = "imageWidth" + final val IMAGE_HEIGHT_PARAM = "imageHeight" + final val COMPRESS_PARAM = "compress" + final val CRS_PARAM = "crs" + final val BAND_COUNT_PARAM = "bandCount" + + private[geotiff] implicit class ParamsDictAccessors(val parameters: Map[String, String]) extends AnyVal { + def path: Option[URI] = uriParam(PATH_PARAM, parameters) + def compress: Boolean = parameters.get(COMPRESS_PARAM).exists(_.toBoolean) + def crs: Option[CRS] = parameters.get(CRS_PARAM).map(s => LazyCRS(s)) + def rasterDimensions: Option[TileDimensions] = { + numParam(IMAGE_WIDTH_PARAM, parameters) + .zip(numParam(IMAGE_HEIGHT_PARAM, parameters)) + .map { + case (cols, rows) => + require(cols <= Int.MaxValue && rows <= Int.MaxValue, s"Can't construct a GeoTIFF of size $cols x $rows. 
(Too big!)") + TileDimensions(cols.toInt, rows.toInt) + } + .headOption + } + } +} diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffRelation.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffRelation.scala similarity index 85% rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffRelation.scala rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffRelation.scala index 8503171c8..b08ebc830 100644 --- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffRelation.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffRelation.scala @@ -15,15 +15,18 @@ * License for the specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.datasource.geotiff +package org.locationtech.rasterframes.datasource.geotiff import java.net.URI -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.encoders.CatalystSerializer -import astraea.spark.rasterframes.util._ +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.util._ +import com.typesafe.scalalogging.LazyLogging import geotrellis.proj4.CRS import geotrellis.spark._ import geotrellis.spark.io._ @@ -66,8 +69,8 @@ case class GeoTiffRelation(sqlContext: SQLContext, uri: URI) extends BaseRelatio StructType(Seq( StructField(SPATIAL_KEY_COLUMN.columnName, skSchema, nullable = false, skMetadata), - StructField(EXTENT_COLUMN.columnName, CatalystSerializer[Extent].schema, nullable = true), - StructField(CRS_COLUMN.columnName, CatalystSerializer[CRS].schema, nullable = true), + StructField(EXTENT_COLUMN.columnName, schemaOf[Extent], nullable = true), + StructField(CRS_COLUMN.columnName, schemaOf[CRS], nullable = 
true), StructField(METADATA_COLUMN.columnName, DataTypes.createMapType(StringType, StringType, false) ) @@ -85,8 +88,7 @@ case class GeoTiffRelation(sqlContext: SQLContext, uri: URI) extends BaseRelatio val trans = tlm.mapTransform val metadata = info.tags.headTags - val extSer = CatalystSerializer[Extent] - val encodedCRS = CatalystSerializer[CRS].toRow(tlm.crs) + val encodedCRS = tlm.crs.toRow if(info.segmentLayout.isTiled) { // TODO: Figure out how to do tile filtering via the range reader. @@ -98,7 +100,7 @@ case class GeoTiffRelation(sqlContext: SQLContext, uri: URI) extends BaseRelatio val gb = trans.extentToBounds(pe.extent) val entries = columnIndexes.map { case 0 => SpatialKey(gb.colMin, gb.rowMin) - case 1 => extSer.toRow(pe.extent) + case 1 => pe.extent.toRow case 2 => encodedCRS case 3 => metadata case n => tiles.band(n - 4) @@ -107,16 +109,15 @@ case class GeoTiffRelation(sqlContext: SQLContext, uri: URI) extends BaseRelatio } } else { - logger.warn("GeoTIFF is not already tiled. In-memory read required: " + uri) + //logger.warn("GeoTIFF is not already tiled. In-memory read required: " + uri) val geotiff = HadoopGeoTiffReader.readMultiband(new Path(uri)) val rdd = sqlContext.sparkContext.makeRDD(Seq((geotiff.projectedExtent, Shims.toArrayTile(geotiff.tile)))) - rdd.tileToLayout(tlm) .map { case (sk, tiles) ⇒ val entries = columnIndexes.map { case 0 => sk - case 1 => extSer.toRow(trans.keyToExtent(sk)) + case 1 => trans.keyToExtent(sk).toRow case 2 => encodedCRS case 3 => metadata case n => tiles.band(n - 4) diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/package.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/package.scala new file mode 100644 index 000000000..75bdc7e76 --- /dev/null +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotiff/package.scala @@ -0,0 +1,81 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. 
+ * + * Copyright 2018 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.datasource +import java.net.URI + +import org.apache.spark.sql.{DataFrameReader, DataFrameWriter} +import org.locationtech.rasterframes._ +import _root_.geotrellis.proj4.CRS +import shapeless.tag.@@ +import shapeless.tag + +package object geotiff { + /** Tagged type construction for enabling type-safe extension methods for loading + * a RasterFrameLayer from a single GeoTiff. */ + type GeoTiffRasterFrameReader = DataFrameReader @@ GeoTiffRasterFrameReaderTag + trait GeoTiffRasterFrameReaderTag + + /** Tagged type construction for enabling type-safe extension methods for writing + * a RasterFrame to a geotiff. */ + type GeoTiffRasterFrameWriter[T] = DataFrameWriter[T] @@ GeoTiffRasterFrameWriterTag + trait GeoTiffRasterFrameWriterTag + + /** Adds `geotiff` format specifier to `DataFrameReader`. 
*/ + implicit class DataFrameReaderHasGeoTiffFormat(val reader: DataFrameReader) { + @deprecated("Use `raster` instead.", "7/1/2019") + def geotiff: GeoTiffRasterFrameReader = + tag[GeoTiffRasterFrameReaderTag][DataFrameReader]( + reader.format(GeoTiffDataSource.SHORT_NAME) + ) + } + + implicit class DataFrameWriterHasGeoTiffFormat[T](val writer: DataFrameWriter[T]) { + def geotiff: GeoTiffRasterFrameWriter[T] = + tag[GeoTiffRasterFrameWriterTag][DataFrameWriter[T]]( + writer.format(GeoTiffDataSource.SHORT_NAME) + ) + + def withDimensions(cols: Int, rows: Int): GeoTiffRasterFrameWriter[T] = + tag[GeoTiffRasterFrameWriterTag][DataFrameWriter[T]]( + writer + .option(GeoTiffDataSource.IMAGE_WIDTH_PARAM, cols) + .option(GeoTiffDataSource.IMAGE_HEIGHT_PARAM, rows) + ) + + def withCompression: GeoTiffRasterFrameWriter[T] = + tag[GeoTiffRasterFrameWriterTag][DataFrameWriter[T]]( + writer + .option(GeoTiffDataSource.COMPRESS_PARAM, true) + ) + def withCRS(crs: CRS): GeoTiffRasterFrameWriter[T] = + tag[GeoTiffRasterFrameWriterTag][DataFrameWriter[T]]( + writer + .option(GeoTiffDataSource.CRS_PARAM, crs.toProj4String) + ) + } + + /** Adds `loadLayer` to appropriately tagged `DataFrameReader` */ + implicit class GeoTiffReaderWithRF(val reader: GeoTiffRasterFrameReader) { + @deprecated("Use `raster` instead.", "7/1/2019") + def loadLayer(path: URI): RasterFrameLayer = reader.load(path.toASCIIString).asLayer + } +} diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisCatalog.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisCatalog.scala similarity index 93% rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisCatalog.scala rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisCatalog.scala index 5af7f1e3f..11edc1d5f 100644 --- 
a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisCatalog.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisCatalog.scala @@ -15,23 +15,23 @@ * License for the specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.datasource.geotrellis +package org.locationtech.rasterframes.datasource.geotrellis import java.net.URI -import astraea.spark.rasterframes -import astraea.spark.rasterframes.datasource.geotrellis.GeoTrellisCatalog.GeoTrellisCatalogRelation -import astraea.spark.rasterframes.util.time import geotrellis.spark.io.AttributeStore import org.apache.spark.annotation.Experimental import org.apache.spark.rdd.RDD import org.apache.spark.sql._ +import org.apache.spark.sql.functions._ import org.apache.spark.sql.rf.VersionShims import org.apache.spark.sql.sources._ -import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.StructType +import org.locationtech.rasterframes.datasource.geotrellis.GeoTrellisCatalog.GeoTrellisCatalogRelation import spray.json.DefaultJsonProtocol._ import spray.json._ diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/DefaultSource.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisLayerDataSource.scala similarity index 90% rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/DefaultSource.scala rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisLayerDataSource.scala index 63e88e25a..d12ea1e17 100644 --- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/DefaultSource.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisLayerDataSource.scala @@ -15,14 +15,16 @@ * License for the specific language 
governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.datasource.geotrellis +package org.locationtech.rasterframes.datasource.geotrellis import java.net.URI -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.datasource.DataSourceOptions +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.datasource.DataSourceOptions import geotrellis.spark._ import geotrellis.spark.io._ import geotrellis.spark.io.index.ZCurveKeyIndexMethod @@ -37,9 +39,9 @@ import scala.util.Try * DataSource over a GeoTrellis layer store. */ @Experimental -class DefaultSource extends DataSourceRegister +class GeoTrellisLayerDataSource extends DataSourceRegister with RelationProvider with CreatableRelationProvider with DataSourceOptions { - def shortName(): String = DefaultSource.SHORT_NAME + def shortName(): String = GeoTrellisLayerDataSource.SHORT_NAME /** * Create a GeoTrellis data source. 
@@ -79,8 +81,8 @@ class DefaultSource extends DataSourceRegister require(layerName.isDefined, s"'$LAYER_PARAM' parameter for raster layer name required.") require(zoom.isDefined, s"Integer '$ZOOM_PARAM' parameter for raster layer zoom level required.") - val rf = data.asRFSafely - .getOrElse(throw new IllegalArgumentException("Only a valid RasterFrame can be saved as a GeoTrellis layer")) + val rf = data.asLayerSafely + .getOrElse(throw new IllegalArgumentException("Only a valid RasterFrameLayer can be saved as a GeoTrellis layer")) val tileColumn = parameters.get(TILE_COLUMN_PARAM).map(c ⇒ rf(c)) @@ -110,6 +112,6 @@ class DefaultSource extends DataSourceRegister } } -object DefaultSource { +object GeoTrellisLayerDataSource { final val SHORT_NAME = "geotrellis" } diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisRelation.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisRelation.scala similarity index 93% rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisRelation.scala rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisRelation.scala index 918f43015..343f4683d 100644 --- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisRelation.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisRelation.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2017-2018 Azavea & Astraea, Inc. + * Copyright 2017-2019 Astraea, Inc. & Azavea * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -15,31 +15,33 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.datasource.geotrellis +package org.locationtech.rasterframes.datasource.geotrellis import java.io.UnsupportedEncodingException import java.net.URI import java.sql.{Date, Timestamp} import java.time.{ZoneOffset, ZonedDateTime} -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.datasource.geotrellis.GeoTrellisRelation.{C, TileFeatureData} -import astraea.spark.rasterframes.datasource.geotrellis.TileFeatureSupport._ -import astraea.spark.rasterframes.rules.splitFilters -import astraea.spark.rasterframes.rules.SpatialFilters.{Contains ⇒ sfContains, Intersects ⇒ sfIntersects} -import astraea.spark.rasterframes.rules.SpatialRelationReceiver -import astraea.spark.rasterframes.rules.TemporalFilters.{BetweenDates, BetweenTimes} -import astraea.spark.rasterframes.util.SubdivideSupport._ -import astraea.spark.rasterframes.util._ -import com.vividsolutions.jts.geom +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.datasource.geotrellis.TileFeatureSupport._ +import org.locationtech.rasterframes.rules.splitFilters +import org.locationtech.rasterframes.rules.SpatialFilters.{Contains => sfContains, Intersects => sfIntersects} +import org.locationtech.rasterframes.rules.SpatialRelationReceiver +import org.locationtech.rasterframes.rules.TemporalFilters.{BetweenDates, BetweenTimes} +import org.locationtech.rasterframes.util.SubdivideSupport._ +import org.locationtech.rasterframes.util._ +import com.typesafe.scalalogging.LazyLogging +import org.locationtech.jts.geom import geotrellis.raster.{CellGrid, MultibandTile, Tile, TileFeature} import geotrellis.spark.io._ import geotrellis.spark.io.avro.AvroRecordCodec import geotrellis.spark.util.KryoWrapper import geotrellis.spark.{LayerId, Metadata, SpatialKey, TileLayerMetadata, _} -import geotrellis.util.{LazyLogging, _} +import geotrellis.util._ import geotrellis.vector._ import org.apache.avro.Schema 
import org.apache.avro.generic.GenericRecord @@ -49,6 +51,7 @@ import org.apache.spark.sql.rf.TileUDT import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ import org.apache.spark.sql.{Row, SQLContext, sources} +import org.locationtech.rasterframes.datasource.geotrellis.GeoTrellisRelation.{C, TileFeatureData} import scala.reflect.ClassTag import scala.reflect.runtime.universe._ @@ -348,6 +351,6 @@ object GeoTrellisRelation { lazy val TS = TIMESTAMP_COLUMN.columnName lazy val TL = TILE_COLUMN.columnName lazy val TF = TILE_FEATURE_DATA_COLUMN.columnName - lazy val EX = BOUNDS_COLUMN.columnName + lazy val EX = GEOMETRY_COLUMN.columnName } } diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/Layer.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/Layer.scala similarity index 85% rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/Layer.scala rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/Layer.scala index a659de2ff..9f90c96fd 100644 --- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/Layer.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/Layer.scala @@ -15,16 +15,18 @@ * License for the specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.datasource.geotrellis +package org.locationtech.rasterframes.datasource.geotrellis import java.net.URI -import astraea.spark.rasterframes -import astraea.spark.rasterframes.encoders.DelegatingSubfieldEncoder +import org.locationtech.rasterframes.encoders.DelegatingSubfieldEncoder import geotrellis.spark.LayerId import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.locationtech.rasterframes /** * /** Connector between a GT `LayerId` and the path in which it lives. 
*/ diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/MergeableData.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/MergeableData.scala similarity index 95% rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/MergeableData.scala rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/MergeableData.scala index 73d537866..34bd6536b 100644 --- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/MergeableData.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/MergeableData.scala @@ -15,9 +15,11 @@ * License for the specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.datasource.geotrellis +package org.locationtech.rasterframes.datasource.geotrellis trait MergeableData[D] { def merge(l:D, r:D): D diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/TileFeatureSupport.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/TileFeatureSupport.scala similarity index 96% rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/TileFeatureSupport.scala rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/TileFeatureSupport.scala index 6691d6d51..67ea65510 100644 --- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/TileFeatureSupport.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/TileFeatureSupport.scala @@ -15,11 +15,13 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.datasource.geotrellis +package org.locationtech.rasterframes.datasource.geotrellis -import astraea.spark.rasterframes.util._ +import org.locationtech.rasterframes.util._ import geotrellis.raster.crop.{Crop, TileCropMethods} import geotrellis.raster.mask.TileMaskMethods import geotrellis.raster.merge.TileMergeMethods diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/package.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/package.scala similarity index 79% rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/package.scala rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/package.scala index 545c1f236..c4a7dc425 100644 --- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/geotrellis/package.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/geotrellis/package.scala @@ -15,24 +15,20 @@ * License for the specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.datasource +package org.locationtech.rasterframes.datasource import java.net.URI -import _root_.geotrellis.spark.LayerId -import astraea.spark.rasterframes.datasource.geotrellis.DefaultSource._ -import astraea.spark.rasterframes.{RasterFrame, _} import org.apache.spark.sql._ -import org.apache.spark.sql.functions.col -import shapeless.tag +import org.apache.spark.sql.functions._ +import _root_.geotrellis.spark.LayerId +import org.locationtech.rasterframes._ import shapeless.tag.@@ +import shapeless.tag -/** - * Extension methods for literate and type-safe loading of geotrellis layers. 
- * - * @since 1/12/18 - */ package object geotrellis extends DataSourceOptions { implicit val layerEncoder = Layer.layerEncoder @@ -40,11 +36,11 @@ package object geotrellis extends DataSourceOptions { def geotrellis_layer = col("layer").as[Layer] /** Tagged type construction for enabling type-safe extension methods for loading - * a RasterFrame from a GeoTrellis layer. */ + * a RasterFrameLayer from a GeoTrellis layer. */ type GeoTrellisRasterFrameReader = DataFrameReader @@ GeoTrellisRasterFrameReaderTag trait GeoTrellisRasterFrameReaderTag /** Tagged type construction for enabling type-safe extension methods for writing - * a RasterFrame to a GeoTrellis layer. */ + * a RasterFrameLayer to a GeoTrellis layer. */ type GeoTrellisRasterFrameWriter[T] = DataFrameWriter[T] @@ GeoTrellisRasterFrameWriterTag trait GeoTrellisRasterFrameWriterTag @@ -56,12 +52,12 @@ package object geotrellis extends DataSourceOptions { reader.format("geotrellis-catalog").load(base.toASCIIString) def geotrellis: GeoTrellisRasterFrameReader = - tag[GeoTrellisRasterFrameReaderTag][DataFrameReader](reader.format(SHORT_NAME)) + tag[GeoTrellisRasterFrameReaderTag][DataFrameReader](reader.format(GeoTrellisLayerDataSource.SHORT_NAME)) } implicit class DataFrameWriterHasGeotrellisFormat[T](val writer: DataFrameWriter[T]) { def geotrellis: GeoTrellisRasterFrameWriter[T] = - tag[GeoTrellisRasterFrameWriterTag][DataFrameWriter[T]](writer.format(SHORT_NAME)) + tag[GeoTrellisRasterFrameWriterTag][DataFrameWriter[T]](writer.format(GeoTrellisLayerDataSource.SHORT_NAME)) } implicit class GeoTrellisWriterAddLayer[T](val writer: GeoTrellisRasterFrameWriter[T]) { @@ -75,25 +71,25 @@ package object geotrellis extends DataSourceOptions { .option("path", layer.base.toASCIIString) } - /** Extension methods for loading a RasterFrame from a tagged `DataFrameReader`. */ + /** Extension methods for loading a RasterFrameLayer from a tagged `DataFrameReader`. 
*/ implicit class GeoTrellisReaderWithRF(val reader: GeoTrellisRasterFrameReader) { def withTileSubdivisions(divs: Int): GeoTrellisRasterFrameReader = tag[GeoTrellisRasterFrameReaderTag][DataFrameReader]( - reader.option(TILE_SUBDIVISIONS_PARAM, divs) + reader.option(TILE_SUBDIVISIONS_PARAM, divs.toLong) ) def withNumPartitions(partitions: Int): GeoTrellisRasterFrameReader = tag[GeoTrellisRasterFrameReaderTag][DataFrameReader]( - reader.option(NUM_PARTITIONS_PARAM, partitions) + reader.option(NUM_PARTITIONS_PARAM, partitions.toLong) ) - def loadRF(uri: URI, id: LayerId): RasterFrame = + def loadLayer(uri: URI, id: LayerId): RasterFrameLayer = reader .option(LAYER_PARAM, id.name) .option(ZOOM_PARAM, id.zoom.toString) .load(uri.toASCIIString) - .asRF + .asLayer - def loadRF(layer: Layer): RasterFrame = loadRF(layer.base, layer.id) + def loadLayer(layer: Layer): RasterFrameLayer = loadLayer(layer.base, layer.id) } } diff --git a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/package.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/package.scala similarity index 92% rename from datasource/src/main/scala/astraea/spark/rasterframes/datasource/package.scala rename to datasource/src/main/scala/org/locationtech/rasterframes/datasource/package.scala index 6962e9a11..9a649bb94 100644 --- a/datasource/src/main/scala/astraea/spark/rasterframes/datasource/package.scala +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/package.scala @@ -15,14 +15,14 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes +package org.locationtech.rasterframes import java.net.URI -import org.apache.spark.sql.sources.{And, Filter} - import scala.util.Try /** diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceDataSource.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceDataSource.scala new file mode 100644 index 000000000..6cea717ec --- /dev/null +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceDataSource.scala @@ -0,0 +1,154 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.datasource.raster + +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.util._ +import org.apache.spark.sql.SQLContext +import org.apache.spark.sql.sources.{BaseRelation, DataSourceRegister, RelationProvider} +import org.locationtech.rasterframes.model.TileDimensions + +class RasterSourceDataSource extends DataSourceRegister with RelationProvider { + import RasterSourceDataSource._ + override def shortName(): String = SHORT_NAME + override def createRelation(sqlContext: SQLContext, parameters: Map[String, String]): BaseRelation = { + val bands = parameters.bandIndexes + val tiling = parameters.tileDims + val lazyTiles = parameters.lazyTiles + val spec = parameters.pathSpec + val catRef = spec.fold(_.registerAsTable(sqlContext), identity) + RasterSourceRelation(sqlContext, catRef, bands, tiling, lazyTiles) + } +} + +object RasterSourceDataSource { + final val SHORT_NAME = "raster" + final val PATH_PARAM = "path" + final val PATHS_PARAM = "paths" + final val BAND_INDEXES_PARAM = "bandIndexes" + final val TILE_DIMS_PARAM = "tileDimensions" + final val CATALOG_TABLE_PARAM = "catalogTable" + final val CATALOG_TABLE_COLS_PARAM = "catalogColumns" + final val CATALOG_CSV_PARAM = "catalogCSV" + final val LAZY_TILES_PARAM = "lazyTiles" + + final val DEFAULT_COLUMN_NAME = PROJECTED_RASTER_COLUMN.columnName + + trait WithBandColumns { + def bandColumnNames: Seq[String] + } + /** Container for specifying raster paths. 
*/ + case class RasterSourceCatalog(csv: String, bandColumnNames: String*) extends WithBandColumns { + def registerAsTable(sqlContext: SQLContext): RasterSourceCatalogRef = { + import sqlContext.implicits._ + val lines = csv + .split(Array('\n','\r')) + .map(_.trim) + .filter(_.nonEmpty) + + val dsLines = sqlContext.createDataset(lines) + val catalog = sqlContext.read + .option("header", "true") + .option("ignoreTrailingWhiteSpace", true) + .option("ignoreLeadingWhiteSpace", true) + .csv(dsLines) + + val tmpName = tmpTableName() + catalog.createOrReplaceTempView(tmpName) + + val cols = if (bandColumnNames.isEmpty) catalog.columns.toSeq + else bandColumnNames + + RasterSourceCatalogRef(tmpName, cols: _*) + } + } + + object RasterSourceCatalog { + def apply(singlebandPaths: Seq[String]): Option[RasterSourceCatalog] = + if (singlebandPaths.isEmpty) None + else { + val header = DEFAULT_COLUMN_NAME + val csv = header + "\n" + singlebandPaths.mkString("\n") + Some(new RasterSourceCatalog(csv, header)) + } + } + + /** Container for specifying where to select raster paths from. 
*/ + case class RasterSourceCatalogRef(tableName: String, bandColumnNames: String*) extends WithBandColumns + + private[raster] + implicit class ParamsDictAccessors(val parameters: Map[String, String]) extends AnyVal { + def tokenize(csv: String): Seq[String] = csv.split(',').map(_.trim) + + def tileDims: Option[TileDimensions] = + parameters.get(TILE_DIMS_PARAM) + .map(tokenize(_).map(_.toInt)) + .map { case Seq(cols, rows) => TileDimensions(cols, rows)} + + def bandIndexes: Seq[Int] = parameters + .get(BAND_INDEXES_PARAM) + .map(tokenize(_).map(_.toInt)) + .getOrElse(Seq(0)) + + + def lazyTiles: Boolean = parameters + .get(LAZY_TILES_PARAM).forall(_.toBoolean) + + def catalog: Option[RasterSourceCatalog] = { + val paths = ( + parameters + .get(PATHS_PARAM) + .toSeq + .flatMap(_.split(Array('\n','\r'))) ++ + parameters + .get(RasterSourceDataSource.PATH_PARAM) + .toSeq + ).filter(_.nonEmpty) + + RasterSourceCatalog(paths) + .orElse(parameters + .get(CATALOG_CSV_PARAM) + .map(RasterSourceCatalog(_, catalogTableCols: _*)) + ) + } + + def catalogTableCols: Seq[String] = parameters + .get(CATALOG_TABLE_COLS_PARAM) + .map(tokenize(_).filter(_.nonEmpty).toSeq) + .getOrElse(Seq.empty) + + def catalogTable: Option[RasterSourceCatalogRef] = parameters + .get(CATALOG_TABLE_PARAM) + .map(p => RasterSourceCatalogRef(p, catalogTableCols: _*)) + + def pathSpec: Either[RasterSourceCatalog, RasterSourceCatalogRef] = { + (catalog, catalogTable) match { + case (Some(f), None) => Left(f) + case (None, Some(p)) => Right(p) + case (None, None) => throw new IllegalArgumentException( + s"Unable to interpret paths from: ${parameters.mkString("\n", "\n", "\n")}") + case _ => throw new IllegalArgumentException( + "Only one of a set of file paths OR a paths table column may be provided.") + } + } + } +} diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceRelation.scala 
b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceRelation.scala new file mode 100644 index 000000000..6af519f56 --- /dev/null +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceRelation.scala @@ -0,0 +1,136 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.datasource.raster + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.sources.{BaseRelation, TableScan} +import org.apache.spark.sql.types.{StringType, StructField, StructType} +import org.apache.spark.sql.{DataFrame, Row, SQLContext} +import org.locationtech.rasterframes.datasource.raster.RasterSourceDataSource.RasterSourceCatalogRef +import org.locationtech.rasterframes.encoders.CatalystSerializer._ +import org.locationtech.rasterframes.expressions.generators.{RasterSourceToRasterRefs, RasterSourceToTiles} +import org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs.bandNames +import org.locationtech.rasterframes.expressions.transformers.{RasterRefToTile, URIToRasterSource} +import org.locationtech.rasterframes.model.TileDimensions +import org.locationtech.rasterframes.tiles.ProjectedRasterTile + +/** + * Constructs a Spark Relation over one 
or more RasterSource paths. + * @param sqlContext Query context + * @param catalogTable Specification of raster path sources + * @param bandIndexes band indexes to fetch + * @param subtileDims how big to tile/subdivide rasters info + */ +case class RasterSourceRelation( + sqlContext: SQLContext, + catalogTable: RasterSourceCatalogRef, + bandIndexes: Seq[Int], + subtileDims: Option[TileDimensions], + lazyTiles: Boolean +) extends BaseRelation with TableScan { + + lazy val inputColNames = catalogTable.bandColumnNames + + def pathColNames = inputColNames + .map(_ + "_path") + + def srcColNames = inputColNames + .map(_ + "_src") + + def refColNames = srcColNames + .flatMap(bandNames(_, bandIndexes)) + .map(_ + "_ref") + + def tileColNames = inputColNames + .flatMap(bandNames(_, bandIndexes)) + + lazy val extraCols: Seq[StructField] = { + val catalog = sqlContext.table(catalogTable.tableName) + catalog.schema.fields.filter(f => !catalogTable.bandColumnNames.contains(f.name)) + } + + override def schema: StructType = { + val tileSchema = schemaOf[ProjectedRasterTile] + val paths = for { + pathCol <- pathColNames + } yield StructField(pathCol, StringType, false) + val tiles = for { + tileColName <- tileColNames + } yield StructField(tileColName, tileSchema, true) + + StructType(paths ++ tiles ++ extraCols) + } + + override def buildScan(): RDD[Row] = { + import sqlContext.implicits._ + + // The general transformaion is: + // input -> path -> src -> ref -> tile + // Each step is broken down for readability + val inputs: DataFrame = sqlContext.table(catalogTable.tableName) + + // Basically renames the input columns to have the '_path' suffix + val pathsAliasing = for { + (input, path) <- inputColNames.zip(pathColNames) + } yield col(input).as(path) + + // Wraps paths in a RasterSource + val srcs = for { + (pathColName, srcColName) <- pathColNames.zip(srcColNames) + } yield URIToRasterSource(col(pathColName)) as srcColName + + // Add path columns + val withPaths = inputs + 
.select($"*" +: pathsAliasing: _*) + + // Path columns have to be manually pulled along through each step. Resolve columns once + // and reused with each select. + val paths = pathColNames.map(withPaths.apply) + + // Input columns along for the ride. + val extras = extraCols.map(f => inputs(f.name)) + + val df = if (lazyTiles) { + // Expand RasterSource into multiple columns per band, and multiple rows per tile + // There's some unintentional fragililty here in that the structure of the expression + // is expected to line up with our column structure here. + val refs = RasterSourceToRasterRefs(subtileDims, bandIndexes, srcs: _*) as refColNames + + // RasterSourceToRasterRef is a generator, which means you have to do the Tile conversion + // in a separate select statement (Query planner doesn't know how many columns ahead of time). + val refsToTiles = for { + (refColName, tileColName) <- refColNames.zip(tileColNames) + } yield RasterRefToTile(col(refColName)) as tileColName + + withPaths + .select(extras ++ paths :+ refs: _*) + .select(paths ++ refsToTiles ++ extras: _*) + } + else { + val tiles = RasterSourceToTiles(subtileDims, bandIndexes, srcs: _*) as tileColNames + withPaths + .select((paths :+ tiles) ++ extras: _*) + } + df.rdd + } +} diff --git a/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/package.scala b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/package.scala new file mode 100644 index 000000000..d85f435d2 --- /dev/null +++ b/datasource/src/main/scala/org/locationtech/rasterframes/datasource/raster/package.scala @@ -0,0 +1,93 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.datasource + +import java.net.URI +import java.util.UUID + +import org.apache.spark.sql.{DataFrame, DataFrameReader} +import shapeless.tag +import shapeless.tag.@@ +package object raster { + + private[raster] def tmpTableName() = UUID.randomUUID().toString.replace("-", "") + + trait RasterSourceDataFrameReaderTag + type RasterSourceDataFrameReader = DataFrameReader @@ RasterSourceDataFrameReaderTag + + /** Adds `raster` format specifier to `DataFrameReader`. */ + implicit class DataFrameReaderHasRasterSourceFormat(val reader: DataFrameReader) { + def raster: RasterSourceDataFrameReader = + tag[RasterSourceDataFrameReaderTag][DataFrameReader]( + reader.format(RasterSourceDataSource.SHORT_NAME)) + } + + /** Adds option methods relevant to RasterSourceDataSource. */ + implicit class RasterSourceDataFrameReaderHasOptions(val reader: RasterSourceDataFrameReader) { + /** Set the zero-based band indexes to read. Defaults to Seq(0). */ + def withBandIndexes(bandIndexes: Int*): RasterSourceDataFrameReader = + tag[RasterSourceDataFrameReaderTag][DataFrameReader]( + reader.option(RasterSourceDataSource.BAND_INDEXES_PARAM, bandIndexes.mkString(","))) + + def withTileDimensions(cols: Int, rows: Int): RasterSourceDataFrameReader = + tag[RasterSourceDataFrameReaderTag][DataFrameReader]( + reader.option(RasterSourceDataSource.TILE_DIMS_PARAM, s"$cols,$rows") + ) + + /** Indicate if tile reading should be delayed until cells are fetched. Defaults to `true`. 
*/ + def withLazyTiles(state: Boolean): RasterSourceDataFrameReader = + tag[RasterSourceDataFrameReaderTag][DataFrameReader]( + reader.option(RasterSourceDataSource.LAZY_TILES_PARAM, state)) + + def fromCatalog(catalog: DataFrame, bandColumnNames: String*): RasterSourceDataFrameReader = + tag[RasterSourceDataFrameReaderTag][DataFrameReader] { + val tmpName = tmpTableName() + catalog.createOrReplaceTempView(tmpName) + reader + .option(RasterSourceDataSource.CATALOG_TABLE_PARAM, tmpName) + .option(RasterSourceDataSource.CATALOG_TABLE_COLS_PARAM, bandColumnNames.mkString(",")): DataFrameReader + } + + def fromCatalog(tableName: String, bandColumnNames: String*): RasterSourceDataFrameReader = + tag[RasterSourceDataFrameReaderTag][DataFrameReader]( + reader.option(RasterSourceDataSource.CATALOG_TABLE_PARAM, tableName) + .option(RasterSourceDataSource.CATALOG_TABLE_COLS_PARAM, bandColumnNames.mkString(",")) + ) + + def fromCSV(catalogCSV: String, bandColumnNames: String*): RasterSourceDataFrameReader = + tag[RasterSourceDataFrameReaderTag][DataFrameReader]( + reader.option(RasterSourceDataSource.CATALOG_CSV_PARAM, catalogCSV) + .option(RasterSourceDataSource.CATALOG_TABLE_COLS_PARAM, bandColumnNames.mkString(",")) + ) + + def from(newlineDelimPaths: String): RasterSourceDataFrameReader = + tag[RasterSourceDataFrameReaderTag][DataFrameReader]( + reader.option(RasterSourceDataSource.PATHS_PARAM, newlineDelimPaths) + ) + + def from(paths: Seq[String]): RasterSourceDataFrameReader = + from(paths.mkString("\n")) + + def from(uris: Seq[URI])(implicit d: DummyImplicit): RasterSourceDataFrameReader = + from(uris.map(_.toASCIIString)) + } +} diff --git a/experimental/src/test/resources/buildings.geojson b/datasource/src/test/resources/buildings.geojson similarity index 100% rename from experimental/src/test/resources/buildings.geojson rename to datasource/src/test/resources/buildings.geojson diff --git a/experimental/src/test/resources/example.geojson 
b/datasource/src/test/resources/example.geojson similarity index 100% rename from experimental/src/test/resources/example.geojson rename to datasource/src/test/resources/example.geojson diff --git a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffDataSourceSpec.scala b/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffDataSourceSpec.scala deleted file mode 100644 index 3bdeecd81..000000000 --- a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffDataSourceSpec.scala +++ /dev/null @@ -1,117 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2018 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- * - */ -package astraea.spark.rasterframes.datasource.geotiff - -import java.nio.file.Paths - -import astraea.spark.rasterframes._ -import org.apache.spark.sql.functions._ - -/** - * @since 1/14/18 - */ -class GeoTiffDataSourceSpec - extends TestEnvironment with TestData { - - val cogPath = getClass.getResource("/LC08_RGB_Norfolk_COG.tiff").toURI - val nonCogPath = getClass.getResource("/L8-B8-Robinson-IL.tiff").toURI - val l8samplePath = getClass.getResource("/L8-B1-Elkton-VA.tiff").toURI - - describe("GeoTiff reading") { - - it("should read sample GeoTiff") { - val rf = spark.read - .geotiff - .loadRF(cogPath) - - assert(rf.count() > 10) - } - - it("should lay out tiles correctly"){ - - val rf = spark.read - .geotiff - .loadRF(cogPath) - - val tlm = rf.tileLayerMetadata.left.get - val gb = tlm.gridBounds - assert(gb.colMax > gb.colMin) - assert(gb.rowMax > gb.rowMin) - } - - it("should lay out tiles correctly for non-tiled tif") { - val rf = spark.read - .geotiff - .loadRF(nonCogPath) - - println(rf.count()) - rf.show(false) - - assert(rf.count() > 1) - - import org.apache.spark.sql.functions._ - logger.info( - rf.agg( - min(col("spatial_key.row")) as "rowmin", - max(col("spatial_key.row")) as "rowmax", - min(col("spatial_key.col")) as "colmin", - max(col("spatial_key.col")) as "colmax" - - ).first.toSeq.toString() - ) - val tlm = rf.tileLayerMetadata.left.get - val gb = tlm.gridBounds - assert(gb.rowMax > gb.rowMin) - assert(gb.colMax > gb.colMin) - - } - - it("should read in correctly check-summed contents") { - // c.f. 
TileStatsSpec -> computing statistics over tiles -> should compute tile statistics -> sum - val rf = spark.read.geotiff.loadRF(l8samplePath) - val expected = 309149454 // computed with rasterio - val result = rf.agg( - sum(tile_sum(rf("tile"))) - ).collect().head.getDouble(0) - - assert(result === expected) - } - - it("should write GeoTIFF RF to parquet") { - val rf = spark.read - .geotiff - .loadRF(cogPath) - assert(write(rf)) - } - - it("should write GeoTIFF") { - val rf = spark.read - .geotiff - .loadRF(cogPath) - - logger.info(s"Read extent: ${rf.tileLayerMetadata.merge.extent}") - - val out = Paths.get("target", "example-geotiff.tiff") - logger.info(s"Writing to $out") - noException shouldBe thrownBy { - rf.write.geotiff.save(out.toString) - } - } - } -} diff --git a/datasource/src/test/scala/examples/Creating.scala b/datasource/src/test/scala/examples/Creating.scala deleted file mode 100644 index 174bbaa3f..000000000 --- a/datasource/src/test/scala/examples/Creating.scala +++ /dev/null @@ -1,238 +0,0 @@ -/* - * This software is licensed under the Apache 2 license, quoted below. - * - * Copyright 2018 Astraea, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * [http://www.apache.org/licenses/LICENSE-2.0] - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- * - */ - -package examples - -import java.io.File -import java.nio.file.Files - -import geotrellis.raster.{Raster, Tile} - -/** - * Examples of creating RasterFrames - * - * @since 1/16/18 - */ -object Creating extends App { -/* -# Creating RasterFrames - -## Initialization - -There are a couple of setup steps necessary anytime you want to work with RasterFrames. the first is to import the API symbols into scope: - -*/ -// tut:silent -import astraea.spark.rasterframes._ -import org.apache.spark.sql._ - - -/* -Next, initialize the `SparkSession`, and call the `withRasterFrames` method on it: -*/ - -// tut:silent -implicit val spark = SparkSession.builder(). - master("local").appName("RasterFrames"). - getOrCreate(). - withRasterFrames - -/* -And, ss is standard Spark SQL practice, we import additional DataFrame support: -*/ - -// tut:silent -import spark.implicits._ -// tut:invisible -spark.sparkContext.setLogLevel("ERROR") - - -/* -Now we are ready to create a RasterFrame. - -## Reading a GeoTIFF - -The most straightforward way to create a `RasterFrame` is to read a [GeoTIFF](https://en.wikipedia.org/wiki/GeoTIFF) -file using a RasterFrame [`DataSource`](https://spark.apache.org/docs/latest/sql-programming-guide.html#data-sources) -designed for this purpose. - -First add the following import: -*/ - -import astraea.spark.rasterframes.datasource.geotiff._ -/* -(This is what adds the `.geotiff` method to `spark.read` below.) - -Then we use the `DataFrameReader` provided by `spark.read` to read the GeoTIFF: - */ - -// tut:book -val samplePath = new File("src/test/resources/LC08_RGB_Norfolk_COG.tiff") -val tiffRF = spark.read - .geotiff - .loadRF(samplePath.toURI) - -/* -Let's inspect the structure of what we get back: - */ - -// tut -tiffRF.printSchema() - -/* -As reported by Spark, RasterFrames extracts 6 columns from the GeoTIFF we selected. Some of these columns are dependent -on the contents of the source data, and some are are always available. 
Let's take a look at these in more detail. - -* `spatial_key`: GeoTrellis assigns a `SpatialKey` or a `SpaceTimeKey` to each tile, mapping it to the layer grid from - which it came. If it has a `SpaceTimeKey`, RasterFrames will split it into a `SpatialKey` and a `TemporalKey` (the - latter with column name `temporal_key`). -* `extent`: The bounding box of the tile in the tile's native CRS. -* `metadata`: The TIFF format header tags found in the file. -* `tile` or `tile_n` (where `n` is a band number): For singleband GeoTIFF files, the `tile` column contains the cell - data split into tiles. For multiband tiles, each column with `tile_` prefix contains each of the sources bands, - in the order they were stored. - -See the section [Inspecting a `RasterFrame`](#inspecting-a--code-rasterframe--code-) (below) for more details on accessing the RasterFrame's metadata. - */ - - -/* -## Reading a GeoTrellis Layer - -If your imagery is already ingested into a [GeoTrellis layer](https://docs.geotrellis.io/en/latest/guide/spark.html#writing-layers), -you can use the RasterFrames GeoTrellis DataSource. There are two parts to this GeoTrellis Layer support. The first -is the GeoTrellis Catalog DataSource, which lists the GeoTrellis layers available at a URI. The second part is the actual -RasterFrame reader for pulling a layer into a RasterFrame. - -Before we show how all of this works we need to have a GeoTrellis layer to work with. We can create one with the RasterFrame we -constructed above. - - */ -import astraea.spark.rasterframes.datasource.geotrellis._ - -val base = Files.createTempDirectory("rf-").toUri -val layer = Layer(base, "sample", 0) -tiffRF.write.geotrellis.asLayer(layer).save() - -/* -Now we can point our catalog reader at the base directory and see what was saved: -*/ - -val cat = spark.read.geotrellisCatalog(base) -cat.printSchema -cat.show() - -/* -As you can see, there's a lot of information stored in each row of the catalog. 
Most of this is associated with how the -layer is discretized. However, there may be other application specific metadata serialized with a layer that can be use -to filter the catalog entries or select a specific one. But for now, we're just going to load a RasterFrame in from the -catalog using a convenience function. - */ - -val firstLayer = cat.select(geotrellis_layer).first -val rfAgain = spark.read.geotrellis.loadRF(firstLayer) -rfAgain.show() - -/* -If you already know the `LayerId` of what you're wanting to read, you can bypass working with the catalog: - */ - -val anotherRF = spark.read.geotrellis.loadRF(layer) - -/* -## Using GeoTrellis APIs - -If you are used to working directly with the GeoTrellis APIs, there are a number of additional ways to create a `RasterFrame`, as enumerated in the sections below. - -First, some standard `import`s: -*/ - -// tut:silent -import geotrellis.raster.io.geotiff.SinglebandGeoTiff -import geotrellis.spark.io._ - -/* -### From `ProjectedExtent` - -The simplest mechanism for getting a RasterFrame is to use the `toRF(tileCols, tileRows)` extension method on `ProjectedRaster`. -*/ - -val scene = SinglebandGeoTiff("src/test/resources/L8-B8-Robinson-IL.tiff") -val rf = scene.projectedRaster.toRF(128, 128) -rf.show(5, false) - -/* -### From `TileLayerRDD` - -Another option is to use a GeoTrellis [`LayerReader`](https://docs.geotrellis.io/en/latest/guide/tile-backends.html), -to get a `TileLayerRDD` for which there's also a `toRF` extension method. - -*/ - -/* -```scala -import geotrellis.spark._ -val tiledLayer: TileLayerRDD[SpatialKey] = ??? -val rf = tiledLayer.toRF -``` -*/ - -/* -## Inspecting a `RasterFrame` - -`RasterFrame` has a number of methods providing access to metadata about the contents of the RasterFrame. 
- -### Tile Column Names - -*/ - -//```tut:book -rf.tileColumns.map(_.toString) - -/* -### Spatial Key Column Name -*/ - -//```tut:book -rf.spatialKeyColumn.toString - -/* -### Temporal Key Column - -Returns an `Option[Column]` since not all RasterFrames have an explicit temporal dimension. -*/ - -//```tut:book -rf.temporalKeyColumn.map(_.toString) - -/* -### Tile Layer Metadata - -The Tile Layer Metadata defines how the spatial/spatiotemporal domain is discretized into tiles, and what the key bounds are. -*/ - -import spray.json._ -// NB: The `fold` is required because an `Either` is returned, depending on the key type. -rf.tileLayerMetadata.fold(_.toJson, _.toJson).prettyPrint - - -//tut:invisible -spark.stop() - -} diff --git a/experimental/src/test/scala/astraea/spark/rasterframes/experimental/datasource/geojson/GeoJsonDataSourceTest.scala b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geojson/GeoJsonDataSourceTest.scala similarity index 89% rename from experimental/src/test/scala/astraea/spark/rasterframes/experimental/datasource/geojson/GeoJsonDataSourceTest.scala rename to datasource/src/test/scala/org/locationtech/rasterframes/datasource/geojson/GeoJsonDataSourceTest.scala index 425d91ab8..3d8ec9db3 100644 --- a/experimental/src/test/scala/astraea/spark/rasterframes/experimental/datasource/geojson/GeoJsonDataSourceTest.scala +++ b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geojson/GeoJsonDataSourceTest.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2018 Astraea. Inc. + * Copyright 2019 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -15,13 +15,13 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes.experimental.datasource.geojson - -import astraea.spark.rasterframes.TestEnvironment +package org.locationtech.rasterframes.datasource.geojson import org.apache.spark.sql.types.{LongType, MapType} +import org.locationtech.rasterframes.TestEnvironment /** * Test rig for GeoJsonRelation. @@ -60,8 +60,7 @@ class GeoJsonDataSourceTest extends TestEnvironment { .option(GeoJsonDataSource.INFER_SCHEMA, true) .load(example2) - results.show() + results.count() should be (8) } } - } diff --git a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffCollectionDataSourceSpec.scala b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffCollectionDataSourceSpec.scala similarity index 81% rename from datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffCollectionDataSourceSpec.scala rename to datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffCollectionDataSourceSpec.scala index 1d7237c5b..9b69fd89e 100644 --- a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotiff/GeoTiffCollectionDataSourceSpec.scala +++ b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffCollectionDataSourceSpec.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2018 Astraea. Inc. + * Copyright 2018 Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of @@ -15,13 +15,15 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 * */ -package astraea.spark.rasterframes.datasource.geotiff +package org.locationtech.rasterframes.datasource.geotiff import java.io.{File, FilenameFilter} -import astraea.spark.rasterframes._ +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.TestEnvironment /** * @since 1/14/18 @@ -33,7 +35,7 @@ class GeoTiffCollectionDataSourceSpec it("shiould read a directory of files") { val df = spark.read - .geotiff + .format("geotiff") .load(geotiffDir.resolve("*.tiff").toString) val expected = geotiffDir.toFile.list(new FilenameFilter { override def accept(dir: File, name: String): Boolean = name.endsWith("tiff") @@ -41,7 +43,7 @@ class GeoTiffCollectionDataSourceSpec assert(df.select("path").distinct().count() === expected) - df.show(false) + // df.show(false) } } } diff --git a/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffDataSourceSpec.scala b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffDataSourceSpec.scala new file mode 100644 index 000000000..eb5e55b0c --- /dev/null +++ b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotiff/GeoTiffDataSourceSpec.scala @@ -0,0 +1,244 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2018 Astraea, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + */ +package org.locationtech.rasterframes.datasource.geotiff + +import java.nio.file.Paths + +import geotrellis.proj4._ +import geotrellis.raster.io.geotiff.{MultibandGeoTiff, SinglebandGeoTiff} +import geotrellis.vector.Extent +import org.locationtech.rasterframes._ +import org.apache.spark.sql.functions._ +import org.locationtech.rasterframes.TestEnvironment + +/** + * @since 1/14/18 + */ +class GeoTiffDataSourceSpec + extends TestEnvironment with TestData { + + describe("GeoTiff reading") { + + it("should read sample GeoTiff") { + val rf = spark.read.format("geotiff").load(cogPath.toASCIIString).asLayer + + assert(rf.count() > 10) + } + + it("should lay out tiles correctly") { + + val rf = spark.read.format("geotiff").load(cogPath.toASCIIString).asLayer + + val tlm = rf.tileLayerMetadata.left.get + val gb = tlm.gridBounds + assert(gb.colMax > gb.colMin) + assert(gb.rowMax > gb.rowMin) + } + + it("should lay out tiles correctly for non-tiled tif") { + val rf = spark.read.format("geotiff").load(nonCogPath.toASCIIString).asLayer + + assert(rf.count() > 1) + + import org.apache.spark.sql.functions._ + logger.info( + rf.agg( + min(col("spatial_key.row")) as "rowmin", + max(col("spatial_key.row")) as "rowmax", + min(col("spatial_key.col")) as "colmin", + max(col("spatial_key.col")) as "colmax" + + ).first.toSeq.toString() + ) + val tlm = rf.tileLayerMetadata.left.get + val gb = tlm.gridBounds + assert(gb.rowMax > gb.rowMin) + assert(gb.colMax > gb.colMin) + + } + + it("should read in correctly check-summed contents") { + // c.f. 
TileStatsSpec -> computing statistics over tiles -> should compute tile statistics -> sum + val rf = spark.read.format("geotiff").load(l8B1SamplePath.toASCIIString).asLayer + val expected = 309149454 // computed with rasterio + val result = rf.agg( + sum(rf_tile_sum(rf("tile"))) + ).collect().head.getDouble(0) + + assert(result === expected) + } + } + + describe("GeoTiff writing") { + + it("should write GeoTIFF RF to parquet") { + val rf = spark.read.format("geotiff").load(cogPath.toASCIIString).asLayer + assert(write(rf)) + } + + it("should write GeoTIFF from layer") { + val rf = spark.read.format("geotiff").load(cogPath.toASCIIString).asLayer + + logger.info(s"Read extent: ${rf.tileLayerMetadata.merge.extent}") + + val out = Paths.get("target", "example-geotiff.tif") + logger.info(s"Writing to $out") + noException shouldBe thrownBy { + rf.write.format("geotiff").save(out.toString) + } + } + + it("should write unstructured raster") { + import spark.implicits._ + val df = spark.read.format("raster") + .option("tileDimensions", "32,32") // oddball + .load(nonCogPath.toASCIIString) // core L8-B8-Robinson-IL.tiff + + df.count() should be > 0L + + val crs = df.select(rf_crs($"proj_raster")).first() + + val out = Paths.get("target", "unstructured.tif").toString + + noException shouldBe thrownBy { + df.write.geotiff.withCRS(crs).save(out) + } + + val (inCols, inRows) = { + val id = sampleGeoTiff.imageData // inshallah same as nonCogPath + (id.cols, id.rows) + } + inCols should be (774) + inRows should be (500) //from gdalinfo + + val outputTif = SinglebandGeoTiff(out) + outputTif.imageData.cols should be (inCols) + outputTif.imageData.rows should be (inRows) + + // TODO check datatype, extent. 
+ } + + it("should round trip unstructured raster from COG"){ + import spark.implicits._ + import org.locationtech.rasterframes.datasource.raster._ + + val df = spark.read.raster.withTileDimensions(64, 64).load(singlebandCogPath.toASCIIString) + + val resourceCols = 963 // from gdalinfo + val resourceRows = 754 + val resourceExtent = Extent(752325.0, 3872685.0, 781215.0, 3895305.0) + + df.count() should be > 0L + + val crs = df.select(rf_crs(col("proj_raster"))).first() + + val totalExtentRow = df.select(rf_extent($"proj_raster").alias("ext")) + .agg( + min($"ext.xmin").alias("xmin"), + min($"ext.ymin").alias("ymin"), + max($"ext.xmax").alias("xmax"), + max($"ext.ymax").alias("ymax") + ).first() + + val dfExtent = Extent(totalExtentRow.getDouble(0), totalExtentRow.getDouble(1), totalExtentRow.getDouble(2), totalExtentRow.getDouble(3)) + logger.info(s"Dataframe extent: ${dfExtent.toString()}") + + dfExtent shouldBe resourceExtent + + val out = Paths.get("target", "unstructured_cog.tif").toString + + noException shouldBe thrownBy { + df.write.geotiff.withCRS(crs).save(out) + } + + val (inCols, inRows, inExtent, inCellType) = { + val tif = readSingleband("LC08_B7_Memphis_COG.tiff") + val id = tif.imageData + (id.cols, id.rows, tif.extent, tif.cellType) + } + inCols should be (963) + inRows should be (754) //from gdalinfo + inExtent should be (resourceExtent) + + val outputTif = SinglebandGeoTiff(out) + outputTif.imageData.cols should be (inCols) + outputTif.imageData.rows should be (inRows) + outputTif.extent should be (resourceExtent) + outputTif.cellType should be (inCellType) + } + + it("should write GeoTIFF without layer") { + import org.locationtech.rasterframes.datasource.raster._ + val pr = col("proj_raster_b0") + val rf = spark.read.raster.withBandIndexes(0, 1, 2).load(rgbCogSamplePath.toASCIIString) + + val out = Paths.get("target", "example2-geotiff.tif") + logger.info(s"Writing to $out") + + withClue("explicit extent/crs") { + noException shouldBe thrownBy 
{ + rf + .withColumn("extent", rf_extent(pr)) + .withColumn("crs", rf_crs(pr)) + .write.geotiff.withCRS(LatLng).save(out.toString) + } + } + + withClue("without explicit extent/crs") { + noException shouldBe thrownBy { + rf + .write.geotiff.withCRS(LatLng).save(out.toString) + } + } + withClue("with downsampling") { + noException shouldBe thrownBy { + rf + .write.geotiff + .withCRS(LatLng) + .withDimensions(128, 128) + .save(out.toString) + } + } + } + + def s(band: Int): String = + s"https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/" + + s"MCD43A4.A2019059.h11v08.006.2019072203257_B0${band}.TIF" + + it("shoud write multiband") { + import org.locationtech.rasterframes.datasource.raster._ + + val cat = s""" +red,green,blue +${s(1)},${s(4)},${s(3)} +""" + val scene = spark.read.raster.fromCSV(cat, "red", "green", "blue").load() + val out = Paths.get("target", "geotiff-overview.tif").toString + scene.write.geotiff + .withCRS(LatLng) + .withDimensions(256, 256) + .save(out) + + val outTif = MultibandGeoTiff(out) + outTif.bandCount should be (3) + } + } +} diff --git a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisCatalogSpec.scala b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisCatalogSpec.scala similarity index 86% rename from datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisCatalogSpec.scala rename to datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisCatalogSpec.scala index e69a5414a..c409eb216 100644 --- a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisCatalogSpec.scala +++ b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisCatalogSpec.scala @@ -15,17 +15,20 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.datasource.geotrellis +package org.locationtech.rasterframes.datasource.geotrellis import java.io.File -import astraea.spark.rasterframes._ -import geotrellis.proj4.{CRS, LatLng, Sinusoidal} +import org.locationtech.rasterframes._ +import geotrellis.proj4.LatLng import geotrellis.spark._ import geotrellis.spark.io._ import geotrellis.spark.io.index.ZCurveKeyIndexMethod import org.apache.hadoop.fs.FileUtil +import org.locationtech.rasterframes.TestEnvironment import org.scalatest.BeforeAndAfter /** @@ -67,7 +70,7 @@ class GeoTrellisCatalogSpec .collect assert(layer.length === 2) - val lots = layer.map(sqlContext.read.geotrellis.loadRF).map(_.toDF).reduce(_ union _) + val lots = layer.map(sqlContext.read.geotrellis.loadLayer).map(_.toDF).reduce(_ union _) assert(lots.count === 60) } } diff --git a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisDataSourceSpec.scala b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisDataSourceSpec.scala similarity index 79% rename from datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisDataSourceSpec.scala rename to datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisDataSourceSpec.scala index 009382639..ecd3351df 100644 --- a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotrellis/GeoTrellisDataSourceSpec.scala +++ b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/GeoTrellisDataSourceSpec.scala @@ -1,7 +1,7 @@ /* * This software is licensed under the Apache 2 license, quoted below. * - * Copyright 2017-2018 Azavea & Astraea, Inc. + * Copyright 2017-2019 Azavea & Astraea, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. 
You may obtain a copy of @@ -15,17 +15,19 @@ * License for the specific language governing permissions and limitations under * the License. * + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.datasource.geotrellis +package org.locationtech.rasterframes.datasource.geotrellis import java.io.File import java.sql.Timestamp import java.time.ZonedDateTime -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.datasource.DataSourceOptions -import astraea.spark.rasterframes.rules._ -import astraea.spark.rasterframes.util._ +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.datasource.DataSourceOptions +import org.locationtech.rasterframes.rules._ +import org.locationtech.rasterframes.util._ import geotrellis.proj4.LatLng import geotrellis.raster._ import geotrellis.raster.resample.NearestNeighbor @@ -40,9 +42,10 @@ import geotrellis.vector._ import org.apache.avro.generic._ import org.apache.avro.{Schema, SchemaBuilder} import org.apache.hadoop.fs.FileUtil -import org.apache.spark.sql.functions.{udf ⇒ sparkUdf} +import org.apache.spark.sql.functions.{udf => sparkUdf} import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.storage.StorageLevel +import org.locationtech.rasterframes.TestEnvironment import org.scalatest.{BeforeAndAfterAll, Inspectors} import scala.math.{max, min} @@ -88,7 +91,7 @@ class GeoTrellisDataSourceSpec outputDir.deleteOnExit() // Test layer writing via RF - testRdd.toRF.write.geotrellis.asLayer(layer).save() + testRdd.toLayer.write.geotrellis.asLayer(layer).save() val tfRdd = testRdd.map { case (k, tile) ⇒ val md = Map("col" -> k.col,"row" -> k.row) @@ -114,7 +117,7 @@ class GeoTrellisDataSourceSpec val tlfRdd = ContextRDD(tfRdd, testRdd.metadata) writer.write(tfLayer.id, tlfRdd, ZCurveKeyIndexMethod.byDay()) - //TestData.sampleTileLayerRDD.toRF.write.geotrellis.asLayer(sampleImageLayer).save() + 
//TestData.sampleTileLayerRDD.toLayer.write.geotrellis.asLayer(sampleImageLayer).save() val writer2 = LayerWriter(sampleImageLayer.base) val imgRDD = TestData.sampleTileLayerRDD writer2.write(sampleImageLayer.id, imgRDD, ZCurveKeyIndexMethod) @@ -123,13 +126,13 @@ class GeoTrellisDataSourceSpec describe("DataSource reading") { def layerReader = spark.read.geotrellis it("should read tiles") { - val df = layerReader.loadRF(layer) + val df = layerReader.loadLayer(layer) assert(df.count === tileCoordRange.length * tileCoordRange.length) } it("used produce tile UDT that we can manipulate") { - val df = layerReader.loadRF(layer) - .select(SPATIAL_KEY_COLUMN, tile_stats(TILE_COLUMN)) + val df = layerReader.loadLayer(layer) + .select(SPATIAL_KEY_COLUMN, rf_tile_stats(TILE_COLUMN)) assert(df.count() > 0) } @@ -138,7 +141,7 @@ class GeoTrellisDataSourceSpec val bbox = testRdd.metadata.layout .mapTransform(boundKeys.toGridBounds()) .jtsGeom - val wc = layerReader.loadRF(layer).withCenter() + val wc = layerReader.loadLayer(layer).withCenter() withClue("literate API") { val df = wc.where(CENTER_COLUMN intersects bbox) @@ -151,7 +154,7 @@ class GeoTrellisDataSourceSpec } it("should invoke Encoder[Extent]") { - val df = layerReader.loadRF(layer).withBounds() + val df = layerReader.loadLayer(layer).withGeometry() assert(df.count > 0) assert(df.first.length === 5) assert(df.first.getAs[Extent](2) !== null) @@ -159,7 +162,7 @@ class GeoTrellisDataSourceSpec it("should write to parquet") { //just should not throw - val df = layerReader.loadRF(layer) + val df = layerReader.loadLayer(layer) assert(write(df)) } } @@ -169,23 +172,23 @@ class GeoTrellisDataSourceSpec val expected = 2 val df = spark.read.geotrellis .withNumPartitions(expected) - .loadRF(layer) + .loadLayer(layer) assert(df.rdd.partitions.length === expected) } it("should respect partitions 20") { val expected = 20 val df = spark.read.geotrellis .withNumPartitions(expected) - .loadRF(layer) + .loadLayer(layer) 
assert(df.rdd.partitions.length === expected) } it("should respect subdivide 2") { val param = 2 - val df: RasterFrame = spark.read.geotrellis + val df: RasterFrameLayer = spark.read.geotrellis .withTileSubdivisions(param) - .loadRF(layer) + .loadLayer(layer) - val dims = df.select(tile_dimensions(df.tileColumns.head)("cols"), tile_dimensions(df.tileColumns.head)("rows")).first() + val dims = df.select(rf_dimensions(df.tileColumns.head)("cols"), rf_dimensions(df.tileColumns.head)("rows")).first() assert(dims.getAs[Int](0) === tileSize / param) assert(dims.getAs[Int](1) === tileSize / param) @@ -194,11 +197,11 @@ class GeoTrellisDataSourceSpec } it("should respect subdivide with TileFeature"){ val param = 2 - val rf: RasterFrame = spark.read.geotrellis + val rf: RasterFrameLayer = spark.read.geotrellis .withTileSubdivisions(param) - .loadRF(tfLayer) + .loadLayer(tfLayer) - val dims = rf.select(tile_dimensions(rf.tileColumns.head)("cols"), tile_dimensions(rf.tileColumns.head)("rows")) + val dims = rf.select(rf_dimensions(rf.tileColumns.head)("cols"), rf_dimensions(rf.tileColumns.head)("rows")) .first() assert(dims.getAs[Int](0) === tileSize / param) assert(dims.getAs[Int](1) === tileSize / param) @@ -213,14 +216,14 @@ class GeoTrellisDataSourceSpec .geotrellis .withNumPartitions(7) .withTileSubdivisions(subParam) - .loadRF(layer) + .loadLayer(layer) // is it partitioned correctly? assert(rf.rdd.partitions.length === 7) // is it subdivided? 
assert(rf.count === testRdd.count * subParam * subParam) - val dims = rf.select(tile_dimensions(rf.tileColumns.head)("cols"), tile_dimensions(rf.tileColumns.head)("rows")) + val dims = rf.select(rf_dimensions(rf.tileColumns.head)("cols"), rf_dimensions(rf.tileColumns.head)("rows")) .first() assert(dims.getAs[Int](0) === tileSize / subParam) assert(dims.getAs[Int](1) === tileSize / subParam) @@ -230,7 +233,7 @@ class GeoTrellisDataSourceSpec val subs = 4 val rf = spark.read.geotrellis .withTileSubdivisions(subs) - .loadRF(sampleImageLayer) + .loadLayer(sampleImageLayer) assert(rf.count === (TestData.sampleTileLayerRDD.count * subs * subs)) @@ -247,13 +250,13 @@ class GeoTrellisDataSourceSpec it("should throw on subdivide 5") { // only throws when an action is taken... - assertThrows[IllegalArgumentException](spark.read.geotrellis.withTileSubdivisions(5).loadRF(layer).cache) + assertThrows[IllegalArgumentException](spark.read.geotrellis.withTileSubdivisions(5).loadLayer(layer).cache) } it("should throw on subdivide 13") { - assertThrows[IllegalArgumentException](spark.read.geotrellis.withTileSubdivisions(13).loadRF(layer).cache) + assertThrows[IllegalArgumentException](spark.read.geotrellis.withTileSubdivisions(13).loadLayer(layer).cache) } it("should throw on subdivide -3") { - assertThrows[IllegalArgumentException](spark.read.geotrellis.withTileSubdivisions(-3).loadRF(layer).count) + assertThrows[IllegalArgumentException](spark.read.geotrellis.withTileSubdivisions(-3).loadLayer(layer).count) } } @@ -284,8 +287,8 @@ class GeoTrellisDataSourceSpec it("should support extent against a geometry literal") { val df: DataFrame = layerReader - .loadRF(layer) - .where(BOUNDS_COLUMN intersects pt1) + .loadLayer(layer) + .where(GEOMETRY_COLUMN intersects pt1) assert(numFilters(df) === 1) @@ -295,8 +298,8 @@ class GeoTrellisDataSourceSpec it("should support query with multiple geometry types") { // Mostly just testing that these evaluate without catalyst type errors. 
- forEvery(JTS.all) { g ⇒ - val query = layerReader.loadRF(layer).where(BOUNDS_COLUMN.intersects(g)) + forEvery(GeomData.all) { g ⇒ + val query = layerReader.loadLayer(layer).where(GEOMETRY_COLUMN.intersects(g)) .persist(StorageLevel.OFF_HEAP) assert(query.count() === 0) } @@ -308,8 +311,8 @@ class GeoTrellisDataSourceSpec val mkPtFcn = sparkUdf((_: Row) ⇒ { Point(-88, 60).jtsGeom }) val df = layerReader - .loadRF(layer) - .where(st_intersects(BOUNDS_COLUMN, mkPtFcn(SPATIAL_KEY_COLUMN))) + .loadLayer(layer) + .where(st_intersects(GEOMETRY_COLUMN, mkPtFcn(SPATIAL_KEY_COLUMN))) assert(numFilters(df) === 0) @@ -320,7 +323,7 @@ class GeoTrellisDataSourceSpec it("should support temporal predicates") { withClue("at now") { val df = layerReader - .loadRF(layer) + .loadLayer(layer) .where(TIMESTAMP_COLUMN === Timestamp.valueOf(now.toLocalDateTime)) assert(numFilters(df) == 1) @@ -329,7 +332,7 @@ class GeoTrellisDataSourceSpec withClue("at earlier") { val df = layerReader - .loadRF(layer) + .loadLayer(layer) .where(TIMESTAMP_COLUMN === Timestamp.valueOf(now.minusDays(1).toLocalDateTime)) assert(numFilters(df) === 1) @@ -338,7 +341,7 @@ class GeoTrellisDataSourceSpec withClue("between now") { val df = layerReader - .loadRF(layer) + .loadLayer(layer) .where(TIMESTAMP_COLUMN betweenTimes (now.minusDays(1), now.plusDays(1))) assert(numFilters(df) === 1) @@ -347,7 +350,7 @@ class GeoTrellisDataSourceSpec withClue("between later") { val df = layerReader - .loadRF(layer) + .loadLayer(layer) .where(TIMESTAMP_COLUMN betweenTimes (now.plusDays(1), now.plusDays(2))) assert(numFilters(df) === 1) @@ -358,10 +361,10 @@ class GeoTrellisDataSourceSpec it("should support nested predicates") { withClue("fully nested") { val df = layerReader - .loadRF(layer) + .loadLayer(layer) .where( - ((BOUNDS_COLUMN intersects pt1) || - (BOUNDS_COLUMN intersects pt2)) && + ((GEOMETRY_COLUMN intersects pt1) || + (GEOMETRY_COLUMN intersects pt2)) && (TIMESTAMP_COLUMN === 
Timestamp.valueOf(now.toLocalDateTime)) ) @@ -373,8 +376,8 @@ class GeoTrellisDataSourceSpec withClue("partially nested") { val df = layerReader - .loadRF(layer) - .where((BOUNDS_COLUMN intersects pt1) || (BOUNDS_COLUMN intersects pt2)) + .loadLayer(layer) + .where((GEOMETRY_COLUMN intersects pt1) || (GEOMETRY_COLUMN intersects pt2)) .where(TIMESTAMP_COLUMN === Timestamp.valueOf(now.toLocalDateTime)) assert(numFilters(df) === 1) @@ -387,17 +390,17 @@ class GeoTrellisDataSourceSpec it("should support intersects with between times") { withClue("intersects first") { val df = layerReader - .loadRF(layer) - .where(BOUNDS_COLUMN intersects pt1) + .loadLayer(layer) + .where(GEOMETRY_COLUMN intersects pt1) .where(TIMESTAMP_COLUMN betweenTimes(now.minusDays(1), now.plusDays(1))) assert(numFilters(df) == 1) } withClue("intersects last") { val df = layerReader - .loadRF(layer) + .loadLayer(layer) .where(TIMESTAMP_COLUMN betweenTimes(now.minusDays(1), now.plusDays(1))) - .where(BOUNDS_COLUMN intersects pt1) + .where(GEOMETRY_COLUMN intersects pt1) assert(numFilters(df) == 1) } @@ -405,10 +408,10 @@ class GeoTrellisDataSourceSpec withClue("untyped columns") { import spark.implicits._ val df = layerReader - .loadRF(layer) + .loadLayer(layer) .where($"timestamp" >= Timestamp.valueOf(now.minusDays(1).toLocalDateTime)) .where($"timestamp" <= Timestamp.valueOf(now.plusDays(1).toLocalDateTime)) - .where(st_intersects($"bounds", geomLit(pt1.jtsGeom))) + .where(st_intersects(GEOMETRY_COLUMN, geomLit(pt1.jtsGeom))) assert(numFilters(df) == 1) } @@ -417,19 +420,19 @@ class GeoTrellisDataSourceSpec it("should handle renamed spatial filter columns") { val df = layerReader - .loadRF(layer) - .where(BOUNDS_COLUMN intersects region.jtsGeom) - .withColumnRenamed(BOUNDS_COLUMN.columnName, "foobar") + .loadLayer(layer) + .where(GEOMETRY_COLUMN intersects region.jtsGeom) + .withColumnRenamed(GEOMETRY_COLUMN.columnName, "foobar") assert(numFilters(df) === 1) - assert(df.count > 0, df.printSchema) 
+ assert(df.count > 0, df.schema.treeString) } it("should handle dropped spatial filter columns") { val df = layerReader - .loadRF(layer) - .where(BOUNDS_COLUMN intersects region.jtsGeom) - .drop(BOUNDS_COLUMN) + .loadLayer(layer) + .where(GEOMETRY_COLUMN intersects region.jtsGeom) + .drop(GEOMETRY_COLUMN) assert(numFilters(df) === 1) } @@ -437,18 +440,18 @@ class GeoTrellisDataSourceSpec describe("TileFeature support") { def layerReader = spark.read.geotrellis - it("should resolve TileFeature-based RasterFrame") { - val rf = layerReader.loadRF(tfLayer) + it("should resolve TileFeature-based RasterFrameLayer") { + val rf = layerReader.loadLayer(tfLayer) //rf.show(false) assert(rf.collect().length === testRdd.count()) } - it("should respect subdivideTile option on TileFeature RasterFrame") { + it("should respect subdivideTile option on TileFeature RasterFrameLayer") { val subParam = 4 - val rf = spark.read.option(TILE_SUBDIVISIONS_PARAM, subParam).geotrellis.loadRF(tfLayer) + val rf = spark.read.option(TILE_SUBDIVISIONS_PARAM, subParam).geotrellis.loadLayer(tfLayer) assert(rf.count === testRdd.count * subParam * subParam) - val dims = rf.select(tile_dimensions(rf.tileColumns.head)("cols"), tile_dimensions(rf.tileColumns.head)("rows")) + val dims = rf.select(rf_dimensions(rf.tileColumns.head)("cols"), rf_dimensions(rf.tileColumns.head)("rows")) .first() assert(dims.getAs[Int](0) === tileSize / subParam) assert(dims.getAs[Int](1) === tileSize / subParam) @@ -459,11 +462,11 @@ class GeoTrellisDataSourceSpec val rf = spark.read .option(TILE_SUBDIVISIONS_PARAM, subParam) .option(NUM_PARTITIONS_PARAM, 10) - .geotrellis.loadRF(tfLayer) + .geotrellis.loadLayer(tfLayer) // is it subdivided? 
assert(rf.count === testRdd.count * subParam * subParam) - val dims = rf.select(tile_dimensions(rf.tileColumns.head)("cols"), tile_dimensions(rf.tileColumns.head)("rows")) + val dims = rf.select(rf_dimensions(rf.tileColumns.head)("cols"), rf_dimensions(rf.tileColumns.head)("rows")) .first() assert(dims.getAs[Int](0) === tileSize / subParam) assert(dims.getAs[Int](1) === tileSize / subParam) diff --git a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotrellis/TileFeatureSupportSpec.scala b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/TileFeatureSupportSpec.scala similarity index 94% rename from datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotrellis/TileFeatureSupportSpec.scala rename to datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/TileFeatureSupportSpec.scala index 5d475f263..0cf7e358c 100644 --- a/datasource/src/test/scala/astraea/spark/rasterframes/datasource/geotrellis/TileFeatureSupportSpec.scala +++ b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/geotrellis/TileFeatureSupportSpec.scala @@ -15,13 +15,15 @@ * License for the specific language governing permissions and limitations under * the License. 
* + * SPDX-License-Identifier: Apache-2.0 + * */ -package astraea.spark.rasterframes.datasource.geotrellis +package org.locationtech.rasterframes.datasource.geotrellis -import astraea.spark.rasterframes._ -import astraea.spark.rasterframes.datasource.geotrellis.TileFeatureSupport._ -import astraea.spark.rasterframes.util.{WithCropMethods, WithMaskMethods, WithMergeMethods, WithPrototypeMethods} +import org.locationtech.rasterframes._ +import org.locationtech.rasterframes.datasource.geotrellis.TileFeatureSupport._ +import org.locationtech.rasterframes.util.{WithCropMethods, WithMaskMethods, WithMergeMethods, WithPrototypeMethods} import geotrellis.proj4.LatLng import geotrellis.raster.crop.Crop import geotrellis.raster.rasterize.Rasterizer @@ -32,6 +34,7 @@ import geotrellis.spark.tiling._ import geotrellis.vector.{Extent, ProjectedExtent} import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD +import org.locationtech.rasterframes.TestEnvironment import org.scalatest.BeforeAndAfter import scala.reflect.ClassTag @@ -151,8 +154,6 @@ class TileFeatureSupportSpec extends TestEnvironment object TileFeatureSupportSpec { - import scala.language.implicitConversions - implicit class RichRandom(val rnd: scala.util.Random) extends AnyVal { def nextDouble(max: Double): Double = (rnd.nextInt * max) / Int.MaxValue.toDouble def nextOrderedPair(max:Double): (Double,Double) = (nextDouble(max),nextDouble(max)) match { diff --git a/datasource/src/test/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceDataSourceSpec.scala b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceDataSourceSpec.scala new file mode 100644 index 000000000..a02c858e1 --- /dev/null +++ b/datasource/src/test/scala/org/locationtech/rasterframes/datasource/raster/RasterSourceDataSourceSpec.scala @@ -0,0 +1,314 @@ +/* + * This software is licensed under the Apache 2 license, quoted below. + * + * Copyright 2019 Astraea, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * [http://www.apache.org/licenses/LICENSE-2.0] + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + * SPDX-License-Identifier: Apache-2.0 + * + */ + +package org.locationtech.rasterframes.datasource.raster +import geotrellis.raster.Tile +import org.apache.spark.sql.functions.{lit, udf, round} +import org.locationtech.rasterframes.{TestEnvironment, _} +import org.locationtech.rasterframes.datasource.raster.RasterSourceDataSource.{RasterSourceCatalog, _} +import org.locationtech.rasterframes.model.TileDimensions +import org.locationtech.rasterframes.ref.RasterRef.RasterRefTile +import org.locationtech.rasterframes.util._ + +class RasterSourceDataSourceSpec extends TestEnvironment with TestData { + import spark.implicits._ + + describe("DataSource parameter processing") { + def singleCol(paths: Iterable[String]) = { + val rows = paths.mkString(DEFAULT_COLUMN_NAME + "\n", "\n", "") + RasterSourceCatalog(rows, DEFAULT_COLUMN_NAME) + } + + it("should handle single `path`") { + val p = Map(PATH_PARAM -> "/usr/local/foo/bar.tif") + p.catalog should be (Some(singleCol(p.values))) + } + + it("should handle single `paths`") { + val p = Map(PATHS_PARAM -> "/usr/local/foo/bar.tif") + p.catalog should be (Some(singleCol(p.values))) + } + it("should handle multiple `paths`") { + val expected = Seq("/usr/local/foo/bar.tif", "/usr/local/bar/foo.tif") + val p = Map(PATHS_PARAM -> expected.mkString("\n\r", "\n\n", "\r")) + p.catalog should be (Some(singleCol(expected))) + } + it("should handle both `path` and `paths`") 
{ + val expected1 = Seq("/usr/local/foo/bar.tif", "/usr/local/bar/foo.tif") + val expected2 = "/usr/local/barf/baz.tif" + val p = Map(PATHS_PARAM -> expected1.mkString("\n"), PATH_PARAM -> expected2) + p.catalog should be (Some(singleCol(expected1 :+ expected2))) + } + it("should parse tile dimensions") { + val p = Map(TILE_DIMS_PARAM -> "4, 5") + p.tileDims should be (Some(TileDimensions(4, 5))) + } + + it("should parse path table specification") { + val p = Map(CATALOG_TABLE_PARAM -> "catalogTable", CATALOG_TABLE_COLS_PARAM -> "path") + p.pathSpec should be (Right(RasterSourceCatalogRef("catalogTable", "path"))) + } + + it("should parse path table from CSV") { + val bands = Seq("B1", "B2", "B3") + val paths = Seq("/usr/local/foo/bar.tif", "/usr/local/bar/foo.tif", "/usr/local/barf/baz.tif") + val csv = + s""" + |${bands.mkString(",")} + |${paths.mkString(",")} + """.stripMargin.trim + val p = Map(CATALOG_CSV_PARAM -> csv) + p.pathSpec should be (Left(RasterSourceCatalog(csv))) + } + } + + describe("RasterSource as relation reading") { + val b = DEFAULT_COLUMN_NAME + + it("should default to a single band schema") { + val df = spark.read.raster.load(l8B1SamplePath.toASCIIString) + val tcols = df.tileColumns + tcols.length should be(1) + tcols.map(_.columnName) should contain(DEFAULT_COLUMN_NAME) + } + it("should support a multiband schema") { + val df = spark.read + .raster + .withBandIndexes(0, 1, 2) + .load(cogPath.toASCIIString) + val tcols = df.tileColumns + tcols.length should be(3) + tcols.map(_.columnName) should contain allElementsOf Seq("_b0", "_b1", "_b2").map(s => DEFAULT_COLUMN_NAME + s) + } + it("should read a multiband file") { + val df = spark.read + .raster + .withBandIndexes(0, 1, 2) + .load(cogPath.toASCIIString) + .cache() + df.schema.size should be (4) + // Test (roughly) we have three distinct but compabible bands + val stats = df.agg(rf_agg_stats($"${b}_b0") as "s0", rf_agg_stats($"${b}_b1") as "s1", rf_agg_stats($"${b}_b2") as "s2") + 
stats.select($"s0.data_cells" === $"s1.data_cells").as[Boolean].first() should be(true) + stats.select($"s0.data_cells" === $"s2.data_cells").as[Boolean].first() should be(true) + stats.select($"s0.mean" =!= $"s1.mean").as[Boolean].first() should be(true) + stats.select($"s0.mean" =!= $"s2.mean").as[Boolean].first() should be(true) + } + it("should read a single file") { + // Image is 1028 x 989 -> 9 x 8 tiles + val df = spark.read.raster + .withTileDimensions(128, 128) + .load(cogPath.toASCIIString) + + df.count() should be(math.ceil(1028.0 / 128).toInt * math.ceil(989.0 / 128).toInt) + + val dims = df.select(rf_dimensions($"$b").as[TileDimensions]).distinct().collect() + dims should contain allElementsOf + Seq(TileDimensions(4,128), TileDimensions(128,128), TileDimensions(128,93), TileDimensions(4,93)) + + df.select($"${b}_path").distinct().count() should be(1) + } + it("should read a multiple files with one band") { + val df = spark.read.raster + .from(Seq(cogPath, l8B1SamplePath, nonCogPath)) + .withTileDimensions(128, 128) + .load() + df.select($"${b}_path").distinct().count() should be(3) + df.schema.size should be(2) + } + it("should read a multiple files with heterogeneous bands") { + val df = spark.read.raster + .from(Seq(cogPath, l8B1SamplePath, nonCogPath)) + .withLazyTiles(false) + .withTileDimensions(128, 128) + .withBandIndexes(0, 1, 2, 3) + .load() + .cache() + df.select($"${b}_path").distinct().count() should be(3) + df.schema.size should be(5) + + df.select($"${b}_b0").count() should be (df.select($"${b}_b0").na.drop.count()) + df.select($"${b}_b1").na.drop.count() shouldBe <(df.count()) + df.select($"${b}_b1").na.drop.count() should be (df.select($"${b}_b2").na.drop.count()) + df.select($"${b}_b3").na.drop.count() should be (0) + } + + it("should read a set of coherent bands from multiple files from a CSV") { + val bands = Seq("B1", "B2", "B3") + val paths = Seq( + l8SamplePath(1).toASCIIString, + l8SamplePath(2).toASCIIString, + 
l8SamplePath(3).toASCIIString + ) + + val csv = + s""" + |${bands.mkString(",")} + |${paths.mkString(",")} + """.stripMargin.trim + + val df = spark.read.raster + .fromCSV(csv) + .withTileDimensions(128, 128) + .load() + + df.schema.size should be(6) + df.tileColumns.size should be (3) + df.select($"B1_path").distinct().count() should be (1) + } + + it("should read a set of coherent bands from multiple files in a dataframe") { + val bandPaths = Seq(( + l8SamplePath(1).toASCIIString, + l8SamplePath(2).toASCIIString, + l8SamplePath(3).toASCIIString)) + .toDF("B1", "B2", "B3") + .withColumn("foo", lit("something")) + + val df = spark.read.raster + .fromCatalog(bandPaths, "B1", "B2", "B3") + .withTileDimensions(128, 128) + .load() + + df.schema.size should be(7) + df.tileColumns.size should be (3) + df.select($"B1_path").distinct().count() should be (1) + + df.columns.contains("foo") should be (true) + df.select($"foo").distinct().count() should be (1) + df.select($"foo".as[String]).first() should be ("something") + + val diffStats = df.select(rf_tile_stats($"B1") =!= rf_tile_stats($"B2")).as[Boolean].collect() + diffStats.forall(identity) should be(true) + } + + it("should read a set of coherent bands from multiple files in a csv") { + def b(i: Int) = l8SamplePath(i).toASCIIString + + val csv = + s""" + |B1, B2, B3, foo + |${b(1)}, ${b(2)}, ${b(3)}, something + """.stripMargin + + val df = spark.read.raster + .fromCSV(csv, "B1", "B2", "B3") + .withTileDimensions(128, 128) + .load() + + df.schema.size should be(7) + df.tileColumns.size should be (3) + df.select($"B1_path").distinct().count() should be (1) + + df.columns.contains("foo") should be (true) + df.select($"foo").distinct().count() should be (1) + df.select($"foo".as[String]).first() should be ("something") + + val diffStats = df.select(rf_tile_stats($"B1") =!= rf_tile_stats($"B2")).as[Boolean].collect() + diffStats.forall(identity) should be(true) + } + + it("should support lazy and strict reading of tiles") 
{ + val is_lazy = udf((t: Tile) => { + t.isInstanceOf[RasterRefTile] + }) + + val df1 = spark.read.raster + .withLazyTiles(true) + .load(l8SamplePath(1).toASCIIString) + + df1.select(is_lazy($"proj_raster.tile").as[Boolean]).first() should be (true) + + val df2 = spark.read.raster + .withLazyTiles(false) + .load(l8SamplePath(1).toASCIIString) + + df2.select(is_lazy($"proj_raster.tile").as[Boolean]).first() should be (false) + + } + } + + describe("RasterSource breaks up scenes into tiles") { + val modis_df = spark.read.raster + .withTileDimensions(128, 128) + .withLazyTiles(true) + .load(remoteMODIS.toASCIIString) + + val l8_df = spark.read.raster + .withTileDimensions(32, 33) + .withLazyTiles(true) + .load(remoteL8.toASCIIString) + + ignore("should have at most four tile dimensions reading MODIS; ignore until fix #242") { + val dims = modis_df.select(rf_dimensions($"proj_raster")).distinct().collect() + dims.length should be > (0) + dims.length should be <= (4) + } + + it("should have at most four tile dimensions reading landsat") { + val dims = l8_df.select(rf_dimensions($"proj_raster")).distinct().collect() + dims.length should be > (0) + dims.length should be <= (4) + } + + it("should provide MODIS tiles with requested size") { + val res = modis_df + .withColumn("dims", rf_dimensions($"proj_raster")) + .select($"dims".as[TileDimensions]).distinct().collect() + + forEvery(res) { r => + r.cols should be <=128 + r.rows should be <=128 + } + } + + it("should provide Landsat tiles with requested size") { + val dims = l8_df + .withColumn("dims", rf_dimensions($"proj_raster")) + .select($"dims".as[TileDimensions]).distinct().collect() + + forEvery(dims) { d => + d.cols should be <=32 + d.rows should be <=33 + } + } + + it("should have consistent tile resolution reading MODIS") { + val res = modis_df + .withColumn("ext", rf_extent($"proj_raster")) + .withColumn("dims", rf_dimensions($"proj_raster")) + .select(round(($"ext.xmax" - $"ext.xmin") / $"dims.cols", 5)) + 
.distinct().collect() + withClue(res.mkString("(", ", ", ")")) { + res.length should be(1) + } + } + + it("should have consistent tile resolution reading Landsat") { + val res = l8_df + .withColumn("ext", rf_extent($"proj_raster")) + .withColumn("dims", rf_dimensions($"proj_raster")) + .select(($"ext.xmax" - $"ext.xmin") / $"dims.cols") + .distinct().collect() + res.length should be (1) + } + } +} diff --git a/deployment/README.md b/deployment/README.md deleted file mode 100644 index 5e008b8a1..000000000 --- a/deployment/README.md +++ /dev/null @@ -1,66 +0,0 @@ -# RasterFrames Jupyter Notebook Docker Container - -RasterFrames provides a Docker image with a Jupyter Notebook pre-configured with RasterFrames support for Python 3 and Scala Spylon kernels. - -## Quick start - -This will use the [latest image](https://hub.docker.com/r/s22s/rasterframes-notebooks/) published to Docker Hub. - -```bash -# Optionally pull the latest image. -$ docker pull s22s/rasterframes-notebooks - -# from root of the git repo -$ cd deployment/docker/jupyter -$ docker-compose up -``` - -## Custom run - -The `docker-compose` incantation automatically exposes port 8888 for the Jupyter Notebook and ports ports 4040-4044 for the Spark UI. - -The image can equivalently be run with: - - $ docker run -it --rm -p 8888:8888 -p 4040-4044:4040-4044 s22s/rasterframes-notebooks - -The `docker run` command can be changed to quickly customize the container. - -To mount a directory on the host machine (to load or save local files directly from Jupyter) add - - -v /some/host/folder/for/work:/home/jovyan/work - -to the command. - -Attach the notebook server to a different host port with - - -p 8630:8888 - -if you already have a notebook server running on port 8888. 
- -If you want to use a known password, use - -```bash -docker run -it --rm -p 8888:8888 -p 4040-4044:4040-4044 \ - s22s/rasterframes-notebooks \ - start-notebook.sh --NotebookApp.password='sha1:1c360e8dd3e1:946d17ef9e6b8cbb28c7bb0152329786918cc424' -``` - -Where the password sha is generated with [`notebook.auth.passwd`](https://jupyter-notebook.readthedocs.io/en/stable/public_server.html#preparing-a-hashed-password). - -Please see the `Dockerfile` and the `docker-compose.yml` file on GitHub ([here](https://github.com/locationtech/rasterframes/tree/develop/deployment/docker/jupyter)) as a starting point to customize your image and container. - - -## For Development - -To build the Docker image based on local development changes: - -```bash -# from the root of the repo -sbt deployment/rfDocker -``` - -## Base images - -This image is based on [jupyter/pyspark-notebook](https://hub.docker.com/r/jupyter/pyspark-notebook), with some -portions from [jupyter/all-spark-notebook](https://hub.docker.com/r/jupyter/all-spark-notebook). -Much more extensive instructions can be found at those locations. 
\ No newline at end of file diff --git a/deployment/build.sbt b/deployment/build.sbt deleted file mode 100644 index c76ef554b..000000000 --- a/deployment/build.sbt +++ /dev/null @@ -1,56 +0,0 @@ -import sbt.{IO, _} - -import scala.sys.process.Process - -moduleName := "rasterframes-deployment" - -val Docker = config("docker") -val Python = config("python") - - -lazy val rfDockerImageName = settingKey[String]("Name to tag Docker image with.") -rfDockerImageName := "s22s/rasterframes-notebooks" - -lazy val rfDocker = taskKey[Unit]("Build Jupyter Notebook Docker image with RasterFrames support.") -rfDocker := (Docker / packageBin).value - -lazy val runRFNotebook = taskKey[String]("Run RasterFrames Jupyter Notebook image") -runRFNotebook := { - val imageName = rfDockerImageName.value - val _ = rfDocker.value - Process(s"docker run -p 8888:8888 -p 4040:4040 $imageName").run() - imageName -} - -Docker / resourceDirectory := baseDirectory.value / "docker"/ "jupyter" - -Docker / target := target.value / "docker" - -Docker / mappings := { - val rezDir = (Docker / resourceDirectory).value - val files = (rezDir ** "*") pair Path.relativeTo(rezDir) - - val jar = (assembly in LocalProject("pyrasterframes")).value - val py = (packageBin in (LocalProject("pyrasterframes"), Python)).value - - files ++ Seq(jar -> jar.getName, py -> py.getName) -} - -def rfFiles = Def.task { - val destDir = (Docker / target).value - val filePairs = (Docker / mappings).value - IO.copy(filePairs.map { case (src, dst) ⇒ (src, destDir / dst) }) -} - -Docker / packageBin := { - val _ = rfFiles.value - val logger = streams.value.log - val staging = (Docker / target).value - val ver = (version in LocalRootProject).value - - logger.info(s"Running docker build in $staging") - val imageName = rfDockerImageName.value - Process("docker-compose build", staging).! - Process(s"docker tag $imageName:latest $imageName:$ver", staging).! 
- staging -} diff --git a/deployment/docker/jupyter/Dockerfile b/deployment/docker/jupyter/Dockerfile deleted file mode 100644 index ebf52fdac..000000000 --- a/deployment/docker/jupyter/Dockerfile +++ /dev/null @@ -1,69 +0,0 @@ -FROM jupyter/pyspark-notebook:92fe05d1e7e5 - -MAINTAINER Astraea, Inc. - -ENV RF_LIB_LOC /usr/lib -ENV RF_JAR $RF_LIB_LOC/rasterframes.jar -ENV PY_RF_ZIP $RF_LIB_LOC/pyrasterframes.zip - -USER root - -RUN echo "spark.driver.extraClassPath $RF_JAR" >> /usr/local/spark/conf/spark-defaults.conf && \ - echo "spark.executor.extraClassPath $RF_JAR" >> /usr/local/spark/conf/spark-defaults.conf - -EXPOSE 4040 4041 4042 4043 4044 - -ENV SPARK_OPTS $SPARK_OPTS \ - --py-files $PY_RF_ZIP \ - --jars $RF_JAR \ - --driver-class-path $RF_JAR \ - --conf spark.executor.extraClassPath=$RF_JAR - -ENV PYTHONPATH $PYTHONPATH:$PY_RF_ZIP - - -#================================ -# Copied from all-spark-notebook -#================================ - -# TODO: resolve the issue that toree has with --py-files, above (it does not like .zips and -# TODO: the kernel will not start) -# Apache Toree kernel -#RUN pip install --no-cache-dir \ -# https://dist.apache.org/repos/dist/dev/incubator/toree/0.2.0-incubating-rc5/toree-pip/toree-0.2.0.tar.gz \ -# && \ -# jupyter toree install --sys-prefix && \ -# rm -rf /home/$NB_USER/.local && \ -# fix-permissions $CONDA_DIR && \ -# fix-permissions /home/$NB_USER - -# Spylon-kernel -RUN conda install --quiet --yes 'spylon-kernel=0.4*' && \ - conda clean -tipsy && \ - python -m spylon_kernel install --sys-prefix - -# Sphinx (for Notebook->html) -RUN conda install --quiet --yes \ - sphinx nbsphinx - -# Cleanup pip residuals -RUN rm -rf /home/$NB_USER/.local && \ - fix-permissions $CONDA_DIR && \ - fix-permissions /home/$NB_USER - -# Do these after the standard environment setup -# since these change more regularly. 
-COPY *.zip $PY_RF_ZIP -COPY *.jar $RF_JAR - -RUN chown -R $NB_UID:$NB_GID $HOME - -USER $NB_UID - -# RUN pip install guzzle_sphinx_theme - -# TODO: This repo can change regularly without docker knowing that the -# TODO: Layer this command is written in has become stale. Need to either -# TODO: clone a specific revision that we manually update, or keep this -# TODO: last, assuming the prior commends will be detected as stale. -RUN git clone http://github.com/s22s/rasterframes-book && ln -s rasterframes-book/Python/samples diff --git a/deployment/docker/jupyter/README.md b/deployment/docker/jupyter/README.md deleted file mode 100644 index 815b78d8e..000000000 --- a/deployment/docker/jupyter/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# RasterFrames Jupyter Notebook - -Please visit `rasterframe-book` directory for example notebooks. \ No newline at end of file diff --git a/deployment/docker/jupyter/docker-compose.yml b/deployment/docker/jupyter/docker-compose.yml deleted file mode 100644 index 29f311c0f..000000000 --- a/deployment/docker/jupyter/docker-compose.yml +++ /dev/null @@ -1,18 +0,0 @@ -version: '3' - -services: - rasterframes-notebooks: - build: . - image: s22s/rasterframes-notebooks - ports: - # jupyter notebook port - - "8888:8888" - # spark UI ports - - "4040:4040" - - "4041:4041" - - "4042:4042" - - "4043:4043" - - "4044:4044" -# To save locally at './work' from the container: -# volumes: -# - ./work:/home/jovyan/work \ No newline at end of file diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 000000000..0c4925ba1 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,128 @@ +# RasterFrames Documentation + +The conceptual principles to consider when writing RasterFrames users' documentation are covered in [Documentation Principles](documentation-principles.md). This document covers the mechanics of writing, evaluating, and building the documentation during the writing process. 
+ +## Organization + +The documentation build is a two step process, whereby two sources (three if API docs are included) are merged together and converted into a static HTML website. They are: + +* Technical content and Python examples: `/pyrasterframes/src/main/python/docs` +* Global documentation assets and Scala specific content: `/docs/src/main/paradox` + +The build constructs in `/docs` are (due to legacy reasons) the top-level mechanisms of bringing it all together, but the meat of the content is in `/pyrasterframes/...`, and will be the focus of most of this document. + +## Prerequisites + +* [`sbt`](https://www.scala-sbt.org/) +* Python 3 +* Markdown editor. [Visual Studio Code](https://code.visualstudio.com/) with [`language-weave` extension](https://marketplace.visualstudio.com/items?itemName=jameselderfield.language-weave) is one option. [Atom](https://atom.io/) is another which might actually have better support for evaluating code in Markdown, but I've not tried it. + +> Note: If you're using Visual Studio Code, you can associate the `.pymd` with the `language-weave` plugin by adding this to your `settings.json` file. + +```json +"files.associations": { + "*.pymd": "pweave_md" +} +``` + +## Building the docs + +To build the static site locally: + + sbt makeSite + +The site will be at `/docs/target/site/index.html`. + + +To start an interactive server running the docs: + + sbt previewSite + +The sbt server logs a message with an address to view the site. + +## Content Development Process + +Start with one of the existing files in `/pyrasterframes/src/main/python/docs` as a template. [`local-algebra.pymd`](../pyrasterframes/src/main/python/docs/local-algebra.pymd) is a good example. If the content will have code blocks you want evaluated and results injected into the output, use the file extension `.pymd`. If the content doesn't use evaluatable code blocks, use `.md`. 
+ +All `.pymd` files are processed with a tool called [Pweave](http://mpastell.com/pweave), which produces a regular Markdown file where identified code blocks are evaluated and their results (optionally) included in the text. Matplotlib is supported! It is much like `knitr` in the R community. If we run into issues with Pweave, we can also consider [`knitpy`](https://github.com/jankatins/knitpy) or [`codebraid`](https://github.com/gpoore/codebraid). Codebraid looks particularly powerful, so we may think to transition to it. + +Pweave has a number of [code chunk options](http://mpastell.com/pweave/chunks.html) for controlling the output. Refer to those documents for details, and experiment a little to see what conveys your intent best. + +To set up an environment whereby you can easily test/evaluate your code blocks during writing: + +1. Run `sbt` from the `` directory. You should get output that looks something like: + ``` + $ sbt + ... + [info] Loading settings for project pyrasterframes from build.sbt ... + [info] Loading settings for project rf-notebook from build.sbt ... + [info] Set current project to RasterFrames (in build file:/) + sbt:RasterFrames> + ``` +2. The first time you check out the code, or whenever RasterFrames code is updated, you need to build the project artifacts so they are available for Pweave. Some docs also refer to test resources, so the easiest way to do it is to run the unit tests. + ``` + sbt:RasterFrames> pyrasterframes/test + [info] Compiling 4 Scala sources to /core/target/scala-2.11/classes ... + ... lots of noise ... + [info] PyRasterFrames assembly written to '/pyrasterframes/target/python/deps/jars/pyrasterframes-assembly-0.8.0-SNAPSHOT.jar' + [info] Synchronizing 44 files to '/pyrasterframes/target/python' + [info] Running 'python setup.py build bdist_wheel' in '/pyrasterframes/target/python' + ... more noise ... 
+ [info] Python .whl file written to '/pyrasterframes/target/python/dist/pyrasterframes-0.8.0.dev0-py2.py3-none-any.whl' + [success] Total time: 83 s, completed Jul 5, 2019 12:25:48 PM + sbt:RasterFrames> + ``` +3. To evaluate all the `.pymd` files, run: + ``` + sbt:RasterFrames> pyrasterframes/pySetup pweave + ``` + To build the artifact (step 1) and evaluate all the `.pymd` files, you can run: + ``` + sbt:RasterFrames> pyrasterframes/doc + ``` + There's a command alias for this last step: `pyDocs`. +4. To evaluate a single `.pymd` file, you pass the `-s` option and the filename relative to the `pyrasterframes/src/main/python` directory. You can also specify the output [format](http://mpastell.com/pweave/formats.html) with the `-f` argument. + ``` + sbt:RasterFrames> pyrasterframes/pySetup pweave -s docs/getting-started.pymd + [info] Synchronizing 44 files to '/pyrasterframes/target/python' + [info] Running 'python setup.py pweave -s docs/getting-started.pymd' in '/pyrasterframes/target/python' + running pweave + -------------------------------------------------- + Running getting-started + -------------------------------------------------- + status + status + Processing chunk 1 named None from line 14 + ... + Weaved docs/getting-started.pymd to docs/getting-started.md + ``` +5. The _output_ Markdown files are written to `/pyrasterframes/target/python/docs`. _Note_: don't edit any files in the `pyrasterframes/target` directory... they will get overwritten each time `sbt` runs a command. +6. During content development it's sometimes helpful to see the output rendered as basic HTML. 
To do this, add the `-f html` option to the pweave command: + ``` + sbt:RasterFrames> pyrasterframes/pySetup pweave -f html -s docs/getting-started.pymd + [info] Synchronizing 54 files to '/pyrasterframes/target/python' + [info] Running 'python setup.py pweave -f html -s docs/getting-started.pymd' in '/pyrasterframes/target/python' + running pweave + -------------------------------------------------- + Running getting-started + -------------------------------------------------- + ... + Weaved docs/getting-started.pymd to docs/getting-started.html + ``` + Note: This feature requires `pandoc` to be installed. +7. To build all the documentation and convert to a static html site, run: + ```bash + sbt makeSite + ``` + Results will be found in `/docs/target/site`. + +## Notebooks + +The `rf-notebooks` sub-project creates a Docker image with Jupyter Notebooks pre-configured with RasterFrames. Any `.pymd` file under `.../python/docs/` is converted to an evaluated Jupyter Notebook and included as a part of the build. + +## Submission Process + +Submit new and updated documentation as a PR against locationtech/rasterframes. Make sure you've signed the Eclipse Foundation ECA and you ["Signed-off-by:"](https://stackoverflow.com/questions/1962094/what-is-the-sign-off-feature-in-git-for) each commit in the PR. The "Signed-off-by" email address needs to be the exact same one as registered with the [Eclipse Foundation](https://wiki.eclipse.org/Development_Resources/Contributing_via_Git). 
+ +If you are using circle CI, the circle configuration is set up to build the docs with `sbt makeSite` for branch names matching `feature/.*docs.*` or `docs/.*` + diff --git a/docs/build.sbt b/docs/build.sbt index 3025e7c75..59f734a48 100644 --- a/docs/build.sbt +++ b/docs/build.sbt @@ -1,41 +1,79 @@ -import com.typesafe.sbt.SbtGit.git +// task to create documentation PDF +lazy val makePDF = taskKey[File]("Build PDF version of documentation") +lazy val pdfFileName = settingKey[String]("Name of the PDF file generated") +pdfFileName := s"RasterFrames-Users-Manual-${version.value}.pdf" -enablePlugins(SiteScaladocPlugin, ParadoxPlugin, TutPlugin, GhpagesPlugin, ScalaUnidocPlugin) +makePDF := { + import scala.sys.process._ -name := "rasterframes-docs" + // Get the python source directory configured in the root project. + val base = (Compile / paradox / sourceDirectories).value.find(_.toString.contains("python")).head -libraryDependencies ++= Seq( - spark("mllib").value % Tut, - spark("sql").value % Tut -) + // Hard coded lacking any simple way of determining order. + val files = Seq( + "index.md", + "description.md", + "concepts.md", + "getting-started.md", + "raster-io.md", + "raster-catalogs.md", + "raster-read.md", + "raster-write.md", + "vector-data.md", + "raster-processing.md", + "local-algebra.md", + "nodata-handling.md", + "aggregation.md", + "time-series.md", + "machine-learning.md", + "unsupervised-learning.md", + "supervised-learning.md", + "numpy-pandas.md", + "languages.md", + "reference.md" + ).map(base ** _).flatMap(_.get) -git.remoteRepo := "git@github.com:locationtech/rasterframes.git" -apiURL := Some(url("http://rasterframes.io/latest/api")) -autoAPIMappings := true -ghpagesNoJekyll := true + val log = streams.value.log + log.info("Section ordering:") + files.foreach(f => log.info(" - " + f.getName)) -ScalaUnidoc / siteSubdirName := "latest/api" -paradox / siteSubdirName := "." 
+ val work = target.value / "makePDF" + work.mkdirs() -addMappingsToSiteDir(ScalaUnidoc / packageDoc / mappings, ScalaUnidoc / siteSubdirName) -addMappingsToSiteDir(Compile / paradox / mappings, paradox / siteSubdirName) + val prepro = files.zipWithIndex.map { case (f, i) ⇒ + val dest = work / f"$i%02d-${f.getName}%s" + // Filter cross links and add a newline + (Seq("sed", "-e", """s/@ref://g;s/@@.*//g""", f.toString) #> dest).! + // Add newline at the end of the file so as to make pandoc happy + ("echo" #>> dest).! + ("echo \\pagebreak" #>> dest).! + dest + } -paradoxProperties ++= Map( - "github.base_url" -> "https://github.com/locationtech/rasterframes", - "version" -> version.value, - "scaladoc.org.apache.spark.sql.gt" -> "http://rasterframes.io/latest" - //"scaladoc.geotrellis.base_url" -> "https://geotrellis.github.io/scaladocs/latest", - // "snip.pyexamples.base_dir" -> (baseDirectory.value + "/../pyrasterframes/python/test/examples") -) -paradoxTheme := Some(builtinParadoxTheme("generic")) -//paradoxTheme / sourceDirectory := sourceDirectory.value / "main" / "paradox" / "_template" + val output = target.value / pdfFileName.value -Compile / doc / scalacOptions++= Seq( "-J-Xmx6G", "-no-link-warnings") + val header = (Compile / sourceDirectory).value / "latex" / "header.latex" -Tut / run / fork := true + val args = "pandoc" :: + "--from=markdown+pipe_tables" :: + "--to=pdf" :: + "-t" :: "latex" :: + "-s" :: + "--toc" :: + "-V" :: "title:RasterFrames Users' Manual" :: + "-V" :: "author:Astraea, Inc." :: + "-V" :: "geometry:margin=0.75in" :: + "-V" :: "papersize:letter" :: + "--include-in-header" :: header.toString :: + "-o" :: output.toString :: + prepro.map(_.toString).toList -Tut / run / javaOptions := Seq("-Xmx8G", "-Dspark.ui.enabled=false") + log.info(s"Running: ${args.mkString(" ")}") + Process(args, base).! 
-Compile / paradox := (Compile / paradox).dependsOn(tutQuick).value -Compile / paradox / sourceDirectory := tutTargetDirectory.value -makeSite := makeSite.dependsOn(Compile / unidoc).dependsOn(Compile / paradox).value + log.info("Wrote: " + output) + + output +} + +makePDF := makePDF.dependsOn(Compile / paradox).value diff --git a/docs/documentation-principles.md b/docs/documentation-principles.md new file mode 100644 index 000000000..3f626ddb8 --- /dev/null +++ b/docs/documentation-principles.md @@ -0,0 +1,98 @@ +# Documentation Principles + +This document outlines some concrete considerations for the planned rewrite of the _RasterFrames Users' Manual_. +See [`docs/README.md`](https://github.com/locationtech/rasterframes/blob/develop/docs/README.md) for technical details on the mechanics of building the documentation. + +## Title + +The project is "RasterFrames". Documentation shall use the name "RasterFrames". The RasterFrames runtime is deployed in two forms: `rasterframes` (for Scala/Java/SQL-only), and `pyrasterframes` (Python). But the user should know and think of the project as one thing: RasterFrames. + +## Format + +The documentation shall be rendered in Markdown (`.md`) and Python Markdown (`.pymd`). The source of this documentation is committed in the same project as the code, in `pyrasterframes/src/main/python/docs`. Additional details on processing the docs can be found [here](https://github.com/locationtech/rasterframes/tree/develop/pyrasterframes#running-python-markdown-sources). + +Filenames shall be in skewer case; lower case with dashes ('-') separating words. For example, `foo-in-bar-with-baz.md`. + +## Target Audience + +The target audience for the _RasterFrames User's Manual_ is the intermediate data scientist or developer, already adept at either Python or Scala. Eventually this should be expanded to include SQL adepts. This user may or may not be an expert at EO data usage, so attention to jargon, undefined terms, new concepts etc. 
should be kept in mind, making use of authoritative external resources to fill in knowledge. + +> Enumerate concepts they are aware of, including: +> * Scene discretization +> * Temporal revisit rate +> * Spatial resolution +> * Spatial extent + +The user may or may not be familiar with Apache Spark, so they should also be guided in filling in minimum requisite knowledge. At a minimum we have to explain what a `SparkSession` is, and that we have to configure it; note the difference between an "action" and "transformation"; what a "collect" action is, and the consequences if the data is large; awareness of partitioning. + +There's also an opportunity to emphasize the scalability benefits over, say, a rasterio/Pandas-only solution (but that we interop with them too). + +The users' goals with EO data could come from a number of different perspectives: + +* Creating map layers +* Statistical analysis +* Machine learning +* Change detection +* Chip preparation + +While the emphasis of the documentation should be on the core capabilities (and flexibility therein) of RasterFrames, a nod toward these various needs in the examples shown can go a long way in helping the user understand appropriateness of the library to their problem domain. + +## Pedagogical Technique + +The documentation shall emphasize the use of executable code examples, with interspersed prose to explain them. The RasterFrames tooling supports the `.pymd` (Python Markdown) format whereby delimited code blocks are evaluated at build time in order to include output/results. The project currently uses ['Pweave'](http://mpastell.com/pweave/chunks.html) to do this (this may change, but `.pymd` will remain the source format). `Pweave` also has the ability to convert `.pymd` to Jupyter Notebooks, which may prove useful. Through this process we can be assured that any examples shown are code the user can copy into their own projects. 
+ +Visuals are always helpful, but even more so when there's a lot of code involved that needs continual contextualization and explanation. `Pweave` supports rendering of `matplotlib` charts/images, a capability we should make use of. Furthermore, where beneficial, we should create diagrams or other visuals to help express concepts and relationships. + +This "code-first" focus is admittedly in tension with the competing need to explain some of the more abstract aspects of distributed computing necessary for advanced monitoring, profiling, optimization, and deployment. We should evolve the documentation over the long term to address some of these needs, but in the near term the focus should be on the core conceptual model necessary for understanding tile processing. Diagrams can be helpful here. + +## Polyglot Considerations + +In terms of implementation, RasterFrames is a Scala project first. All algorithmic, data modeling, and heavy lifting, etc. are implemented in Scala. + +However, due to user base preferences, RasterFrames is primarily _deployed_ through Python. As such, documentation, examples, etc. should first be implemented in Python. + +Secondarily to that, SQL should be used to highlight the analyst-friendly expression of the functionality in SQL. At least a handful of examples in a dedicated SQL page would go far in showing the cross-language support. + +Thirdly, Scala developers should be encouraged to use the platform, clearly stating that the APIs are on equal footing, using consistent naming conventions, etc. and that most examples should translate almost one-to-one. + +In the long term I'd love to see Python, Scala, and SQL all treated on equal footing, with examples expressed in all languages, but that's a tall order this early in the project development. + +## User Journey + +As noted in the _Target Audience_ section, the documentation needs to guide the user through the process from curiosity around EO data to scalable processing of it. 
Within the first section or so the user should see an example that reads an image and does something somewhat compelling with it, noting that the same code will work on a laptop with a small amount of imagery, as well as on 100s of computers with TB (or more) of imagery. Problems "solved in the small" can be grown to "global scale". + +With a "journey" focus, concepts and capabilities are introduced incrementally, building upon previous examples and concepts, adding more complexity as it develops. Once the fundamentals are covered, we then move into the examples that are closer to use-cases or cookbook entries. + +The preliminary outline is as follows, but is open for refinement, rethinking, etc. + +1. Description +2. Architecture +3. Getting Started + * `pyspark` shell + * Jupyter Notebook + * Standalone Python Script +4. Raster Data I/O + * Reading Raster Data + * Writing Raster Data +5. Spatial Relations +6. Raster Processing + * Local Algebra + * “NoData” Handling + * Aggregation + * Time Series + * Spark ML Support + * Pandas and NumPy Interoperability +7. Cookbook Examples +8. Extended Examples / Case Studies +9. Function Reference + +## Hiccups + +During the documentation process we are likely to run into problems whereby the goal of the writer is inhibited by a bug or capability gap in the code. We should use this opportunity to improve the library to provide the optimal user experience before resorting to workarounds or hacks or addition of what might seem to be arbitrary complexity. + +## Testing + +To "be all that we can be", testing the documentation against a new user is a boon. It may be hard to capture volunteers to do this, but we should consider enlisting interns and friends of the company to go through the documentation and give feedback on where gaps exist. 
+ + + diff --git a/docs/src/main/latex/header.latex b/docs/src/main/latex/header.latex new file mode 100644 index 000000000..50a53b5da --- /dev/null +++ b/docs/src/main/latex/header.latex @@ -0,0 +1,9 @@ +\DeclareUnicodeCharacter{2218}{$\circ$} +\DeclareUnicodeCharacter{2714}{$\checkmark$} +\DeclareUnicodeCharacter{21A9}{$\newline$} +\hypersetup{ + colorlinks=true, + linkcolor=blue, + allbordercolors={0 0 0}, + pdfborderstyle={/S/U/W 1} +} \ No newline at end of file diff --git a/docs/src/main/tut/CNAME b/docs/src/main/paradox/CNAME similarity index 100% rename from docs/src/main/tut/CNAME rename to docs/src/main/paradox/CNAME diff --git a/docs/src/main/paradox/RasterFramePipeline.png b/docs/src/main/paradox/RasterFramePipeline.png new file mode 100644 index 000000000..26900b8cf Binary files /dev/null and b/docs/src/main/paradox/RasterFramePipeline.png differ diff --git a/docs/src/main/tut/RasterFramePipeline.svg b/docs/src/main/paradox/RasterFramePipeline.svg similarity index 100% rename from docs/src/main/tut/RasterFramePipeline.svg rename to docs/src/main/paradox/RasterFramePipeline.svg diff --git a/docs/src/main/paradox/_template/page.st b/docs/src/main/paradox/_template/page.st index c264d9e7c..2a32dd170 100644 --- a/docs/src/main/paradox/_template/page.st +++ b/docs/src/main/paradox/_template/page.st @@ -31,6 +31,8 @@ .md-left { float: left; } .md-right { float: right; } .md-clear { clear: both; } + table { font-size: 80%; } + code { font-size: 0.75em !important; } @@ -132,7 +134,7 @@