Skip to content

Commit

Permalink
updated data misc with worldcover download
Browse files Browse the repository at this point in the history
  • Loading branch information
bpstewar committed Nov 22, 2024
1 parent 01e0c93 commit b452604
Show file tree
Hide file tree
Showing 6 changed files with 190 additions and 52 deletions.
4 changes: 0 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@ Future releases can be built from source, but pip will contain the most recent s

Please refer to the World Bank's Github [Contributing](docs/CONTRIBUTING.md) guidelines.

## Code of Conduct

The <span style="color:#3EACAD">template</span> maintains a [Code of Conduct](docs/CODE_OF_CONDUCT.md) to ensure an inclusive and respectful environment for everyone. Please adhere to it in all interactions within our community.

## License

This project is licensed under the [**Mozilla Public License**](https://www.mozilla.org/en-US/MPL).
28 changes: 13 additions & 15 deletions docs/_config.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Book settings
title:
title: GOSTrocks!
author: Geospatial Operations Support Team (GOST)
logo: docs/images/logo.png
only_build_toc_files: false
only_build_toc_files: true

repository:
url: https://github.com/worldbank/GOSTrocks
Expand All @@ -11,34 +11,32 @@ repository:
#######################################################################################
# HTML-specific settings
html:
home_page_in_navbar: false
home_page_in_navbar: true
extra_navbar: ""
use_edit_page_button: true
use_repository_button: true
use_issues_button: true
baseurl: https://worldbank.github.io/GOSTrocks
baseurl: https://github.com/worldbank/DECAT_Space2Stats
extra_footer: |
<div>
Country borders or names do not necessarily reflect the World Bank Group’s official position. All maps are for illustrative purposes and do not imply the expression of any opinion on the part of the World Bank concerning the legal status of any country or territory or concerning the delimitation of frontiers or boundaries.
<b>All content (unless otherwise specified) is subject to the <a href="https://raw.githubusercontent.com/worldbank/template/main/LICENSE">World Bank Master Community License Agreement.</a></b>
</div>
<div>
<b>All content (unless otherwise specified) is subject to the <a href="https://www.mozilla.org/en-US/MPL">Mozilla Public License.</a></b>
</div>
favicon: docs/images/favicon.ico

#######################################################################################
# Execution settings
execute:
execute_notebooks: off

#######################################################################################
# Bibliography settings
bibtex_bibfiles:
- docs/bibliography.bib
allow_errors: true
exclude_patterns:
- notebooks/*.ipynb

#######################################################################################
# Sphinx settings
sphinx:
config:
html_show_copyright: false
html_last_updated_fmt: "%b %d, %Y"
apidoc_module_dir: ../space2stats_api/src
extra_extensions:
- 'sphinx.ext.autodoc'
- sphinx.ext.napoleon
- sphinxcontrib.apidoc
95 changes: 87 additions & 8 deletions notebooks/AWS_Summarize.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,21 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import boto3\n",
"import urllib3\n",
"\n",
"import pandas as pd"
"import pandas as pd\n",
"\n",
"urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 5,
"metadata": {
"scrolled": true
},
Expand Down Expand Up @@ -96,15 +99,91 @@
"Completed loop: 62\n",
"Completed loop: 63\n",
"Completed loop: 64\n",
"Completed loop: 65\n"
"Completed loop: 65\n",
"Completed loop: 66\n",
"Completed loop: 67\n",
"Completed loop: 68\n",
"Completed loop: 69\n",
"Completed loop: 70\n",
"Completed loop: 71\n",
"Completed loop: 72\n",
"Completed loop: 73\n",
"Completed loop: 74\n",
"Completed loop: 75\n",
"Completed loop: 76\n",
"Completed loop: 77\n",
"Completed loop: 78\n",
"Completed loop: 79\n",
"Completed loop: 80\n",
"Completed loop: 81\n",
"Completed loop: 82\n",
"Completed loop: 83\n",
"Completed loop: 84\n",
"Completed loop: 85\n",
"Completed loop: 86\n",
"Completed loop: 87\n",
"Completed loop: 88\n",
"Completed loop: 89\n",
"Completed loop: 90\n",
"Completed loop: 91\n",
"Completed loop: 92\n",
"Completed loop: 93\n",
"Completed loop: 94\n",
"Completed loop: 95\n",
"Completed loop: 96\n",
"Completed loop: 97\n",
"Completed loop: 98\n",
"Completed loop: 99\n",
"Completed loop: 100\n",
"Completed loop: 101\n",
"Completed loop: 102\n",
"Completed loop: 103\n",
"Completed loop: 104\n",
"Completed loop: 105\n",
"Completed loop: 106\n",
"Completed loop: 107\n",
"Completed loop: 108\n",
"Completed loop: 109\n",
"Completed loop: 110\n",
"Completed loop: 111\n",
"Completed loop: 112\n",
"Completed loop: 113\n",
"Completed loop: 114\n",
"Completed loop: 115\n",
"Completed loop: 116\n",
"Completed loop: 117\n",
"Completed loop: 118\n",
"Completed loop: 119\n",
"Completed loop: 120\n",
"Completed loop: 121\n",
"Completed loop: 122\n",
"Completed loop: 123\n",
"Completed loop: 124\n",
"Completed loop: 125\n",
"Completed loop: 126\n",
"Completed loop: 127\n",
"Completed loop: 128\n",
"Completed loop: 129\n",
"Completed loop: 130\n",
"Completed loop: 131\n",
"Completed loop: 132\n",
"Completed loop: 133\n",
"Completed loop: 134\n",
"Completed loop: 135\n",
"Completed loop: 136\n",
"Completed loop: 137\n",
"Completed loop: 138\n",
"Completed loop: 139\n",
"Completed loop: 140\n",
"Completed loop: 141\n"
]
}
],
"source": [
"bucket = \"wbg-geography01\"\n",
"prefix = \"sylvera\"\n",
"region = \"us-east-1\"\n",
"s3client = boto3.client(\"s3\", region_name=region)\n",
"s3client = boto3.client(\"s3\", region_name=region, verify=False)\n",
"\n",
"# Loop through the S3 bucket and get all the file keys\n",
"more_results = True\n",
Expand Down Expand Up @@ -323,9 +402,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Earth Engine",
"display_name": "gostrocks",
"language": "python",
"name": "ee"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -337,7 +416,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.4"
"version": "3.11.10"
}
},
"nbformat": 4,
Expand Down
32 changes: 17 additions & 15 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,34 +31,36 @@ dynamic = ["version"]
requires-python = ">=3.7"

dependencies = [
"rasterio",
"geopandas",
"pandas",
"numexpr > 2.6.8",
"numpy",
"pyproj",
"seaborn",
"awscli",
"affine",
"boto3",
"botocore",
"contextily",
"matplotlib",
"tqdm",
"xarray",
"osmnx",
"affine",
"PyOpenSSL >= 23.2",
"click",
"Sphinx",
"coverage",
"awscli",
"flake8",
"geopandas",
"matplotlib",
"numexpr > 2.6.8",
"numpy",
"osmnx",
"gdal",
"pandas",
"pyproj",
"PyOpenSSL >= 23.2",
"python-dotenv>=0.5.1",
"rasterio",
"s3fs",
"seaborn",
"tqdm",
"xarray"
]

[project.optional-dependencies]
docs = [
"docutils==0.17.1", # https://jupyterbook.org/en/stable/content/citations.html?highlight=docutils#citations-and-bibliographies
"jupyter-book >=1,<2",
"sphinx"
]

[project.urls]
Expand Down
66 changes: 63 additions & 3 deletions src/GOSTrocks/dataMisc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@
import json
import urllib
import boto3
import boto3.session
import rasterio

import pandas as pd
import geopandas as gpd

from botocore.config import Config
from botocore import UNSIGNED
from osgeo import gdal

from . import rasterMisc as rMisc
import rasterMisc as rMisc


def download_WSF(
Expand Down Expand Up @@ -58,9 +60,9 @@ def aws_search_ntl(
:type verbose: bool, optional
"""
if unsigned:
s3client = boto3.client("s3", config=Config(signature_version=UNSIGNED))
s3client = boto3.client("s3", verify=False, config=Config(signature_version=UNSIGNED))
else:
s3client = boto3.client("s3")
s3client = boto3.client("s3", verify=False)

# Loop through the S3 bucket and get all the keys for files that are .tif
more_results = True
Expand Down Expand Up @@ -148,3 +150,61 @@ def get_fathom_vrts(return_df=False):
vrt_pd["PATH"] = all_vrts
return vrt_pd
return all_vrts

def get_worldcover(
    df,
    download_folder,
    worldcover_vrt="WorldCover.vrt",
    version="v200",
    print_command=False,
    verbose=False,
):
    """Download ESA WorldCover tiles from AWS and mosaic them into a VRT.

    Data source: https://aws.amazon.com/marketplace/pp/prodview-7oorylcamixxc

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        Data frame used to select tiles to download; tiles are selected when
        they intersect the union of all geometries in the data frame
    download_folder : string
        path to folder to download tiles; created if it does not exist
    worldcover_vrt : str, optional
        name of the VRT file to create inside ``download_folder``,
        by default 'WorldCover.vrt'
    version : str, optional
        version of WorldCover to download, by default 'v200' (2021 map);
        the other option is 'v100' (2020 map)
    print_command : bool, optional
        if true, print the awscli commands to download the missing tiles
        instead of downloading them. If false, uses boto3 to download the
        tiles, by default False
    verbose : bool, optional
        Print more updates during processing, by default False

    Returns
    -------
    list of str
        Local paths of every tile intersecting ``df``. When ``print_command``
        is true, some of these paths may not exist yet.

    Raises
    ------
    ValueError
        If ``version`` is not a known WorldCover release.

    Notes
    -----
    Local tile names do not include the version, so tiles of different
    versions should be downloaded to different folders.
    """
    # Each release maps a different reference year; both the version and the
    # year are part of the S3 object key.
    version_years = {"v100": "2020", "v200": "2021"}
    if version not in version_years:
        raise ValueError(
            f"Unknown WorldCover version {version!r}; "
            f"expected one of {sorted(version_years)}"
        )
    year = version_years[version]

    bucket = "esa-worldcover"
    esa_file_geojson = "esa_worldcover_grid.geojson"
    # NOTE(review): verify=False disables TLS certificate verification for the
    # S3 connection. Kept to preserve existing behaviour; confirm it is still
    # required before relying on it outside a trusted network.
    s3 = boto3.client("s3", verify=False, config=Config(signature_version=UNSIGNED))

    if download_folder:
        os.makedirs(download_folder, exist_ok=True)

    # The tile grid is cached locally so it is only fetched once per folder
    tiles_geojson = os.path.join(download_folder, esa_file_geojson)
    if not os.path.exists(tiles_geojson):
        s3.download_file(bucket, esa_file_geojson, tiles_geojson)

    tile_path = "{version}/{year}/map/ESA_WorldCover_10m_{year}_{version}_{tile}_Map.tif"

    in_tiles = gpd.read_file(tiles_geojson)
    # unary_union is deprecated in geopandas >= 1.0 in favour of union_all()
    footprint = df.union_all() if hasattr(df, "union_all") else df.unary_union
    sel_tiles = in_tiles.loc[in_tiles.intersects(footprint)]

    all_tiles = []
    for tile in sel_tiles["ll_tile"]:
        cur_tile_path = tile_path.format(version=version, year=year, tile=tile)
        cur_out = os.path.join(download_folder, f"WorldCover_{tile}.tif")
        all_tiles.append(cur_out)

        if os.path.exists(cur_out):
            if verbose:
                print(f"File {cur_out} already exists")
            continue

        if print_command:
            command = f"aws s3 --no-sign-request --no-verify-ssl cp s3://{bucket}/{cur_tile_path} {cur_out}"
            print(command)
        else:
            if verbose:
                print(f"Downloading {cur_tile_path} to {cur_out}")
            s3.download_file(bucket, cur_tile_path, cur_out)

    # Only mosaic tiles that are actually on disk; with print_command=True the
    # tiles may not have been downloaded yet.
    out_vrt = os.path.join(download_folder, worldcover_vrt)
    local_tiles = [tile_file for tile_file in all_tiles if os.path.exists(tile_file)]
    if local_tiles:
        vrt = gdal.BuildVRT(out_vrt, local_tiles, options=gdal.BuildVRTOptions())
        if vrt is not None:
            vrt.FlushCache()  # make sure the VRT XML is written to disk
        del vrt
    elif verbose:
        print(f"No local tiles found; skipping creation of {out_vrt}")

    return all_tiles

17 changes: 10 additions & 7 deletions src/GOSTrocks/rasterMisc.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from rasterio.warp import reproject, Resampling, calculate_default_transform
from rasterio.merge import merge
from rasterio.io import MemoryFile
from rasterio.crs import CRS
from contextlib import contextmanager

curPath = os.path.realpath(
Expand Down Expand Up @@ -121,18 +122,21 @@ def project_raster(srcRst, dstCrs, output_raster=""):
"""project raster to destination crs
Args:
srcRst (_type_): _description_
dstCrs (_type_): _description_
output_raster (_type_): _description_
"""
srcRst (rasterio.datasetReader): input rasterio to reproject
dstCrs (int): crs to project to
output_raster (string): file to write to, defaults to '', which writes nothing
"""
if dstCrs.__class__ == int:
dstCrs = CRS.from_epsg(dstCrs)

transform, width, height = calculate_default_transform(
srcRst.crs, dstCrs, srcRst.width, srcRst.height, *srcRst.bounds
)
kwargs = srcRst.meta.copy()
kwargs.update(
{"crs": dstCrs, "transform": transform, "width": width, "height": height}
)

# open destination raster
dstRst = np.zeros([kwargs["count"], width, height], kwargs["dtype"])

Expand All @@ -147,9 +151,8 @@ def project_raster(srcRst, dstCrs, output_raster=""):
dst_crs=dstCrs,
resampling=Resampling.nearest,
)

if output_raster != "":
with rasterio.open(output_raster, "w", *kwargs) as out_raster:
with rasterio.open(output_raster, "w", **kwargs) as out_raster:
out_raster.write(dstRst)

return [dstRst, kwargs]
Expand Down

0 comments on commit b452604

Please sign in to comment.