Skip to content

Commit

Permalink
add boundless reads (#51)
Browse files Browse the repository at this point in the history
* add boundless reads

* add boundless read and fill value config

* add boundless read test cases

* fix test

* fix test

* update readme

* test boundless get tile
  • Loading branch information
geospatial-jeff authored Jun 14, 2020
1 parent ff0ed6b commit 21f66ad
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 11 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ Configuration options are exposed through environment variables:
- **INGESTED_BYTES_AT_OPEN** - defines the number of bytes in the first GET request at file opening (defaults to 16KB)
- **ENABLE_CACHE** - determines if range requests are cached in memory (defaults to TRUE)
- **HTTP_MERGE_CONSECUTIVE_RANGES** - determines if consecutive ranges are merged into a single request (defaults to FALSE)
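- **BOUNDLESS_READ** - determines if internal tiles outside the bounds of the IFD are read; when TRUE, missing tiles are filled with `BOUNDLESS_READ_FILL_VALUE`, and when FALSE an exception is raised instead (defaults to TRUE)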
- **BOUNDLESS_READ_FILL_VALUE** - determines the value used to fill boundless reads (defaults to 0)
- **LOG_LEVEL** - determines the log level used by the package (defaults to ERROR)
- **VERBOSE_LOGS** - enables verbose logging, designed for use when `LOG_LEVEL=DEBUG` (defaults to FALSE)

Expand Down
19 changes: 15 additions & 4 deletions aiocogeo/cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,17 @@ async def get_tile(self, x: int, y: int, z: int) -> np.ndarray:
if z > len(self.ifds):
raise TileNotFoundError(f"Overview {z} does not exist.")
ifd = self.ifds[z]
idx = (y * ifd.tile_count[0]) + x
if idx > len(ifd.TileOffsets):
raise TileNotFoundError(f"Tile {x} {y} {z} does not exist")
xmax, ymax = ifd.tile_count

# Return an empty array if tile is outside bounds of image
if x < 0 or y < 0 or x >= xmax or y >= ymax:
if not config.BOUNDLESS_READ:
raise TileNotFoundError(f"Internal tile {z}/{x}/{y} does not exist")
tile = np.full(
(ifd.bands, ifd.TileHeight.value, ifd.TileWidth.value),
fill_value=config.BOUNDLESS_READ_FILL_VALUE
)
return tile

# Request the tile
futures.append(
Expand All @@ -190,7 +198,7 @@ async def get_tile(self, x: int, y: int, z: int) -> np.ndarray:
# Apply mask
tile[1] = np.invert(np.broadcast_to(tile[1], tile[0].shape))
return np.ma.masked_array(*tile)
# Explicitely check for None because nodata is often 0
# Explicitly check for None because nodata is often 0
if ifd.nodata is not None:
return np.ma.masked_where(tile[0] == ifd.nodata, tile[0])
return tile[0]
Expand All @@ -212,6 +220,9 @@ async def read(self, bounds: Tuple[float, float, float, float], shape: Tuple[int
dtype=ifd.dtype
)

if not self._intersect_bounds(bounds, self.bounds):
raise TileNotFoundError("Partial read is outside bounds of the image")

# Request those tiles
if config.HTTP_MERGE_CONSECUTIVE_RANGES:
img_arr = await self._request_merged_tiles(img_tiles)
Expand Down
12 changes: 12 additions & 0 deletions aiocogeo/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,15 @@
HTTP_MERGE_CONSECUTIVE_RANGES: bool = os.getenv(
    "HTTP_MERGE_CONSECUTIVE_RANGES", "FALSE"
).upper() == "TRUE"


# Determines if internal tiles outside the bounds of the IFD are read. When set to ``TRUE`` (default), if a partial read
# isn't fully covered by internal tiles, missing tiles will be created using the fill value defined by the
# ``BOUNDLESS_READ_FILL_VALUE`` config option. When set to ``FALSE``, an exception will be raised instead.
# The value is compared case-insensitively (like ``HTTP_MERGE_CONSECUTIVE_RANGES``); any value other than ``FALSE``
# leaves boundless reads enabled.
BOUNDLESS_READ: bool = os.getenv("BOUNDLESS_READ", "TRUE").upper() != "FALSE"


# Determines the fill value used for boundless reads. The environment value must parse as an integer.
BOUNDLESS_READ_FILL_VALUE: int = int(os.getenv("BOUNDLESS_READ_FILL_VALUE", "0"))
4 changes: 0 additions & 4 deletions aiocogeo/ifd.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,6 @@ def is_full_resolution(self) -> bool:
async def _get_tile(self, x: int, y: int) -> np.ndarray:
"""Read the requested tile from the IFD"""
idx = (y * self.tile_count[0]) + x
if idx > len(self.TileOffsets):
raise TileNotFoundError(f"Tile {x} {y} does not exist")
offset = self.TileOffsets[idx]
byte_count = self.TileByteCounts[idx] - 1
img_bytes = await self._file_reader.range_request(offset, byte_count)
Expand All @@ -154,8 +152,6 @@ class MaskIFD(ImageIFD):
async def _get_tile(self, x: int, y: int) -> np.ndarray:
"""Read the requested tile from the IFD"""
idx = (y * self.tile_count[0]) + x
if idx > len(self.TileOffsets):
raise TileNotFoundError(f"Tile {x} {y} does not exist")
offset = self.TileOffsets[idx]
byte_count = self.TileByteCounts[idx] - 1
img_bytes = await self._file_reader.range_request(offset, byte_count)
Expand Down
26 changes: 24 additions & 2 deletions aiocogeo/partial_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import numpy as np
from skimage.transform import resize

from .filesystems import Filesystem
from . import config
from .ifd import ImageIFD, MaskIFD
from .utils import run_in_background

Expand Down Expand Up @@ -81,7 +81,29 @@ async def read(
@property
def _add_mask(self) -> bool:
    """
    Determine if a mask needs to be added to the array

    A mask is needed when ``self.is_masked`` is truthy or when a nodata value
    is defined.  ``nodata`` is compared against ``None`` explicitly so that a
    nodata value of ``0`` still requests a mask.
    """
    return bool(self.is_masked or self.nodata is not None)


@staticmethod
def _intersect_bounds(
    read_bounds: Tuple[float, float, float, float],
    cog_bounds: Tuple[float, float, float, float]
) -> bool:
    """
    Check whether the requested bounding box overlaps the COG's bounding box

    Comparisons are strict, so boxes that only share an edge or a corner are
    reported as not intersecting.
    https://github.com/cogeotiff/rio-tiler/blob/2.0a11/rio_tiler/utils.py#L254-L283
    """
    # NOTE(review): assumes both tuples are ordered (min_x, min_y, max_x, max_y) -- confirm against callers
    overlaps_x = (cog_bounds[0] < read_bounds[2]) and (cog_bounds[2] > read_bounds[0])
    overlaps_y = (cog_bounds[3] > read_bounds[1]) and (cog_bounds[1] < read_bounds[3])
    return overlaps_x and overlaps_y

def _get_overview_level(
self, bounds: Tuple[float, float, float, float], width: int, height: int
Expand Down
77 changes: 76 additions & 1 deletion tests/test_cog_reader.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import math
import random

from morecantile.models import TileMatrixSet
import mercantile
Expand All @@ -14,7 +15,7 @@
from aiocogeo import config
from aiocogeo.ifd import IFD
from aiocogeo.tag import Tag
from aiocogeo.errors import InvalidTiffError
from aiocogeo.errors import InvalidTiffError, TileNotFoundError

from .conftest import TEST_DATA

Expand Down Expand Up @@ -285,6 +286,80 @@ async def test_cog_read_merge_range_requests_with_internal_nodata_mask(create_co
assert tile_data.shape == tile_data_merged.shape


@pytest.mark.asyncio
async def test_boundless_read(create_cog_reader, monkeypatch):
    """
    The same partial read raises ``TileNotFoundError`` when boundless reads are
    disabled and succeeds when they are enabled.
    """
    infile = "http://async-cog-reader-test-data.s3.amazonaws.com/webp_web_optimized_cog.tif"
    # NOTE(review): this tile presumably extends past the COG's internal tiles -- confirm against the test data
    read_kwargs = {
        "bounds": mercantile.xy_bounds(mercantile.Tile(x=701, y=1634, z=12)),
        "shape": (256, 256),
    }

    # Boundless reads disabled: the read must fail
    monkeypatch.setattr(config, "BOUNDLESS_READ", False)
    async with create_cog_reader(infile) as cog:
        with pytest.raises(TileNotFoundError):
            await cog.read(**read_kwargs)

    # Boundless reads enabled: the same read must complete without raising
    monkeypatch.setattr(config, "BOUNDLESS_READ", True)
    async with create_cog_reader(infile) as cog:
        await cog.read(**read_kwargs)


@pytest.mark.asyncio
async def test_boundless_read_fill_value(create_cog_reader, monkeypatch):
    """
    Changing ``BOUNDLESS_READ_FILL_VALUE`` changes how many pixels of a
    partial read carry that value.
    """
    infile = "http://async-cog-reader-test-data.s3.amazonaws.com/webp_web_optimized_cog.tif"
    bounds = mercantile.xy_bounds(mercantile.Tile(x=701, y=1634, z=12))

    async with create_cog_reader(infile) as cog:
        # Baseline with the configured default: number of pixels equal to 1
        arr = await cog.read(bounds=bounds, shape=(256, 256))
        values, occurrences = np.unique(arr, return_counts=True)
        assert dict(zip(values, occurrences))[1] == 127

        # Switch the fill value to 1 and repeat the identical read
        monkeypatch.setattr(config, "BOUNDLESS_READ_FILL_VALUE", 1)

        arr = await cog.read(bounds=bounds, shape=(256, 256))
        values, occurrences = np.unique(arr, return_counts=True)
        assert dict(zip(values, occurrences))[1] == 166142


@pytest.mark.asyncio
@pytest.mark.parametrize("infile", TEST_DATA)
async def test_boundless_get_tile(create_cog_reader, infile, monkeypatch):
    """
    Requesting a tile at negative indices returns an array made up entirely
    of the configured fill value.
    """
    async with create_cog_reader(infile) as cog:
        fill_value = random.randint(0, 100)
        monkeypatch.setattr(config, "BOUNDLESS_READ_FILL_VALUE", fill_value)

        # x=-1, y=-1 lies outside of the IFD; boundless reads are expected to be enabled here
        tile = await cog.get_tile(x=-1, y=-1, z=0)
        values, occurrences = np.unique(tile, return_counts=True)
        counts = dict(zip(values, occurrences))

        # Every element of the three-dimensional tile must equal the fill value
        expected_pixels = tile.shape[0] * tile.shape[1] * tile.shape[2]
        assert counts[fill_value] == expected_pixels


@pytest.mark.asyncio
@pytest.mark.parametrize("infile", TEST_DATA)
async def test_read_not_in_bounds(create_cog_reader, infile):
    """
    A partial read over the bounds of web-mercator tile 25/0/0 raises
    ``TileNotFoundError`` for every test dataset.
    """
    bounds = mercantile.xy_bounds(mercantile.Tile(x=0, y=0, z=25))

    async with create_cog_reader(infile) as cog:
        # The bounds are computed in EPSG:3857; convert them when the COG uses another CRS
        needs_reprojection = cog.epsg != 3857
        if needs_reprojection:
            bounds = transform_bounds("EPSG:3857", f"EPSG:{cog.epsg}", *bounds)

        with pytest.raises(TileNotFoundError):
            await cog.read(bounds=bounds, shape=(256, 256))


@pytest.mark.asyncio
@pytest.mark.parametrize(
"width,height", [(500, 500), (1000, 1000), (5000, 5000), (10000, 10000)]
Expand Down

0 comments on commit 21f66ad

Please sign in to comment.