Skip to content

Initial geoarrow.shapely implementation #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions geoarrow/shapely/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# geoarrow.shapely

Interoperability between Shapely and GeoArrow in Python.
54 changes: 54 additions & 0 deletions geoarrow/shapely/geoarrow/shapely/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from .extension_array import (
LineStringArray,
MultiLineStringArray,
MultiPointArray,
MultiPolygonArray,
PointArray,
PolygonArray,
)
from .extension_scalar import (
LineString,
MultiLineString,
MultiPoint,
MultiPolygon,
Point,
Polygon,
)
from .extension_types import (
CoordinateDimension,
LineStringType,
MultiLineStringType,
MultiPointType,
MultiPolygonType,
PointType,
PolygonType,
construct_geometry_array,
)


def register_geometry_extension_types():
    """Register a default instance of every geometry extension type with pyarrow.

    Each type class is instantiated with ``interleaved=True`` and
    ``dims=CoordinateDimension.XY`` and handed to
    ``pa.register_extension_type``. If pyarrow reports (via ``ArrowKeyError``)
    that the extension name is already taken, the existing registration is
    removed and the fresh instance is registered in its place.
    """
    import pyarrow as pa

    geometry_type_classes = (
        PointType,
        LineStringType,
        PolygonType,
        MultiPointType,
        MultiLineStringType,
        MultiPolygonType,
    )

    for type_cls in geometry_type_classes:
        # Only a default formulation goes into the registry; other type
        # formulations can still be chosen at runtime.
        default_type = type_cls(interleaved=True, dims=CoordinateDimension.XY)
        try:
            pa.register_extension_type(default_type)
        except pa.ArrowKeyError:
            # Already registered under this name: replace the old registration.
            pa.unregister_extension_type(default_type.extension_name)
            pa.register_extension_type(default_type)


# Registration happens as a side effect of importing the package.
register_geometry_extension_types()
124 changes: 124 additions & 0 deletions geoarrow/shapely/geoarrow/shapely/extension_array.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
from __future__ import annotations

import numpy as np
import pyarrow as pa
from numpy.typing import NDArray

import shapely
from shapely import GeometryType


class PointArray(pa.ExtensionArray):
    """Extension array of point geometries convertible to shapely objects."""

    def to_shapely(self) -> NDArray[np.object_]:
        """Convert to an array of shapely geometries.

        The storage is flattened once into a 1-D numeric array and reshaped
        into rows of ``len(self.type.coord_dimension)`` values before being
        passed to ``shapely.from_ragged_array``.
        """
        values = self.storage.flatten().to_numpy()
        # NOTE(review): presumably coord_dimension is a string-like value such
        # as "xy", so its length is the number of ordinates -- confirm.
        coords = values.reshape(-1, len(self.type.coord_dimension))
        return shapely.from_ragged_array(GeometryType.POINT, coords, None)


class LineStringArray(pa.ExtensionArray):
    """Extension array of linestring geometries convertible to shapely objects."""

    def to_shapely(self) -> NDArray[np.object_]:
        """Convert to an array of shapely geometries.

        The storage is flattened twice to obtain the raw coordinate values,
        which are reshaped into rows of ``len(self.type.coord_dimension)``
        values. The list offsets of the storage delimit the linestrings.

        Returns:
            A numpy object array with one shapely LineString per element.
        """
        storage = self.storage

        # TODO: shapely fails on version 2.0.1 with a read-only coords buffer, so we
        # make a copy here by setting writable=True.
        # ValueError: buffer source array is read-only
        flat_coords = (
            storage.flatten()
            .flatten()
            .to_numpy(zero_copy_only=False, writable=True)
            .reshape(-1, len(self.type.coord_dimension))
        )

        geom_offsets = storage.offsets.to_numpy()
        # `flatten()` honours the array's slice offset, whereas `offsets`
        # returns the raw (possibly non-zero-based) offsets of a sliced array.
        # Rebase them so they index into `flat_coords`.
        if geom_offsets.size and geom_offsets[0] != 0:
            geom_offsets = geom_offsets - geom_offsets[0]

        return shapely.from_ragged_array(
            GeometryType.LINESTRING, flat_coords, (geom_offsets,)
        )


class PolygonArray(pa.ExtensionArray):
    """Extension array of polygon geometries convertible to shapely objects."""

    @staticmethod
    def _zero_based(offsets: NDArray[np.int32]) -> NDArray[np.int32]:
        """Return *offsets* shifted so that the first entry is zero."""
        if offsets.size and offsets[0] != 0:
            return offsets - offsets[0]
        return offsets

    def to_shapely(self) -> NDArray[np.object_]:
        """Convert to an array of shapely geometries.

        The storage is flattened three times to obtain the raw coordinate
        values, which are reshaped into rows of
        ``len(self.type.coord_dimension)`` values. Two levels of list offsets
        are passed to shapely: ring offsets (into the coordinates) and
        geometry offsets (into the rings).

        Returns:
            A numpy object array with one shapely Polygon per element.
        """
        polygons = self.storage
        rings = polygons.flatten()

        # TODO: shapely fails on version 2.0.1 with a read-only coords buffer, so we
        # make a copy here by setting writable=True.
        # ValueError: buffer source array is read-only
        flat_coords = (
            rings.flatten()
            .flatten()
            .to_numpy(zero_copy_only=False, writable=True)
            .reshape(-1, len(self.type.coord_dimension))
        )

        # `flatten()` honours slice offsets but `offsets` does not rebase, so
        # on a sliced array the raw offsets would point past the flattened
        # children. Shift every level back to zero.
        geom_offsets = self._zero_based(polygons.offsets.to_numpy())
        ring_offsets = self._zero_based(rings.offsets.to_numpy())

        return shapely.from_ragged_array(
            GeometryType.POLYGON, flat_coords, (ring_offsets, geom_offsets)
        )


class MultiPointArray(pa.ExtensionArray):
    """Extension array of multipoint geometries convertible to shapely objects."""

    def to_shapely(self) -> NDArray[np.object_]:
        """Convert to an array of shapely geometries.

        The storage is flattened twice to obtain the raw coordinate values,
        which are reshaped into rows of ``len(self.type.coord_dimension)``
        values. The list offsets of the storage delimit the multipoints.

        Returns:
            A numpy object array with one shapely MultiPoint per element.
        """
        storage = self.storage

        # TODO: shapely fails on version 2.0.1 with a read-only coords buffer, so we
        # make a copy here by setting writable=True.
        # ValueError: buffer source array is read-only
        flat_coords = (
            storage.flatten()
            .flatten()
            .to_numpy(zero_copy_only=False, writable=True)
            .reshape(-1, len(self.type.coord_dimension))
        )

        geom_offsets = storage.offsets.to_numpy()
        # `flatten()` honours the array's slice offset, whereas `offsets`
        # returns the raw (possibly non-zero-based) offsets of a sliced array.
        # Rebase them so they index into `flat_coords`.
        if geom_offsets.size and geom_offsets[0] != 0:
            geom_offsets = geom_offsets - geom_offsets[0]

        return shapely.from_ragged_array(
            GeometryType.MULTIPOINT, flat_coords, (geom_offsets,)
        )


class MultiLineStringArray(pa.ExtensionArray):
    """Extension array of multilinestring geometries convertible to shapely objects."""

    @staticmethod
    def _zero_based(offsets: NDArray[np.int32]) -> NDArray[np.int32]:
        """Return *offsets* shifted so that the first entry is zero."""
        if offsets.size and offsets[0] != 0:
            return offsets - offsets[0]
        return offsets

    def to_shapely(self) -> NDArray[np.object_]:
        """Convert to an array of shapely geometries.

        The storage is flattened three times to obtain the raw coordinate
        values, which are reshaped into rows of
        ``len(self.type.coord_dimension)`` values. Two levels of list offsets
        are passed to shapely: linestring offsets (into the coordinates) and
        geometry offsets (into the linestrings).

        Returns:
            A numpy object array with one shapely MultiLineString per element.
        """
        multilines = self.storage
        lines = multilines.flatten()

        # TODO: shapely fails on version 2.0.1 with a read-only coords buffer, so we
        # make a copy here by setting writable=True.
        # ValueError: buffer source array is read-only
        flat_coords = (
            lines.flatten()
            .flatten()
            .to_numpy(zero_copy_only=False, writable=True)
            .reshape(-1, len(self.type.coord_dimension))
        )

        # `flatten()` honours slice offsets but `offsets` does not rebase, so
        # on a sliced array the raw offsets would point past the flattened
        # children. Shift every level back to zero.
        geom_offsets = self._zero_based(multilines.offsets.to_numpy())
        line_offsets = self._zero_based(lines.offsets.to_numpy())

        return shapely.from_ragged_array(
            GeometryType.MULTILINESTRING, flat_coords, (line_offsets, geom_offsets)
        )


class MultiPolygonArray(pa.ExtensionArray):
    """Extension array of multipolygon geometries convertible to shapely objects."""

    @staticmethod
    def _zero_based(offsets: NDArray[np.int32]) -> NDArray[np.int32]:
        """Return *offsets* shifted so that the first entry is zero."""
        if offsets.size and offsets[0] != 0:
            return offsets - offsets[0]
        return offsets

    def to_shapely(self) -> NDArray[np.object_]:
        """Convert to an array of shapely geometries.

        The storage is flattened four times to obtain the raw coordinate
        values, which are reshaped into rows of
        ``len(self.type.coord_dimension)`` values. Three levels of list
        offsets are passed to shapely: ring offsets (into the coordinates),
        polygon offsets (into the rings) and geometry offsets (into the
        polygons).

        Returns:
            A numpy object array with one shapely MultiPolygon per element.
        """
        multipolygons = self.storage
        polygons = multipolygons.flatten()
        rings = polygons.flatten()

        # TODO: shapely fails on version 2.0.1 with a read-only coords buffer, so we
        # make a copy here by setting writable=True.
        # ValueError: buffer source array is read-only
        flat_coords = (
            rings.flatten()
            .flatten()
            .to_numpy(zero_copy_only=False, writable=True)
            .reshape(-1, len(self.type.coord_dimension))
        )

        # `flatten()` honours slice offsets but `offsets` does not rebase, so
        # on a sliced array the raw offsets would point past the flattened
        # children. Shift every level back to zero.
        geom_offsets = self._zero_based(multipolygons.offsets.to_numpy())
        polygon_offsets = self._zero_based(polygons.offsets.to_numpy())
        ring_offsets = self._zero_based(rings.offsets.to_numpy())

        return shapely.from_ragged_array(
            GeometryType.MULTIPOLYGON,
            flat_coords,
            (ring_offsets, polygon_offsets, geom_offsets),
        )
144 changes: 144 additions & 0 deletions geoarrow/shapely/geoarrow/shapely/extension_scalar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
from __future__ import annotations

import numpy as np
import pyarrow as pa

import shapely
from shapely import GeometryType

# TODO: change the Python repr of the pyarrow scalar value so that it doesn't call as_py
# and create a new GEOS object every time it prints the scalar?

# TODO: support separated coords; right now it assumes interleaved

# TODO: add tests where the selected scalar is _not_ the first polygon. The offsets are
# incorrect when not the first polygon.


class Point(pa.ExtensionScalar):
    """Extension scalar for a single point, convertible to a shapely Point."""

    def to_shapely(self) -> shapely.Point:
        """Return this scalar as a shapely Point (alias for ``as_py``)."""
        return self.as_py()

    def as_py(self) -> shapely.Point:
        """Build a shapely Point from the scalar's coordinate values.

        A new shapely object is created on every call.
        """
        raw_values = self.value.values.to_numpy()
        xy = raw_values.reshape(-1, len(self.type.coord_dimension))
        result = shapely.from_ragged_array(GeometryType.POINT, xy, None)
        # A scalar must map to exactly one geometry.
        assert len(result) == 1
        return result[0]


class LineString(pa.ExtensionScalar):
    """Extension scalar for a single linestring, convertible to shapely."""

    def to_shapely(self) -> shapely.LineString:
        """Return this scalar as a shapely LineString (alias for ``as_py``)."""
        return self.as_py()

    def as_py(self) -> shapely.LineString:
        """Build a shapely LineString from the scalar's coordinate values.

        The coordinates are copied into a writable numpy array and a single
        pair of offsets spanning all of them is synthesised.
        """
        raw_values = self.value.values.flatten().to_numpy(
            zero_copy_only=False, writable=True
        )
        vertices = raw_values.reshape(-1, len(self.type.coord_dimension))
        # One geometry covering every vertex.
        span = np.array([0, vertices.shape[0]], dtype=np.int32)
        result = shapely.from_ragged_array(
            GeometryType.LINESTRING, vertices, (span,)
        )
        assert len(result) == 1
        return result[0]


class Polygon(pa.ExtensionScalar):
    """Extension scalar for a single polygon, convertible to shapely."""

    def to_shapely(self) -> shapely.Polygon:
        """Return this scalar as a shapely Polygon (alias for ``as_py``)."""
        return self.as_py()

    def as_py(self) -> shapely.Polygon:
        """Build a shapely Polygon from the scalar's rings.

        Assumes interleaved coordinates. A new shapely object is created on
        every call.

        Returns:
            The shapely Polygon described by this scalar, including any
            interior rings.
        """
        rings = self.value.values
        coords = (
            rings.flatten()
            .flatten()
            .to_numpy(zero_copy_only=False, writable=True)
            .reshape(-1, len(self.type.coord_dimension))
        )

        ring_offsets = rings.offsets.to_numpy()
        # `rings` is a slice of the parent array's child data; its raw offsets
        # are relative to that parent, while `flatten()` yields only this
        # polygon's coordinates. Rebase so the offsets index into `coords`.
        if ring_offsets.size and ring_offsets[0] != 0:
            ring_offsets = ring_offsets - ring_offsets[0]

        # The single output geometry owns every ring (exterior plus holes),
        # not just the first one.
        geom_offsets = np.array([0, len(rings)], dtype=np.int32)

        geoms = shapely.from_ragged_array(
            GeometryType.POLYGON,
            coords,
            (ring_offsets, geom_offsets),
        )
        assert len(geoms) == 1
        return geoms[0]


class MultiPoint(pa.ExtensionScalar):
    """Extension scalar for a single multipoint, convertible to shapely."""

    def to_shapely(self) -> shapely.MultiPoint:
        """Return this scalar as a shapely MultiPoint (alias for ``as_py``)."""
        return self.as_py()

    def as_py(self) -> shapely.MultiPoint:
        """Build a shapely MultiPoint from the scalar's coordinate values.

        The coordinates are copied into a writable numpy array and a single
        pair of offsets spanning all of them is synthesised.
        """
        raw_values = self.value.values.flatten().to_numpy(
            zero_copy_only=False, writable=True
        )
        points = raw_values.reshape(-1, len(self.type.coord_dimension))
        # One geometry covering every point.
        span = np.array([0, points.shape[0]], dtype=np.int32)
        result = shapely.from_ragged_array(
            GeometryType.MULTIPOINT, points, (span,)
        )
        assert len(result) == 1
        return result[0]


class MultiLineString(pa.ExtensionScalar):
    """Extension scalar for a single multilinestring, convertible to shapely."""

    def to_shapely(self) -> shapely.MultiLineString:
        """Return this scalar as a shapely MultiLineString (alias for ``as_py``)."""
        return self.as_py()

    def as_py(self) -> shapely.MultiLineString:
        """Build a shapely MultiLineString from the scalar's linestrings.

        Assumes interleaved coordinates. A new shapely object is created on
        every call.

        Returns:
            The shapely MultiLineString described by this scalar, containing
            every member linestring.
        """
        lines = self.value.values
        coords = (
            lines.flatten()
            .flatten()
            .to_numpy(zero_copy_only=False, writable=True)
            .reshape(-1, len(self.type.coord_dimension))
        )

        line_offsets = lines.offsets.to_numpy()
        # `lines` is a slice of the parent array's child data; its raw offsets
        # are relative to that parent, while `flatten()` yields only this
        # geometry's coordinates. Rebase so the offsets index into `coords`.
        if line_offsets.size and line_offsets[0] != 0:
            line_offsets = line_offsets - line_offsets[0]

        # The single output geometry owns every linestring, not just the first.
        geom_offsets = np.array([0, len(lines)], dtype=np.int32)

        geoms = shapely.from_ragged_array(
            GeometryType.MULTILINESTRING,
            coords,
            (line_offsets, geom_offsets),
        )
        assert len(geoms) == 1
        return geoms[0]


class MultiPolygon(pa.ExtensionScalar):
    """Extension scalar for a single multipolygon, convertible to shapely."""

    @staticmethod
    def _zero_based(offsets: np.ndarray) -> np.ndarray:
        """Return *offsets* shifted so that the first entry is zero."""
        if offsets.size and offsets[0] != 0:
            return offsets - offsets[0]
        return offsets

    def to_shapely(self) -> shapely.MultiPolygon:
        """Return this scalar as a shapely MultiPolygon (alias for ``as_py``)."""
        return self.as_py()

    def as_py(self) -> shapely.MultiPolygon:
        """Build a shapely MultiPolygon from the scalar's polygons.

        Assumes interleaved coordinates. A new shapely object is created on
        every call.

        Returns:
            The shapely MultiPolygon described by this scalar, containing
            every member polygon.
        """
        polygons = self.value.values
        rings = polygons.flatten()
        coords = (
            rings.flatten()
            .flatten()
            .to_numpy(zero_copy_only=False, writable=True)
            .reshape(-1, len(self.type.coord_dimension))
        )

        # Both arrays are slices of the parent's child data, so their raw
        # offsets are relative to the parent; `flatten()` yields only this
        # geometry's children. Rebase each level to start at zero.
        polygon_offsets = self._zero_based(polygons.offsets.to_numpy())
        ring_offsets = self._zero_based(rings.offsets.to_numpy())

        # The single output geometry owns every polygon, not just the first.
        geom_offsets = np.array([0, len(polygons)], dtype=np.int32)

        geoms = shapely.from_ragged_array(
            GeometryType.MULTIPOLYGON,
            coords,
            (
                ring_offsets,
                polygon_offsets,
                geom_offsets,
            ),
        )
        assert len(geoms) == 1
        return geoms[0]
Loading