Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle sparse GeoSeries better and add expand_values to convert sparse to dense. #1147

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions python/cuspatial/cuspatial/core/binops/intersection.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ def pairwise_linestring_intersection(
not contains_only_linestrings(s) for s in [linestrings1, linestrings2]
):
raise ValueError("Input GeoSeries must contain only linestrings.")
if len(linestrings1._column.lines) != len(linestrings2._column.lines):
linestrings1 = linestrings1.expand_values()
linestrings2 = linestrings2.expand_values()

geoms, look_back_ids = c_pairwise_linestring_intersection(
linestrings1._column.lines._column, linestrings2._column.lines._column
Expand Down
50 changes: 49 additions & 1 deletion python/cuspatial/cuspatial/core/geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def __init__(

@property
def feature_types(self):
# Per-row geometry type ids, read from the column metadata.
# NOTE(review): this span comes from a diff hunk whose +/- markers were
# lost. The hunk is a one-line replacement, so the first `return` below is
# presumably the removed line and the second one its replacement, which
# additionally drops the existing index via `reset_index(drop=True)`.
# Read literally as code, the second `return` would be unreachable.
return self._column._meta.input_types
return self._column._meta.input_types.reset_index(drop=True)

@property
def type(self):
Expand Down Expand Up @@ -997,6 +997,54 @@ def reset_index(
self.index = cudf_series.index
return None

def expand_values(self):
    """Convert a potentially sparse view of a GeoSeries into a dense one.

    For each geometry type whose child column is non-empty, the child
    rows are gathered through the ``union_offsets`` entries of the rows
    whose ``input_types`` match that type, so every such row gets its own
    entry in the gathered child column. A child column that is empty is
    replaced by an empty ``cudf.Series``.

    Returns
    -------
    GeoSeries
        A new GeoSeries built from the gathered child columns. A new
        object is returned even if self is already dense.
    """
    source = self._column
    meta = source._meta

    def gather(children, feature):
        # Nothing stored for this geometry type: hand back an empty series
        # instead of indexing into an empty child column.
        if len(children) == 0:
            return cudf.Series([])
        rows_of_type = meta.input_types == feature.value
        return children[meta.union_offsets[rows_of_type]]

    # Child order matters: points, multipoints, linestrings, polygons.
    dense_children = (
        gather(source.points, Feature_Enum.POINT),
        gather(source.mpoints, Feature_Enum.MULTIPOINT),
        gather(source.lines, Feature_Enum.LINESTRING),
        gather(source.polygons, Feature_Enum.POLYGON),
    )

    # NOTE(review): the metadata is passed through unchanged, i.e. the new
    # column reuses the original union_offsets even though the children
    # were just re-gathered. Confirm these offsets still address the
    # intended rows of the gathered children.
    dense_meta = {
        "input_types": meta.input_types,
        "union_offsets": meta.union_offsets,
    }
    return GeoSeries(GeoColumn(dense_children, dense_meta))

def contains(self, other, align=False, allpairs=False, mode="full"):
"""Returns a `Series` of `dtype('bool')` with value `True` for each
aligned geometry that contains _other_.
Expand Down
166 changes: 166 additions & 0 deletions python/cuspatial/cuspatial/tests/test_geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,3 +757,169 @@ def test_from_polygons_xy_example():
[Polygon([(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (0, 0)])]
)
gpd.testing.assert_geoseries_equal(gpolygon.to_geopandas(), hpolygon)


def test_expand_values_points():
    """Three rows viewing two stored points expand to three points."""
    sparse = cuspatial.GeoSeries([Point(0, 0), Point(1, 1)])

    # Rewrite the metadata by hand so that three rows (offsets 0, 1, 0)
    # reference the two stored points.
    meta = sparse._column._meta
    meta.input_types = cudf.Series([0] * 3)
    meta.union_offsets = cudf.Series([0, 1, 0])
    sparse._index = cudf.Series(list(range(3)))

    dense = sparse.expand_values()
    assert len(dense._column.points) == 3


def test_expand_values_multipoints():
    """Six rows viewing two stored multipoints expand to six multipoints."""
    row_count = 6
    sparse = cuspatial.GeoSeries(
        [MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3)])]
    )

    # Hand-written metadata: every row carries type id 1 and the offsets
    # alternate between the two stored multipoints.
    meta = sparse._column._meta
    meta.input_types = cudf.Series([1] * row_count)
    meta.union_offsets = cudf.Series([0, 1] * 3)
    sparse._index = cudf.Series(list(range(row_count)))

    dense = sparse.expand_values()
    assert len(dense._column.mpoints) == row_count


def test_expand_values_linestrings():
    """Nine rows viewing two stored linestrings expand to nine linestrings."""
    row_count = 9
    stored = [
        LineString([(0, 0), (1, 1), (2, 2)]),
        LineString([(3, 3), (4, 4), (5, 5)]),
    ]
    sparse = cuspatial.GeoSeries(stored)

    # Hand-written metadata: every row carries type id 2 and the offsets
    # alternate 0, 1, ..., ending on 0 because the row count is odd.
    meta = sparse._column._meta
    meta.input_types = cudf.Series([2] * row_count)
    meta.union_offsets = cudf.Series([0, 1] * 4 + [0])
    sparse._index = cudf.Series(list(range(row_count)))

    dense = sparse.expand_values()
    assert len(dense._column.lines) == row_count


def test_expand_values_polygons():
    """Ten rows viewing two stored polygons expand to ten polygons."""
    row_count = 10
    stored = [
        Polygon([(0, 0), (1, 1), (2, 2), (0, 0)]),
        Polygon([(3, 3), (4, 4), (5, 5), (3, 3)]),
    ]
    sparse = cuspatial.GeoSeries(stored)

    # Hand-written metadata: every row carries type id 3 and the offsets
    # alternate between the two stored polygons.
    meta = sparse._column._meta
    meta.input_types = cudf.Series([3] * row_count)
    meta.union_offsets = cudf.Series([0, 1] * 5)
    sparse._index = cudf.Series(list(range(row_count)))

    dense = sparse.expand_values()
    assert len(dense._column.polygons) == row_count


def test_expand_values_mixed():
    """Exercise expand_values on a GeoSeries that mixes geometry types and
    also references its stored geometries more than once."""
    geometries = [
        Polygon([(94, 94), (92, 92), (93, 93), (94, 94)]),
        Point(0, 0),
        LineString([(95, 95), (96, 96), (97, 97)]),
        MultiPoint([(1, 1), (2, 2)]),
        LineString([(3, 3), (4, 4), (5, 5)]),
        MultiPoint([(98, 98), (99, 99)]),
        Polygon([(6, 6), (7, 7), (8, 8), (6, 6)]),
        Point(100, 100),
    ]
    # Type ids and offsets for one pass over the eight geometries; the
    # sparse view below repeats this pattern twice, giving sixteen rows.
    row_types = [3, 0, 2, 1, 2, 1, 3, 0]
    row_offsets = [1, 0, 1, 0, 0, 1, 0, 1]

    sparse = cuspatial.GeoSeries(geometries)
    meta = sparse._column._meta
    meta.input_types = cudf.Series(row_types * 2)
    meta.union_offsets = cudf.Series(row_offsets * 2)
    sparse._index = cudf.RangeIndex(16)

    dense = sparse.expand_values()

    # Two stored geometries per type, each referenced twice.
    dense_column = dense._column
    assert len(dense_column.points) == 4
    assert len(dense_column.mpoints) == 4
    assert len(dense_column.lines) == 4
    assert len(dense_column.polygons) == 4

    # The expected series is the original geometry list, twice over.
    expected = gpd.GeoSeries(geometries * 2)
    pd.testing.assert_series_equal(dense.to_geopandas(), expected)


def test_expand_values_multi():
    """Exercise expand_values on a MultiPolygon and a MultiLineString that
    are each referenced by two rows."""
    geometries = [
        MultiPolygon(
            [
                Polygon([(0, 0), (1, 1), (2, 2), (0, 0)]),
                Polygon([(3, 3), (4, 4), (5, 5), (3, 3)]),
            ]
        ),
        MultiLineString(
            [
                LineString([(6, 6), (6, 6), (7, 7)]),
                LineString([(8, 8), (9, 9), (10, 10)]),
            ]
        ),
    ]

    sparse = cuspatial.GeoSeries(geometries)
    # Four rows alternate between type ids 3 and 2, all at offset 0.
    meta = sparse._column._meta
    meta.input_types = cudf.Series([3, 2] * 2)
    meta.union_offsets = cudf.Series([0] * 4)
    sparse._index = cudf.RangeIndex(4)

    dense = sparse.expand_values()

    dense_column = dense._column
    assert len(dense_column.polygons) == 2
    assert len(dense_column.lines) == 2

    # The expected series is the original pair of geometries, twice over.
    expected = gpd.GeoSeries(geometries * 2)
    pd.testing.assert_series_equal(dense.to_geopandas(), expected)