From 47c06427def1cc8df4ac12af58ac48358dc90863 Mon Sep 17 00:00:00 2001 From: chay0112 Date: Mon, 24 Nov 2025 21:44:06 -0700 Subject: [PATCH 1/4] implement concave hull --- python/sedona/spark/geopandas/base.py | 69 ++++++++++++++++++- python/sedona/spark/geopandas/geoseries.py | 9 ++- python/tests/geopandas/test_geoseries.py | 29 +++++++- .../geopandas/test_match_geopandas_series.py | 15 +++- 4 files changed, 115 insertions(+), 7 deletions(-) diff --git a/python/sedona/spark/geopandas/base.py b/python/sedona/spark/geopandas/base.py index 468303e759..1b7e053691 100644 --- a/python/sedona/spark/geopandas/base.py +++ b/python/sedona/spark/geopandas/base.py @@ -608,8 +608,73 @@ def centroid(self): """ return _delegate_to_geometry_column("centroid", self) - # def concave_hull(self, ratio=0.0, allow_holes=False): - # raise NotImplementedError("This method is not implemented yet.") + def concave_hull(self, ratio=0.0, allow_holes=False): + """Return a ``GeoSeries`` of geometries representing the concave hull + of vertices of each geometry. + + The concave hull of a geometry is the smallest concave `Polygon` + containing all the points in each geometry, unless the number of points + in the geometric object is less than three. For two points, the concave + hull collapses to a `LineString`; for 1, a `Point`. + + The hull is constructed by removing border triangles of the Delaunay + Triangulation of the points as long as their "size" is larger than the + maximum edge length ratio and optionally allowing holes. The edge length factor + is a fraction of the length difference between the longest and shortest edges + in the Delaunay Triangulation of the input points. For further information + on the algorithm used, see + https://libgeos.org/doxygen/classgeos_1_1algorithm_1_1hull_1_1ConcaveHull.html + + Parameters + ---------- + ratio : float, (optional, default 0.0) + Number in the range [0, 1]. Higher numbers will include fewer vertices + in the hull. 
+ allow_holes : bool, (optional, default False) + If set to True, the concave hull may have holes. + + Examples + -------- + >>> from sedona.spark.geopandas import GeoSeries + >>> from shapely.geometry import Polygon, LineString, Point, MultiPoint + >>> s = GeoSeries( + ... [ + ... Polygon([(0, 0), (1, 1), (0, 1)]), + ... LineString([(0, 0), (1, 1), (1, 0)]), + ... MultiPoint([(0, 0), (1, 1), (0, 1), (1, 0), (0.5, 0.5)]), + ... MultiPoint([(0, 0), (1, 1)]), + ... Point(0, 0), + ... ], + ... crs=3857 + ... ) + >>> s + 0 POLYGON ((0 0, 1 1, 0 1, 0 0)) + 1 LINESTRING (0 0, 1 1, 1 0) + 2 MULTIPOINT ((0 0), (1 1), (0 1), (1 0), (0.5 0... + 3 MULTIPOINT ((0 0), (1 1)) + 4 POINT (0 0) + dtype: geometry + + >>> s.concave_hull() + 0 POLYGON ((0 1, 1 1, 0 0, 0 1)) + 1 POLYGON ((0 0, 1 1, 1 0, 0 0)) + 2 POLYGON ((0.5 0.5, 0 1, 1 1, 1 0, 0 0, 0.5 0.5)) + 3 LINESTRING (0 0, 1 1) + 4 POINT (0 0) + dtype: geometry + + See Also + -------- + GeoSeries.convex_hull : convex hull geometry + + Notes + ----- + The algorithm considers only vertices of each geometry. As a result, the + hull may not fully enclose the input geometry. If that happens, increasing ``ratio`` + should resolve the issue. + + """ + return _delegate_to_geometry_column("concave_hull", self, ratio, allow_holes) @property def convex_hull(self): diff --git a/python/sedona/spark/geopandas/geoseries.py b/python/sedona/spark/geopandas/geoseries.py index 4fbccdcf96..34327ea590 100644 --- a/python/sedona/spark/geopandas/geoseries.py +++ b/python/sedona/spark/geopandas/geoseries.py @@ -972,9 +972,12 @@ def centroid(self) -> "GeoSeries": returns_geom=True, ) - def concave_hull(self, ratio=0.0, allow_holes=False): - # Implementation of the abstract method. 
- raise NotImplementedError("This method is not implemented yet.") + def concave_hull(self, ratio=0.0, allow_holes=False) -> "GeoSeries": + spark_expr = stf.ST_ConcaveHull(self.spark.column, ratio, allow_holes) + return self._query_geometry_column( + spark_expr, + returns_geom=True, + ) @property def convex_hull(self) -> "GeoSeries": diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py index 74cbff8970..14c1afe296 100644 --- a/python/tests/geopandas/test_geoseries.py +++ b/python/tests/geopandas/test_geoseries.py @@ -1238,7 +1238,34 @@ def test_centroid(self): self.check_sgpd_equals_gpd(result, expected) def test_concave_hull(self): - pass + s = GeoSeries( + [ + Polygon([(0, 0), (1, 1), (0, 1)]), + LineString([(0, 0), (1, 1), (1, 0)]), + MultiPoint([(0, 0), (1, 1), (0, 1), (1, 0), (0.5, 0.5)]), + MultiPoint([(0, 0), (1, 1)]), + Point(0, 0), + ], + crs=3857, + ) + + result = s.concave_hull() + + expected = gpd.GeoSeries( + [ + Polygon([(0, 1), (1, 1), (0, 0), (0, 1)]), + Polygon([(0, 0), (1, 1), (1, 0), (0, 0)]), + Polygon([(0.5, 0.5), (0, 1), (1, 1), (1, 0), (0, 0), (0.5, 0.5)]), + LineString([(0, 0), (1, 1)]), + Point(0, 0), + ], + crs=3857, + ) + self.check_sgpd_equals_gpd(result, expected) + + # Check if GeoDataFrame works as well + df_result = s.to_geoframe().concave_hull() + self.check_sgpd_equals_gpd(df_result, expected) def test_convex_hull(self): s = GeoSeries( diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py index fc5bc27187..46f41dc16d 100644 --- a/python/tests/geopandas/test_match_geopandas_series.py +++ b/python/tests/geopandas/test_match_geopandas_series.py @@ -723,7 +723,20 @@ def test_centroid(self): self.check_sgpd_equals_gpd(sgpd_result, gpd_result) def test_concave_hull(self): - pass + for geom in self.geoms: + for ratio, allow_holes in [(0.0, False), (0.5, True), (1.0, False)]: + sgpd_result = GeoSeries(geom).concave_hull( + 
ratio=ratio, allow_holes=allow_holes + ) + gpd_result = gpd.GeoSeries(geom).concave_hull( + ratio=ratio, allow_holes=allow_holes + ) + self.check_sgpd_equals_gpd(sgpd_result, gpd_result) + + mixed = [self.points[1], self.linestrings[1], self.polygons[1], None] + sgpd_result = GeoSeries(mixed).concave_hull() + gpd_result = gpd.GeoSeries(mixed).concave_hull() + self.check_sgpd_equals_gpd(sgpd_result, gpd_result) def test_convex_hull(self): for geom in self.geoms: From d774c83a934ac84cffb213486923036b2162276b Mon Sep 17 00:00:00 2001 From: chay0112 Date: Tue, 25 Nov 2025 14:26:53 -0700 Subject: [PATCH 2/4] Modified parity tests for concave_hull --- python/tests/geopandas/test_geoseries.py | 2 +- .../geopandas/test_match_geopandas_series.py | 26 +++++++++++++------ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py index 14c1afe296..b08c7e735f 100644 --- a/python/tests/geopandas/test_geoseries.py +++ b/python/tests/geopandas/test_geoseries.py @@ -24,7 +24,7 @@ import sedona.spark.geopandas as sgpd from sedona.spark.geopandas import GeoSeries, GeoDataFrame from tests.geopandas.test_geopandas_base import TestGeopandasBase -from shapely import wkt +from shapely import points, wkt from shapely.geometry import ( Point, LineString, diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py index 46f41dc16d..808c670d45 100644 --- a/python/tests/geopandas/test_match_geopandas_series.py +++ b/python/tests/geopandas/test_match_geopandas_series.py @@ -724,14 +724,24 @@ def test_centroid(self): def test_concave_hull(self): for geom in self.geoms: - for ratio, allow_holes in [(0.0, False), (0.5, True), (1.0, False)]: - sgpd_result = GeoSeries(geom).concave_hull( - ratio=ratio, allow_holes=allow_holes - ) - gpd_result = gpd.GeoSeries(geom).concave_hull( - ratio=ratio, allow_holes=allow_holes - ) - 
self.check_sgpd_equals_gpd(sgpd_result, gpd_result) + sgpd_result = GeoSeries(geom).concave_hull() + gpd_result = gpd.GeoSeries(geom).concave_hull() + self.check_sgpd_equals_gpd(sgpd_result, gpd_result) + + # Test slightly complex geometry for different ratio and allow_holes settings + geom = [ + Polygon( + [(0, 0), (0, 4), (1, 4), (1, 1), (3, 1), (3, 4), (4, 4), (4, 0), (0, 0)] + ) + ] + for ratio, allow_holes in [(0.5, True), (1.0, True)]: + sgpd_result = GeoSeries(geom).concave_hull( + ratio=ratio, allow_holes=allow_holes + ) + gpd_result = gpd.GeoSeries(geom).concave_hull( + ratio=ratio, allow_holes=allow_holes + ) + self.check_sgpd_equals_gpd(sgpd_result, gpd_result) mixed = [self.points[1], self.linestrings[1], self.polygons[1], None] sgpd_result = GeoSeries(mixed).concave_hull() From 5dce424edd798d14ff3d9ff718bbe9de1c273917 Mon Sep 17 00:00:00 2001 From: chay0112 Date: Tue, 25 Nov 2025 14:33:21 -0700 Subject: [PATCH 3/4] fixed unused imports --- python/tests/geopandas/test_geoseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py index b08c7e735f..14c1afe296 100644 --- a/python/tests/geopandas/test_geoseries.py +++ b/python/tests/geopandas/test_geoseries.py @@ -24,7 +24,7 @@ import sedona.spark.geopandas as sgpd from sedona.spark.geopandas import GeoSeries, GeoDataFrame from tests.geopandas.test_geopandas_base import TestGeopandasBase -from shapely import points, wkt +from shapely import wkt from shapely.geometry import ( Point, LineString, From 4b153a4be5ad193b5bd9a4825deae51f25e50618 Mon Sep 17 00:00:00 2001 From: chay0112 Date: Tue, 25 Nov 2025 16:18:43 -0700 Subject: [PATCH 4/4] skip test for concave_hull if gpd version is less than 0.14 --- python/tests/geopandas/test_match_geopandas_series.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/tests/geopandas/test_match_geopandas_series.py 
b/python/tests/geopandas/test_match_geopandas_series.py index 808c670d45..ab2feac1a9 100644 --- a/python/tests/geopandas/test_match_geopandas_series.py +++ b/python/tests/geopandas/test_match_geopandas_series.py @@ -722,6 +722,10 @@ def test_centroid(self): gpd_result = gpd.GeoSeries(geom).centroid self.check_sgpd_equals_gpd(sgpd_result, gpd_result) + @pytest.mark.skipif( + parse_version(gpd.__version__) < parse_version("0.14.0"), + reason="geopandas concave_hull requires version 0.14.0 or higher", + ) def test_concave_hull(self): for geom in self.geoms: sgpd_result = GeoSeries(geom).concave_hull()