From de9a55a14a10ceadc3756573d95dc74bb3e44f5c Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Sun, 31 Aug 2025 09:21:46 +1000 Subject: [PATCH 01/41] feat: implement ST_Expand It returns a bounding polygon expanded by a specified amount. A common use case is doing a "near" search on a point. This is a postgis function, not found by me in OGC MM. See https://postgis.net/docs/ST_Expand.html --- .../modules/main/spatial_functions_scalar.cpp | 78 +++++++++++++++++++ test/sql/geometry/st_expand.test | 46 +++++++++++ 2 files changed, 124 insertions(+) create mode 100644 test/sql/geometry/st_expand.test diff --git a/src/spatial/modules/main/spatial_functions_scalar.cpp b/src/spatial/modules/main/spatial_functions_scalar.cpp index 6ca172a4..36c62c27 100644 --- a/src/spatial/modules/main/spatial_functions_scalar.cpp +++ b/src/spatial/modules/main/spatial_functions_scalar.cpp @@ -3003,6 +3003,83 @@ struct ST_Dump { } }; + +//====================================================================================================================== +// ST_Expand +//====================================================================================================================== + +struct ST_Expand { + + //------------------------------------------------------------------------------------------------------------------ + // GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { + auto &lstate = LocalState::ResetAndGet(state); + + BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), [&](const string_t &blob, double distance) { + sgl::geometry geom; + lstate.Deserialize(blob, geom); + auto bbox = sgl::extent_xy::smallest(); + + if (sgl::ops::get_total_extent_xy(geom, bbox) == 0) { + const sgl::geometry empty(sgl::geometry_type::GEOMETRY_COLLECTION, false, false); + return lstate.Serialize(result, empty); + } 
else { + sgl::geometry expanded(sgl::geometry_type::POLYGON, false, false); + const auto min_x = bbox.min.x - distance; + const auto min_y = bbox.min.y - distance; + const auto max_x = bbox.max.x + distance; + const auto max_y = bbox.max.y + distance; + const double buffer[10] = {min_x, min_y, min_x, max_y, max_x, max_y, max_x, min_y, min_x, min_y}; + + sgl::geometry ring(sgl::geometry_type::LINESTRING, false, false); + ring.set_vertex_array(buffer, 5); + expanded.append_part(&ring); + return lstate.Serialize(result, expanded); + } + }); + } + + //------------------------------------------------------------------------------------------------------------------ + // Documentation + //------------------------------------------------------------------------------------------------------------------ + static constexpr auto DESCRIPTION = R"( + Expand the input geometry by the specified distance, returning a polygon. + + `geom` is the input geometry. + + `distance` is the target distance for the expansion, using the same units as the input geometry. + + This is a planar operation and will not take into account the curvature of the earth. 
+ )"; + static constexpr auto EXAMPLE = R"( + SELECT ST_AsText(ST_Expand(ST_GeomFromText('POINT(20 30)'), 0.1)); + )"; + + //------------------------------------------------------------------------------------------------------------------ + // Register + //------------------------------------------------------------------------------------------------------------------ + static void Register(ExtensionLoader &loader) { + FunctionBuilder::RegisterScalar(loader, "ST_Expand", [](ScalarFunctionBuilder &func) { + func.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("distance", LogicalType::DOUBLE); + variant.SetReturnType(GeoTypes::GEOMETRY()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(Execute); + }); + + func.SetDescription(DESCRIPTION); + func.SetExample(EXAMPLE); + + func.SetTag("ext", "spatial"); + func.SetTag("category", "property"); + }); + } +}; + + //====================================================================================================================== // ST_Extent //====================================================================================================================== @@ -9279,6 +9356,7 @@ void RegisterSpatialScalarFunctions(ExtensionLoader &loader) { ST_DistanceWithin::Register(loader); ST_Dump::Register(loader); ST_EndPoint::Register(loader); + ST_Expand::Register(loader); ST_Extent::Register(loader); ST_Extent_Approx::Register(loader); // Op_IntersectApprox::Register(loader); diff --git a/test/sql/geometry/st_expand.test b/test/sql/geometry/st_expand.test new file mode 100644 index 00000000..9f2c1f7d --- /dev/null +++ b/test/sql/geometry/st_expand.test @@ -0,0 +1,46 @@ +require spatial + +query I +SELECT ST_AsText(ST_Expand(ST_MakePoint(153.0, -38.0), 0.001)); +---- +POLYGON ((152.999 -38.001, 152.999 -37.999, 153.001 -37.999, 153.001 -38.001, 152.999 -38.001)) + +query I +SELECT ST_AsText(ST_Expand(ST_MakePoint(153.0, -38.0), 
0.0)); +---- +POLYGON ((153 -38, 153 -38, 153 -38, 153 -38, 153 -38)) + +query I +SELECT ST_AsText(ST_Expand(ST_GeomFromText('POINT(20 30)'), 0.001)); +---- +POLYGON ((19.999 29.999, 19.999 30.001, 20.001 30.001, 20.001 29.999, 19.999 29.999)) + +query I +SELECT ST_AsText(ST_Expand(ST_GeomFromText('GEOMETRYCOLLECTION(POINT(20 30))'), 0.001)); +---- +POLYGON ((19.999 29.999, 19.999 30.001, 20.001 30.001, 20.001 29.999, 19.999 29.999)) + +query I +SELECT ST_AsText(ST_Expand(ST_GeomFromText('POLYGON((20 30, 21 30, 21 31, 20 31, 20 30))'), 0.1)); +---- +POLYGON ((19.9 29.9, 19.9 31.1, 21.1 31.1, 21.1 29.9, 19.9 29.9)) + +query I +SELECT ST_AsText(ST_Expand(ST_GeomFromText('POLYGON((20 30, 21 30, 22 32, 21 31, 20 31, 20 30))'), 0.1)); +---- +POLYGON ((19.9 29.9, 19.9 32.1, 22.1 32.1, 22.1 29.9, 19.9 29.9)) + +query I +SELECT ST_AsText(ST_Expand(ST_MakeEnvelope(20, 30, 21, 31), 0.1)); +---- +POLYGON ((19.9 29.9, 19.9 31.1, 21.1 31.1, 21.1 29.9, 19.9 29.9)) + +query I +SELECT ST_AsText(ST_Expand(ST_MakeEnvelope(153.2, -38.8, 153.5, -38.7), 0.0)); +---- +POLYGON ((153.2 -38.8, 153.2 -38.7, 153.5 -38.7, 153.5 -38.8, 153.2 -38.8)) + +query I +SELECT ST_AsText(ST_Expand(ST_GeomFromText('GEOMETRYCOLLECTION EMPTY'), 0.001)); +---- +GEOMETRYCOLLECTION EMPTY \ No newline at end of file From ca2d45d4ee96684d3b55c967d7a944151caa642e Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Sun, 31 Aug 2025 13:18:35 +1000 Subject: [PATCH 02/41] fix typo in variable name No functional changes. 
--- duckdb | 2 +- src/spatial/modules/main/spatial_functions_scalar.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/duckdb b/duckdb index 2ed9bf88..ff0f9595 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 2ed9bf887f61a0ac226ab8c8f1164601d985d607 +Subproject commit ff0f95954bdb4c7515481ac2b473261407bad18b diff --git a/src/spatial/modules/main/spatial_functions_scalar.cpp b/src/spatial/modules/main/spatial_functions_scalar.cpp index 6ca172a4..aa076f8f 100644 --- a/src/spatial/modules/main/spatial_functions_scalar.cpp +++ b/src/spatial/modules/main/spatial_functions_scalar.cpp @@ -5251,7 +5251,7 @@ struct ST_LineInterpolatePoint { auto &lstate = LocalState::ResetAndGet(state); BinaryExecutor::Execute( - args.data[0], args.data[1], result, args.size(), [&](const string_t &blob, const double faction) { + args.data[0], args.data[1], result, args.size(), [&](const string_t &blob, const double fraction) { sgl::geometry geom; lstate.Deserialize(blob, geom); @@ -5260,7 +5260,7 @@ struct ST_LineInterpolatePoint { } sgl::vertex_xyzm out_vertex = {0, 0, 0, 0}; - if (sgl::linestring::interpolate(geom, faction, out_vertex)) { + if (sgl::linestring::interpolate(geom, fraction, out_vertex)) { sgl::geometry point(sgl::geometry_type::POINT, geom.has_z(), geom.has_m()); point.set_vertex_array(&out_vertex, 1); return lstate.Serialize(result, point); From 43b38f27db7ee93be3f1d89cb0ae842c1d5e1a2b Mon Sep 17 00:00:00 2001 From: Gabor Szarnyas Date: Thu, 25 Sep 2025 22:05:42 +0200 Subject: [PATCH 03/41] Fix typo --- src/spatial/modules/main/spatial_functions_scalar.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spatial/modules/main/spatial_functions_scalar.cpp b/src/spatial/modules/main/spatial_functions_scalar.cpp index 456f2a40..d1eef5dc 100644 --- a/src/spatial/modules/main/spatial_functions_scalar.cpp +++ b/src/spatial/modules/main/spatial_functions_scalar.cpp @@ -6092,7 +6092,7 @@ struct ST_Hilbert { static 
constexpr auto DESCRIPTION = R"( Encodes the X and Y values as the hilbert curve index for a curve covering the given bounding box. If a geometry is provided, the center of the approximate bounding box is used as the point to encode. - If no bounding box is provided, the hilbert curve index is mapped to the full range of a single-presicion float. + If no bounding box is provided, the hilbert curve index is mapped to the full range of a single-precision float. For the BOX_2D and BOX_2DF variants, the center of the box is used as the point to encode. )"; From e32e07e1c3f52a23453c94259cf3971ea1b4699a Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Fri, 24 Oct 2025 11:14:26 +0200 Subject: [PATCH 04/41] update CI --- .github/workflows/MainDistributionPipeline.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 8e1b18c1..bdeba8e0 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -5,13 +5,13 @@ name: Main Extension Distribution Pipeline on: pull_request: branches: - - main + - v1.4-andium paths-ignore: - '**/README.md' - 'doc/**' push: branches: - - main + - v1.4-andium paths-ignore: - '**/README.md' - 'doc/**' @@ -27,7 +27,7 @@ jobs: name: Build extension binaries uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main with: - duckdb_version: main + duckdb_version: v1.4-andium extension_name: spatial ci_tools_version: main vcpkg_commit: ce613c41372b23b1f51333815feb3edd87ef8a8b @@ -38,7 +38,7 @@ jobs: uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@main secrets: inherit with: - duckdb_version: main + duckdb_version: v1.4-andium ci_tools_version: main extension_name: spatial deploy_latest: ${{ startsWith(github.ref, 'refs/heads/v') || github.ref == 'refs/heads/main' }} From 456d9f968db466690b9596dd1d58faef8153fcf1 Mon Sep 17 
00:00:00 2001 From: Max Gabrielsson Date: Fri, 24 Oct 2025 11:20:35 +0200 Subject: [PATCH 05/41] pin to duckdb v1.4.1, fix bug in st_distance when processing zero-length segments in polygon rings --- .../workflows/MainDistributionPipeline.yml | 4 +-- duckdb | 2 +- src/sgl/sgl.cpp | 33 +++++++++++++++++++ test/sql/geometry/st_distance.test | 12 +++++++ 4 files changed, 48 insertions(+), 3 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index bdeba8e0..91f0f865 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -27,7 +27,7 @@ jobs: name: Build extension binaries uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main with: - duckdb_version: v1.4-andium + duckdb_version: v1.4.1 extension_name: spatial ci_tools_version: main vcpkg_commit: ce613c41372b23b1f51333815feb3edd87ef8a8b @@ -38,7 +38,7 @@ jobs: uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@main secrets: inherit with: - duckdb_version: v1.4-andium + duckdb_version: v1.4.1 ci_tools_version: main extension_name: spatial deploy_latest: ${{ startsWith(github.ref, 'refs/heads/v') || github.ref == 'refs/heads/main' }} diff --git a/duckdb b/duckdb index 47993080..9069f536 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 4799308087583835a7731a266262ba0fcac9af08 +Subproject commit 9069f5363cac06a1172dfeb36f5f2cd2a97bf37b diff --git a/src/sgl/sgl.cpp b/src/sgl/sgl.cpp index 8a985f80..c9612e5d 100644 --- a/src/sgl/sgl.cpp +++ b/src/sgl/sgl.cpp @@ -3130,6 +3130,7 @@ static double point_segment_dist_sq(const vertex_xy &p, const vertex_xy &a, cons return diff.norm_sq(); } +// Check if point P is on segment QR static bool point_on_segment(const vertex_xy &p, const vertex_xy &q, const vertex_xy &r) { return q.x >= std::min(p.x, r.x) && q.x <= std::max(p.x, r.x) && q.y >= std::min(p.y, r.y) && q.y <= std::max(p.y, r.y); @@ 
-3137,6 +3138,22 @@ static bool point_on_segment(const vertex_xy &p, const vertex_xy &q, const verte static bool segment_intersects(const vertex_xy &a1, const vertex_xy &a2, const vertex_xy &b1, const vertex_xy &b2) { // Check if two segments intersect using the orientation method + // Handle degenerate cases where a segment is actually a single point + const bool a_is_point = (a1.x == a2.x && a1.y == a2.y); + const bool b_is_point = (b1.x == b2.x && b1.y == b2.y); + + if (a_is_point && b_is_point) { + // Both are points: intersect only if identical + return (a1.x == b1.x && a1.y == b1.y); + } + if (a_is_point) { + // A is a point: check if A lies on segment B + return point_on_segment(a1, b1, b2); + } + if (b_is_point) { + // B is a point: check if B lies on segment A + return point_on_segment(b1, a1, a2); + } const auto o1 = orient2d_fast(a1, a2, b1); const auto o2 = orient2d_fast(a1, a2, b2); @@ -3250,6 +3267,15 @@ static bool try_get_prepared_distance_lines(const prepared_geometry &lhs, const for (uint32_t i = lhs_beg_idx + 1; i < lhs_end_idx; i++) { memcpy(&lhs_next, lhs_vertex_array + i * lhs_vertex_width, sizeof(vertex_xy)); + // If this is a zero-length segment, skip it + // LINESTRINGs must have at least two distinct vertices to be valid, so this is safe. Even if we skip + // this vertex now, we must eventually reach a non-zero-length segment that includes this vertex as + // its start point. It will therefore still contribute to the distance calculation once we process that + // segment. 
+ if (lhs_prev.x == lhs_next.x && lhs_prev.y == lhs_next.y) { + continue; + } + // Quick check: If the distance between the segment and the box (all the segments) // is greater than min_dist, we can skip the exact distance check @@ -3268,6 +3294,13 @@ static bool try_get_prepared_distance_lines(const prepared_geometry &lhs, const for (uint32_t j = rhs_beg_idx + 1; j < rhs_end_idx; j++) { memcpy(&rhs_next, rhs_vertex_array + j * rhs_vertex_width, sizeof(vertex_xy)); + // If this is a zero-length segment, skip it + // LINESTRINGs must have at least two distinct points to be valid, so this is safe. + // (see comment above) + if (rhs_prev.x == rhs_next.x && rhs_prev.y == rhs_next.y) { + continue; + } + // Quick check: If the distance between the segment bounds are greater than min_dist, // we can skip the exact distance check extent_xy rhs_seg; diff --git a/test/sql/geometry/st_distance.test b/test/sql/geometry/st_distance.test index 2763e75d..4b14520f 100644 --- a/test/sql/geometry/st_distance.test +++ b/test/sql/geometry/st_distance.test @@ -26,3 +26,15 @@ SELECT ST_Distance( ---- 5 + +# This test also comes from a bug report, where ST_Distance would incorrectly handle line segments with zero length. +# The issue is that we were not properly considering segments where both endpoints are the same point, and conclude +# that a zero length segment always intersects with any other segment, which is not correct. We now skip zero-length +# segments when performing intersection tests between indexed polygon rings, and also handle zero-length segments as +# points when performing regular segment-segment intersection tests. + +query I +select st_distance('POLYGON((0 0, 1 1, 1 1, 10 10, 0 0))'::GEOMETRY, 'POLYGON((4 0, 4 1, 5 1, 5 0, 4 0))'::GEOMETRY) != 0; +---- +true + From 29020505b30661afbb3423c5de04f2d19f4240ee Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Sat, 25 Oct 2025 00:23:27 +0200 Subject: [PATCH 06/41] well. 
we're stuck on arrow extensions for now since gdal emits/output ogc.wkb and not geoarrow --- src/spatial/modules/gdal/CMakeLists.txt | 1 + src/spatial/modules/gdal/gdal_functions.cpp | 531 ++++++++++++++++++++ src/spatial/modules/gdal/gdal_module.cpp | 4 + src/spatial/modules/gdal/gdal_module.hpp | 3 +- 4 files changed, 538 insertions(+), 1 deletion(-) create mode 100644 src/spatial/modules/gdal/gdal_functions.cpp diff --git a/src/spatial/modules/gdal/CMakeLists.txt b/src/spatial/modules/gdal/CMakeLists.txt index bdfd0b2d..25cb0464 100644 --- a/src/spatial/modules/gdal/CMakeLists.txt +++ b/src/spatial/modules/gdal/CMakeLists.txt @@ -1,4 +1,5 @@ set(EXTENSION_SOURCES ${EXTENSION_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/gdal_module.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/gdal_functions.cpp PARENT_SCOPE) \ No newline at end of file diff --git a/src/spatial/modules/gdal/gdal_functions.cpp b/src/spatial/modules/gdal/gdal_functions.cpp new file mode 100644 index 00000000..6174c50f --- /dev/null +++ b/src/spatial/modules/gdal/gdal_functions.cpp @@ -0,0 +1,531 @@ +#include "duckdb/main/extension/extension_loader.hpp" +#include "duckdb/function/copy_function.hpp" + + +#include "cpl_string.h" +#include "cpl_vsi.h" +#include "cpl_vsi_error.h" +#include "cpl_vsi_virtual.h" + +#include "gdal.h" +#include "ogr_core.h" +#include "ogr_api.h" + +#include "duckdb/common/arrow/arrow_converter.hpp" +#include "duckdb/common/arrow/arrow.hpp" +#include "duckdb/function/table/arrow.hpp" +#include "duckdb/main/database.hpp" + +namespace duckdb { +namespace { + +//====================================================================================================================== +// GDAL READ +//====================================================================================================================== +namespace gdal_read { +//---------------------------------------------------------------------------------------------------------------------- +// BIND 
+//---------------------------------------------------------------------------------------------------------------------- +class BindData final : public TableFunctionData { +public: + string file_path; +}; + +auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector &col_types, vector &col_names) + -> unique_ptr { + + auto result = make_uniq(); + + result->file_path = input.inputs[0].GetValue(); + + const auto dataset = GDALOpenEx(result->file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, nullptr, nullptr, nullptr); + if (!dataset) { + GDALClose(dataset); + throw IOException("Could not open GDAL dataset at: %s", result->file_path); + } + + if (GDALDatasetGetLayerCount(dataset) <= 0) { + GDALClose(dataset); + throw IOException("GDAL dataset contains no layers at: %s", result->file_path); + } + + const auto layer = GDALDatasetGetLayer(dataset, 0); + if (!layer) { + GDALClose(dataset); + throw IOException("Could not get GDAL layer at: %s", result->file_path); + } + + ArrowArrayStream stream; + if (!OGR_L_GetArrowStream(layer, &stream, nullptr)) { + GDALClose(dataset); + throw IOException("Could not get GDAL Arrow stream at: %s", result->file_path); + } + + ArrowSchema schema; + if (stream.get_schema(&stream, &schema) != 0) { + stream.release(&stream); + GDALClose(dataset); + throw IOException("Could not get GDAL Arrow schema at: %s", result->file_path); + } + + // Convert Arrow schema to DuckDB types + for (int64_t i = 0; i < schema.n_children; i++) { + auto &child_schema = *schema.children[i]; + const auto type = ArrowType::GetTypeFromSchema(ctx.db->config, child_schema); + col_names.push_back(child_schema.name); + col_types.push_back(type->GetDuckType()); + } + + // Release stream, schema and dataset + schema.release(&schema); + stream.release(&stream); + GDALClose(dataset); + + + return std::move(result); +} + +class GlobalState final : public GlobalTableFunctionState { +public: + + ~GlobalState() override { + if (dataset) { + GDALClose(dataset); + 
dataset = nullptr; + } + + if (stream.release) { + stream.release(&stream); + } + } + + GDALDatasetH dataset; + OGRLayerH layer; + ArrowArrayStream stream; + vector> col_types; +}; + +auto InitGlobal(ClientContext &context, TableFunctionInitInput &input) -> unique_ptr { + auto &data = input.bind_data->Cast(); + + const auto dataset = GDALOpenEx(data.file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, nullptr, nullptr, nullptr); + if (!dataset) { + throw IOException("Could not open GDAL dataset at: foo"); + } + + auto result = make_uniq(); + result->dataset = dataset; + + // Get the first layer + result->layer = GDALDatasetGetLayer(dataset, 0); + + + string str = "MAX_FEATURES_IN_BATCH=2048"; + vector buf; + buf.insert(buf.end(), str.begin(), str.end()); + buf.push_back('\0'); + vector layer_options; + layer_options.push_back(buf.data()); + layer_options.push_back(nullptr); + + // Open the Arrow stream + if (!OGR_L_GetArrowStream(result->layer, &result->stream, layer_options.data())) { + GDALClose(dataset); + throw IOException("Could not get GDAL Arrow stream at: foo"); + } + + ArrowSchema schema; + if (result->stream.get_schema(&result->stream, &schema) != 0) { + result->stream.release(&result->stream); + GDALClose(dataset); + throw IOException("Could not get GDAL Arrow schema at: foo"); + } + + // Store the column types + for (int64_t i = 0; i < schema.n_children; i++) { + auto &child_schema = *schema.children[i]; + result->col_types.push_back(ArrowType::GetTypeFromSchema(context.db->config, child_schema)); + } + + return std::move(result); +} + +void Scan(ClientContext &context, TableFunctionInput &input, DataChunk &output) { + auto &bdata = input.bind_data->Cast(); + auto &state = input.global_state->Cast(); + + ArrowArray arrow_array; + if (state.stream.get_next(&state.stream, &arrow_array) != 0 || arrow_array.release == nullptr) { + // Finished reading + output.SetCardinality(0); + return; + } + + // Now convert the Arrow array to DuckDB + for (idx_t i = 0; 
i < arrow_array.n_children; i++) { + auto &arr = *arrow_array.children[i]; + auto &vec = output.data[i]; + + auto &arrow_type = *state.col_types[i]; + auto array_state = ArrowArrayScanState(context); + // We need to make sure that our chunk will hold the ownership + array_state.owned_data = duckdb::make_shared_ptr(); + array_state.owned_data->arrow_array = arrow_array; + + // We set it to nullptr to effectively transfer the ownership + arrow_array.release = nullptr; + + switch (arrow_type.GetPhysicalType()) { + case ArrowArrayPhysicalType::DICTIONARY_ENCODED: + ArrowToDuckDBConversion::ColumnArrowToDuckDBDictionary(vec, arr, 0, array_state, + arrow_array.length, arrow_type); + break; + case ArrowArrayPhysicalType::RUN_END_ENCODED: + ArrowToDuckDBConversion::ColumnArrowToDuckDBRunEndEncoded(vec, arr, 0, array_state, + arrow_array.length, arrow_type); + break; + case ArrowArrayPhysicalType::DEFAULT: + ArrowToDuckDBConversion::SetValidityMask(vec, arr, 0, + arrow_array.length, arrow_array.offset, -1); + ArrowToDuckDBConversion::ColumnArrowToDuckDB(vec, arr, 0, array_state, + arrow_array.length, arrow_type); + break; + default: + throw NotImplementedException("ArrowArrayPhysicalType not recognized"); + } + } + + output.SetCardinality(arrow_array.length); +} + +void Register(ExtensionLoader &loader) { + TableFunction read_func("gdal_read", {LogicalType::VARCHAR}, Scan, Bind, InitGlobal); + loader.RegisterFunction(read_func); +} + +} // namespace gdal_read +//====================================================================================================================== +// GDAL COPY +//====================================================================================================================== + +namespace gdal_copy { + +//---------------------------------------------------------------------------------------------------------------------- +// Bind 
+//---------------------------------------------------------------------------------------------------------------------- +class BindData final : public TableFunctionData { +public: + string file_path; + string driver_name; + string layer_name; + vector driver_options; + vector layer_options; + string target_srs; + OGRwkbGeometryType geometry_type; + + // Arrow info + ClientProperties props; + ArrowSchema schema; + unordered_map> extension_type_cast; + + ~BindData() override { + if (schema.release) { + schema.release(&schema); + } + } +}; + +bool MatchOption(const char* name, const pair> &option, bool list = false) { + if (StringUtil::CIEquals(name, option.first)) { + if (option.second.empty()) { + throw BinderException("GDAL COPY option '%s' requires a value", name); + } + if (!list) { + if (option.second.size() != 1) { + throw BinderException("GDAL COPY option '%s' only accepts a single value", name); + } + if (option.second.back().type().id() != LogicalTypeId::VARCHAR) { + throw BinderException("GDAL COPY option '%s' must be a string", name); + } + } else { + for (auto &val : option.second) { + if (val.type().id() != LogicalTypeId::VARCHAR) { + throw BinderException("GDAL COPY option '%s' must be a list of strings", name); + } + } + } + return true; + } + return false; +} + +auto Bind(ClientContext &context, CopyFunctionBindInput &input, const vector &names, + const vector &sql_types) -> unique_ptr { + auto result = make_uniq(); + + // Set file path + result->file_path = input.info.file_path; + + // Parse options + for (auto &option : input.info.options) { + + if (MatchOption("DRIVER", option)) { + result->driver_name = option.second.back().GetValue(); + continue; + } + + if (MatchOption("LAYER_NAME", option)) { + result->layer_name = option.second.back().GetValue(); + continue; + } + + if (MatchOption("SRS", option) || MatchOption("CRS", option)) { + result->target_srs = option.second.back().GetValue(); + continue; + } + + if (MatchOption("GEOMETRY_TYPE", 
option)) { + auto type = option.second.back().GetValue(); + if (StringUtil::CIEquals(type, "POINT")) { + result->geometry_type = wkbPoint; + } else if (StringUtil::CIEquals(type, "LINESTRING")) { + result->geometry_type = wkbLineString; + } else if (StringUtil::CIEquals(type, "POLYGON")) { + result->geometry_type = wkbPolygon; + } else if (StringUtil::CIEquals(type, "MULTIPOINT")) { + result->geometry_type = wkbMultiPoint; + } else if (StringUtil::CIEquals(type, "MULTILINESTRING")) { + result->geometry_type = wkbMultiLineString; + } else if (StringUtil::CIEquals(type, "MULTIPOLYGON")) { + result->geometry_type = wkbMultiPolygon; + } else if (StringUtil::CIEquals(type, "GEOMETRYCOLLECTION")) { + result->geometry_type = wkbGeometryCollection; + } else { + throw BinderException("Unsupported GEOMETRY_TYPE: '%s'", type); + } + continue; + } + + if (MatchOption("LAYER_CREATION_OPTIONS", option, true)) { + for (auto &val : option.second) { + result->layer_options.push_back(val.GetValue()); + } + continue; + } + + if (MatchOption("DATASET_CREATION_OPTIONS", option, true)) { + for (auto &val : option.second) { + result->driver_options.push_back(val.GetValue()); + } + continue; + } + + throw BinderException("Unknown GDAL COPY option: '%s'", option.first); + } + + // Check that options are valid + if (result->driver_name.empty()) { + throw BinderException("GDAL COPY option 'DRIVER' is required"); + } + + if (result->layer_name.empty()) { + auto &fs = FileSystem::GetFileSystem(context); + result->layer_name = fs.ExtractBaseName(result->file_path); + } + + // Check the driver + const auto driver = GDALGetDriverByName(result->driver_name.c_str()); + if (!driver) { + throw BinderException("Could not find GDAL driver: " + result->driver_name); + } + + // Try to get the file extension from the driver + const auto file_ext = GDALGetMetadataItem(driver, GDAL_DMD_EXTENSIONS, nullptr); + if (file_ext) { + input.file_extension = file_ext; + } else { + const auto file_exts = 
GDALGetMetadataItem(driver, GDAL_DMD_EXTENSIONS, nullptr); + const auto exts = StringUtil::Split(file_exts, ' '); + if (!exts.empty()) { + input.file_extension = exts[0]; + } + } + + // Driver-specific checks + if (result->driver_name == "OpenFileGDB" && result->geometry_type == wkbUnknown) { + throw BinderException("OpenFileGDB requires 'GEOMETRY_TYPE' parameter to be set when writing!"); + } + + // Setup arrow schema + result->props = context.GetClientProperties(); + result->extension_type_cast = duckdb::ArrowTypeExtensionData::GetExtensionTypes(context, sql_types); + ArrowConverter::ToArrowSchema(&result->schema, sql_types, names, result->props); + + return std::move(result); +} + +//---------------------------------------------------------------------------------------------------------------------- +// Global State +//---------------------------------------------------------------------------------------------------------------------- +class GlobalState final : public GlobalFunctionData { +public: + + ~GlobalState() override { + if (dataset) { + GDALClose(dataset); + dataset = nullptr; + } + + if (array.release) { + array.release(&array); + array.release = nullptr; + } + } + + void Open(const BindData &data) { + + const auto driver = GDALGetDriverByName(data.driver_name.c_str()); + if (!driver) { + throw InvalidInputException("Could not find GDAL driver: " + data.driver_name); + } + + // Make CPL list for driver options + vector cpl_driver_options; + for (auto &option : data.driver_options) { + cpl_driver_options.push_back(option.c_str()); + } + cpl_driver_options.push_back(nullptr); + + // Create Dataset + dataset = GDALCreate(driver, data.file_path.c_str(), 0, 0, 0, GDT_Unknown, nullptr); + if (!dataset) { + throw IOException("Could not create GDAL dataset at: " + data.file_path); + } + + // Make CPL list for layer options + vector cpl_layer_options; + for (auto &option : data.layer_options) { + cpl_layer_options.push_back(option.c_str()); + } + 
cpl_layer_options.push_back(nullptr); + + // Create Layer + layer = GDALDatasetCreateLayer(dataset, data.driver_name.c_str(), nullptr, wkbUnknown, nullptr); + if (!layer) { + throw IOException("Could not create GDAL layer in dataset at: " + data.file_path); + } + + // Create fields for all children + auto geometry_field_count = 0; + for (auto i = 0; i < data.schema.n_children; i++) { + const auto child_schema = data.schema.children[i]; + + // Check if this is a geometry field + if (child_schema->metadata != nullptr) { + // TODO: Look for arrow metadata! + geometry_field_count++; + if (geometry_field_count > 1) { + throw NotImplementedException("Multiple geometry fields not supported yet"); + } + } else { + // Register normal attribute + if (!OGR_L_CreateFieldFromArrowSchema(layer, child_schema, nullptr)) { + throw IOException("Could not create field in GDAL layer for column: " + string(child_schema->name)); + } + } + } + } +public: + mutex lock; + GDALDatasetH dataset; + OGRLayerH layer; + ArrowArray array; +}; + +auto InitGlobal(ClientContext &context, FunctionData &bdata, const string &path) -> unique_ptr { + auto &bind_data = bdata.Cast(); + auto result = make_uniq(); + + result->Open(bind_data); + + return std::move(result); +} + + +//---------------------------------------------------------------------------------------------------------------------- +// Local State +//---------------------------------------------------------------------------------------------------------------------- +class LocalState final : public LocalFunctionData { +public: + // No-op, we don't need any local state for now +}; + +auto InitLocal(ExecutionContext &context, FunctionData &bind_data) -> unique_ptr { + auto result = make_uniq(); + return std::move(result); +} + +//---------------------------------------------------------------------------------------------------------------------- +// Sink 
+//---------------------------------------------------------------------------------------------------------------------- +void Sink(ExecutionContext &context, FunctionData &bdata_p, GlobalFunctionData &gstate_p, LocalFunctionData &lstate_p, + DataChunk &input) { + + const auto &bdata = bdata_p.Cast(); + auto &gstate = gstate_p.Cast(); + + // Lock + lock_guard guard(gstate.lock); + + auto &arrow_array = gstate.array; + auto &arrow_schema = bdata.schema; + + ArrowConverter::ToArrowArray(input, &arrow_array, bdata.props, bdata.extension_type_cast); + OGR_L_WriteArrowBatch(gstate.layer, &arrow_schema, &arrow_array, nullptr); + + if (arrow_array.release) { + arrow_array.release(&arrow_array); + arrow_array.release = nullptr; + } +} + +//---------------------------------------------------------------------------------------------------------------------- +// Combine +//---------------------------------------------------------------------------------------------------------------------- +void Combine(ExecutionContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, + LocalFunctionData &lstate) { + +} + +//---------------------------------------------------------------------------------------------------------------------- +// Finalize +//---------------------------------------------------------------------------------------------------------------------- +void Finalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate) { + +} + +//---------------------------------------------------------------------------------------------------------------------- +// Register +//---------------------------------------------------------------------------------------------------------------------- +void Register(ExtensionLoader &loader) { + CopyFunction info("GDAL"); + + info.copy_to_bind = Bind; + info.copy_to_initialize_local = InitLocal; + info.copy_to_initialize_global = InitGlobal; + info.copy_to_sink = Sink; + info.copy_to_combine = Combine; + 
info.copy_to_finalize = Finalize; + info.extension = "gdal"; + + loader.RegisterFunction(info); +} + +} // namespace gdal_copy +} // namespace + +void RegisterExtraFunction(ExtensionLoader &loader) { + gdal_copy::Register(loader); + gdal_read::Register(loader); +} +} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/modules/gdal/gdal_module.cpp b/src/spatial/modules/gdal/gdal_module.cpp index f24f0498..c107d39a 100644 --- a/src/spatial/modules/gdal/gdal_module.cpp +++ b/src/spatial/modules/gdal/gdal_module.cpp @@ -2007,6 +2007,7 @@ struct ST_Write { // Register //------------------------------------------------------------------------------------------------------------------ static void Register(ExtensionLoader &loader) { + /* CopyFunction info("GDAL"); info.copy_to_bind = Bind; info.copy_to_initialize_local = InitLocal; @@ -2016,6 +2017,7 @@ struct ST_Write { info.copy_to_finalize = Finalize; info.extension = "gdal"; loader.RegisterFunction(info); + */ } }; @@ -2072,6 +2074,8 @@ void RegisterGDALModule(ExtensionLoader &loader) { }); }); + RegisterExtraFunction(loader); + ST_Read::Register(loader); ST_Read_Meta::Register(loader); ST_Drivers::Register(loader); diff --git a/src/spatial/modules/gdal/gdal_module.hpp b/src/spatial/modules/gdal/gdal_module.hpp index 087d54ba..f84ce954 100644 --- a/src/spatial/modules/gdal/gdal_module.hpp +++ b/src/spatial/modules/gdal/gdal_module.hpp @@ -1,9 +1,10 @@ #pragma once +#include "duckdb/main/extension/extension_loader.hpp" namespace duckdb { class ExtensionLoader; void RegisterGDALModule(ExtensionLoader &loader); - +void RegisterExtraFunction(ExtensionLoader &loader); } // namespace duckdb From 9e072dcea0b64f902e5093c52ed4d994556b1e0c Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Sat, 25 Oct 2025 00:54:02 +0200 Subject: [PATCH 07/41] nvm figured out how to force geoarrow.wkb --- src/spatial/modules/gdal/gdal_functions.cpp | 45 +++++++++++++++------ 1 file changed, 32 insertions(+), 13 
deletions(-) diff --git a/src/spatial/modules/gdal/gdal_functions.cpp b/src/spatial/modules/gdal/gdal_functions.cpp index 6174c50f..79ca9f54 100644 --- a/src/spatial/modules/gdal/gdal_functions.cpp +++ b/src/spatial/modules/gdal/gdal_functions.cpp @@ -23,12 +23,33 @@ namespace { // GDAL READ //====================================================================================================================== namespace gdal_read { + +class StringList { +public: + void Add(const string &item) { + const auto cstr = new char[item.size() + 1]; + strcpy(cstr, item.c_str()); + items.insert(items.end() - 1, cstr); + } + + char** Get() { return items.data(); } + + ~StringList() { + for (const auto &item : items) { + delete[] item; + } + } +private: + vector items = { nullptr }; +}; + //---------------------------------------------------------------------------------------------------------------------- // BIND //---------------------------------------------------------------------------------------------------------------------- class BindData final : public TableFunctionData { public: string file_path; + StringList layer_options; }; auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector &col_types, vector &col_names) @@ -38,6 +59,10 @@ auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector result->file_path = input.inputs[0].GetValue(); + // Set GDAL Arrow layer options + result->layer_options.Add(StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", STANDARD_VECTOR_SIZE)); + result->layer_options.Add("GEOMETRY_METADATA_ENCODING=GEOARROW"); + const auto dataset = GDALOpenEx(result->file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, nullptr, nullptr, nullptr); if (!dataset) { GDALClose(dataset); @@ -56,7 +81,7 @@ auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector } ArrowArrayStream stream; - if (!OGR_L_GetArrowStream(layer, &stream, nullptr)) { + if (!OGR_L_GetArrowStream(layer, &stream, result->layer_options.Get())) { 
GDALClose(dataset); throw IOException("Could not get GDAL Arrow stream at: %s", result->file_path); } @@ -81,7 +106,6 @@ auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector stream.release(&stream); GDALClose(dataset); - return std::move(result); } @@ -106,9 +130,9 @@ class GlobalState final : public GlobalTableFunctionState { }; auto InitGlobal(ClientContext &context, TableFunctionInitInput &input) -> unique_ptr { - auto &data = input.bind_data->Cast(); + auto &bdata = input.bind_data->Cast(); - const auto dataset = GDALOpenEx(data.file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, nullptr, nullptr, nullptr); + const auto dataset = GDALOpenEx(bdata.file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, nullptr, nullptr, nullptr); if (!dataset) { throw IOException("Could not open GDAL dataset at: foo"); } @@ -119,17 +143,12 @@ auto InitGlobal(ClientContext &context, TableFunctionInitInput &input) -> unique // Get the first layer result->layer = GDALDatasetGetLayer(dataset, 0); - - string str = "MAX_FEATURES_IN_BATCH=2048"; - vector buf; - buf.insert(buf.end(), str.begin(), str.end()); - buf.push_back('\0'); - vector layer_options; - layer_options.push_back(buf.data()); - layer_options.push_back(nullptr); + StringList layer_options; + layer_options.Add(StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", STANDARD_VECTOR_SIZE)); + layer_options.Add("GEOMETRY_METADATA_ENCODING=GEOARROW"); // Open the Arrow stream - if (!OGR_L_GetArrowStream(result->layer, &result->stream, layer_options.data())) { + if (!OGR_L_GetArrowStream(result->layer, &result->stream, layer_options.Get())) { GDALClose(dataset); throw IOException("Could not get GDAL Arrow stream at: foo"); } From 3456f189ca89a3f2d947a51d131ea3ee93c3439b Mon Sep 17 00:00:00 2001 From: Ilya Boyandin Date: Tue, 28 Oct 2025 23:09:59 +0100 Subject: [PATCH 08/41] fix: Linestring corrupt PBF issue --- src/spatial/modules/mvt/mvt_module.cpp | 18 +-- test/sql/mvt/st_asmvt_linestring.test | 164 
+++++++++++++++++++++++++ 2 files changed, 173 insertions(+), 9 deletions(-) create mode 100644 test/sql/mvt/st_asmvt_linestring.test diff --git a/src/spatial/modules/mvt/mvt_module.cpp b/src/spatial/modules/mvt/mvt_module.cpp index fc355dd1..48907eaa 100644 --- a/src/spatial/modules/mvt/mvt_module.cpp +++ b/src/spatial/modules/mvt/mvt_module.cpp @@ -609,15 +609,15 @@ class MVTFeatureBuilder { const auto y = CastDouble(cursor.Read()); cursor.Skip(vertex_space); // Skip z and m if present - if (vertex_idx == 0) { - geometry.push_back((1 & 0x7) | (1 << 3)); // MoveTo, 1 part - geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); - geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); - geometry.push_back((2 & 0x7) | ((vertex_count - 2) << 3)); // LineTo, part count - } else { - geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); - geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); - } + if (vertex_idx == 0) { + geometry.push_back((1 & 0x7) | (1 << 3)); // MoveTo, 1 part + geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + geometry.push_back((2 & 0x7) | ((vertex_count - 1) << 3)); // LineTo, part count + } else { + geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + } cursor_x = x; cursor_y = y; diff --git a/test/sql/mvt/st_asmvt_linestring.test b/test/sql/mvt/st_asmvt_linestring.test new file mode 100644 index 00000000..5f1dd9ae --- /dev/null +++ b/test/sql/mvt/st_asmvt_linestring.test @@ -0,0 +1,164 @@ +# name: test/sql/mvt/st_asmvt_linestring.test +# group: [mvt] + +require spatial + +# Test LINESTRING encoding +statement ok +COPY ( + SELECT st_asmvt( + {"geom": geom}, + 'lines' + ) as mvt + FROM ( + SELECT + st_geomfromtext('LINESTRING(0 0, 100 100, 200 0)') as geom + ) +) TO '__TEST_DIR__/test_linestring.mvt' (FORMAT BLOB); + +query I +select count(*) from 
st_read('__TEST_DIR__/test_linestring.mvt'); +---- +1 + +# Test MULTI_LINESTRING encoding +statement ok +COPY ( + SELECT st_asmvt( + {"geom": geom}, + 'multilines' + ) as mvt + FROM ( + SELECT + st_geomfromtext('MULTILINESTRING((0 0, 100 100, 200 0), (300 0, 400 100, 500 0))') as geom + ) +) TO '__TEST_DIR__/test_multilinestring.mvt' (FORMAT BLOB); + +query I +select count(*) from st_read('__TEST_DIR__/test_multilinestring.mvt'); +---- +1 + +# Test LINESTRING with ST_AsMVTGeom (clipping can produce MULTI_LINESTRING) +statement ok +COPY ( + SELECT st_asmvt( + {"geom": ST_AsMVTGeom( + geom, + ST_MakeEnvelope(0, 0, 1000, 1000), + 4096, + 256, + true + )}, + 'clipped_lines' + ) as mvt + FROM ( + SELECT + st_geomfromtext('LINESTRING(100 100, 500 500, 900 100)') as geom + ) +) TO '__TEST_DIR__/test_clipped_linestring.mvt' (FORMAT BLOB); + +query I +select count(*) from st_read('__TEST_DIR__/test_clipped_linestring.mvt'); +---- +1 + +# Test LINESTRING crossing tile boundary (produces MULTI_LINESTRING after clipping) +statement ok +COPY ( + SELECT st_asmvt( + {"geom": ST_AsMVTGeom( + geom, + ST_MakeEnvelope(0, 0, 1000, 1000), + 4096, + 256, + true + )}, + 'crossing_lines' + ) as mvt + FROM ( + SELECT + st_geomfromtext('LINESTRING(-500 500, 500 500, 1500 500)') as geom + ) +) TO '__TEST_DIR__/test_crossing_linestring.mvt' (FORMAT BLOB); + +query I +select count(*) from st_read('__TEST_DIR__/test_crossing_linestring.mvt'); +---- +1 + +# Test multiple LINESTRINGs with various lengths +statement ok +COPY ( + SELECT st_asmvt( + {"geom": geom, "id": id}, + 'various_lines', + 4096, + 'geom', + 'id' + ) as mvt + FROM ( + SELECT + row_number() over () as id, + st_geomfromtext('LINESTRING(' || (x*100) || ' ' || (y*100) || ', ' || (x*100+50) || ' ' || (y*100+50) || ', ' || (x*100+100) || ' ' || (y*100) || ')') as geom + FROM range(0, 10) as r(x), + range(0, 10) as rr(y) + ) +) TO '__TEST_DIR__/test_various_linestrings.mvt' (FORMAT BLOB); + +query I +select count(*) from 
st_read('__TEST_DIR__/test_various_linestrings.mvt'); +---- +100 + +# Test global scale dataset scenario (like Natural Earth roads) +# This simulates the case where geometries at low zoom levels span large areas +statement ok +COPY ( + SELECT st_asmvt( + {"geom": ST_AsMVTGeom( + geom, + ST_TileEnvelope(2, 1, 1), + 4096, + 256, + false + )}, + 'global_lines' + ) as mvt + FROM ( + SELECT + st_geomfromtext('LINESTRING(-10000000 5000000, 0 0, 10000000 -5000000)') as geom + ) +) TO '__TEST_DIR__/test_global_linestring.mvt' (FORMAT BLOB); + +query I +select count(*) from st_read('__TEST_DIR__/test_global_linestring.mvt'); +---- +1 + +# Test that clipped MULTI_LINESTRING can be read back +statement ok +COPY ( + SELECT st_asmvt( + {"geom": ST_AsMVTGeom( + geom, + ST_TileEnvelope(5, 10, 12), + 4096, + 256, + true + ), "name": name}, + 'roads' + ) as mvt + FROM ( + VALUES + (st_geomfromtext('MULTILINESTRING((100 100, 500 500), (600 600, 900 900))'), 'road1'), + (st_geomfromtext('LINESTRING(200 200, 800 800)'), 'road2') + ) t(geom, name) + WHERE ST_Intersects(geom, ST_TileEnvelope(5, 10, 12)) +) TO '__TEST_DIR__/test_roads.mvt' (FORMAT BLOB); + +query II +select count(*), count(name) from st_read('__TEST_DIR__/test_roads.mvt'); +---- +2 2 + From b56d9ce0dea54f4af7f937c883bb1bc3afc77ce6 Mon Sep 17 00:00:00 2001 From: Ilya Boyandin Date: Wed, 29 Oct 2025 19:16:47 +0100 Subject: [PATCH 09/41] fix tests --- test/sql/mvt/st_asmvt_linestring.test | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/test/sql/mvt/st_asmvt_linestring.test b/test/sql/mvt/st_asmvt_linestring.test index 5f1dd9ae..bb9676c0 100644 --- a/test/sql/mvt/st_asmvt_linestring.test +++ b/test/sql/mvt/st_asmvt_linestring.test @@ -45,7 +45,7 @@ COPY ( SELECT st_asmvt( {"geom": ST_AsMVTGeom( geom, - ST_MakeEnvelope(0, 0, 1000, 1000), + ST_Extent(ST_MakeEnvelope(0, 0, 1000, 1000)), 4096, 256, true @@ -69,7 +69,7 @@ COPY ( SELECT st_asmvt( {"geom": ST_AsMVTGeom( geom, - 
ST_MakeEnvelope(0, 0, 1000, 1000), + ST_Extent(ST_MakeEnvelope(0, 0, 1000, 1000)), 4096, 256, true @@ -118,7 +118,7 @@ COPY ( SELECT st_asmvt( {"geom": ST_AsMVTGeom( geom, - ST_TileEnvelope(2, 1, 1), + ST_Extent(ST_TileEnvelope(2, 1, 1)), 4096, 256, false @@ -136,17 +136,11 @@ select count(*) from st_read('__TEST_DIR__/test_global_linestring.mvt'); ---- 1 -# Test that clipped MULTI_LINESTRING can be read back +# Test that LINESTRING with attributes can be read back statement ok COPY ( SELECT st_asmvt( - {"geom": ST_AsMVTGeom( - geom, - ST_TileEnvelope(5, 10, 12), - 4096, - 256, - true - ), "name": name}, + {"geom": geom, "name": name}, 'roads' ) as mvt FROM ( @@ -154,7 +148,6 @@ COPY ( (st_geomfromtext('MULTILINESTRING((100 100, 500 500), (600 600, 900 900))'), 'road1'), (st_geomfromtext('LINESTRING(200 200, 800 800)'), 'road2') ) t(geom, name) - WHERE ST_Intersects(geom, ST_TileEnvelope(5, 10, 12)) ) TO '__TEST_DIR__/test_roads.mvt' (FORMAT BLOB); query II From a78bfe420eb5c480eaab4cad3b11276cc5f39fb1 Mon Sep 17 00:00:00 2001 From: Jesper Paulsen Date: Thu, 23 Oct 2025 09:30:06 +0200 Subject: [PATCH 10/41] fix invalid geometries in ST_AsMVTGeom Changes GEOS_PREC_NO_TOPO to GEOS_PREC_VALID_OUTPUT in get_gridded() to ensure topological validity during grid snapping operations. 
--- src/spatial/modules/geos/geos_geometry.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spatial/modules/geos/geos_geometry.hpp b/src/spatial/modules/geos/geos_geometry.hpp index 6ebf063c..a275e915 100644 --- a/src/spatial/modules/geos/geos_geometry.hpp +++ b/src/spatial/modules/geos/geos_geometry.hpp @@ -369,7 +369,7 @@ inline GeosGeometry GeosGeometry::get_transformed(const double matrix[6]) const } inline GeosGeometry GeosGeometry::get_gridded(double grid_size) const { - return GeosGeometry(handle, GEOSGeom_setPrecision_r(handle, geom, grid_size, GEOS_PREC_NO_TOPO)); + return GeosGeometry(handle, GEOSGeom_setPrecision_r(handle, geom, grid_size, GEOS_PREC_VALID_OUTPUT)); } inline GeosGeometry GeosGeometry::get_maximum_inscribed_circle() const { From 9d9825807471444e165d8087fffc78f34807cf06 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Fri, 31 Oct 2025 10:53:47 +0100 Subject: [PATCH 11/41] format, always orient MVT geom after processing --- src/spatial/modules/geos/geos_module.cpp | 14 +++++++++----- src/spatial/modules/mvt/mvt_module.cpp | 18 +++++++++--------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/spatial/modules/geos/geos_module.cpp b/src/spatial/modules/geos/geos_module.cpp index 09bd42f7..c5424af5 100644 --- a/src/spatial/modules/geos/geos_module.cpp +++ b/src/spatial/modules/geos/geos_module.cpp @@ -327,9 +327,6 @@ struct ST_AsMVTGeom { const auto &blob = geom_data[geom_idx]; auto geom = lstate.Deserialize(blob); - // Orient polygons in place - geom.orient_polygons(true); - // Compute bounds const auto extent = bind_data.extent; @@ -363,10 +360,14 @@ struct ST_AsMVTGeom { const auto transformed = geom.get_transformed(affine_matrix); // Snap to grid (round coordinates to integers) - const auto snapped = transformed.get_gridded(1.0); + auto snapped = transformed.get_gridded(1.0); // Should we clip? 
if not, return the snapped geometry if (!bind_data.clip) { + + // But first orient in place + snapped.orient_polygons(true); + res_data[out_idx] = lstate.Serialize(result, snapped); continue; } @@ -385,7 +386,10 @@ struct ST_AsMVTGeom { } // Snap again to clean up any potential issues from clipping - const auto cleaned_clipped = clipped.get_gridded(1.0); + auto cleaned_clipped = clipped.get_gridded(1.0); + + // Also orient the polygons in place + cleaned_clipped.orient_polygons(true); res_data[out_idx] = lstate.Serialize(result, cleaned_clipped); } diff --git a/src/spatial/modules/mvt/mvt_module.cpp b/src/spatial/modules/mvt/mvt_module.cpp index 48907eaa..ef636733 100644 --- a/src/spatial/modules/mvt/mvt_module.cpp +++ b/src/spatial/modules/mvt/mvt_module.cpp @@ -609,15 +609,15 @@ class MVTFeatureBuilder { const auto y = CastDouble(cursor.Read()); cursor.Skip(vertex_space); // Skip z and m if present - if (vertex_idx == 0) { - geometry.push_back((1 & 0x7) | (1 << 3)); // MoveTo, 1 part - geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); - geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); - geometry.push_back((2 & 0x7) | ((vertex_count - 1) << 3)); // LineTo, part count - } else { - geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); - geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); - } + if (vertex_idx == 0) { + geometry.push_back((1 & 0x7) | (1 << 3)); // MoveTo, 1 part + geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + geometry.push_back((2 & 0x7) | ((vertex_count - 1) << 3)); // LineTo, part count + } else { + geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); + geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); + } cursor_x = x; cursor_y = y; From c10ae36eab4dfb68e7113f95a12e911b1c547d31 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Fri, 31 Oct 2025 10:55:14 +0100 Subject: [PATCH 12/41] fix typo --- 
src/spatial/modules/geos/geos_module.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spatial/modules/geos/geos_module.cpp b/src/spatial/modules/geos/geos_module.cpp index c5424af5..d5b2556f 100644 --- a/src/spatial/modules/geos/geos_module.cpp +++ b/src/spatial/modules/geos/geos_module.cpp @@ -1290,7 +1290,7 @@ struct ST_DistanceWithin { }); func.SetDescription(R"( - Returns if two geometries are within a target distance of each-other + Returns true if two geometries are within a target distance of each-other )"); func.SetTag("ext", "spatial"); From af00774620046fb2332bc11a41837a998d5f1879 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Fri, 31 Oct 2025 11:09:34 +0100 Subject: [PATCH 13/41] fix 2D_FromWKB functions --- .../modules/main/spatial_functions_scalar.cpp | 41 +++++++++++++++---- test/sql/geometry/st_2d_fromwkb.test | 19 +++++++++ 2 files changed, 51 insertions(+), 9 deletions(-) create mode 100644 test/sql/geometry/st_2d_fromwkb.test diff --git a/src/spatial/modules/main/spatial_functions_scalar.cpp b/src/spatial/modules/main/spatial_functions_scalar.cpp index 5387b778..a5aa86a8 100644 --- a/src/spatial/modules/main/spatial_functions_scalar.cpp +++ b/src/spatial/modules/main/spatial_functions_scalar.cpp @@ -4804,7 +4804,7 @@ struct ST_GeomFromWKB { y_data[i] = vertex.y; } - if (args.AllConstant()) { + if (args.AllConstant() || args.size() == 1) { result.SetVectorType(VectorType::CONSTANT_VECTOR); } } @@ -4872,7 +4872,7 @@ struct ST_GeomFromWKB { ListVector::SetListSize(result, total_size); - if (args.AllConstant()) { + if (args.AllConstant() || args.size() == 1) { result.SetVectorType(VectorType::CONSTANT_VECTOR); } } @@ -4967,7 +4967,7 @@ struct ST_GeomFromWKB { ListVector::SetListSize(result, total_ring_count); ListVector::SetListSize(ring_vec, total_point_count); - if (count == 1) { + if (args.AllConstant() || args.size() == 1) { result.SetVectorType(VectorType::CONSTANT_VECTOR); } } @@ -4986,8 +4986,16 @@ struct 
ST_GeomFromWKB { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Point2DFromWKB", [](ScalarFunctionBuilder &builder) { builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("point", GeoTypes::POINT_2D()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("wkb", GeoTypes::WKB_BLOB()); + variant.SetReturnType(GeoTypes::POINT_2D()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecutePoint); + }); + + builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("blob", LogicalType::BLOB); + variant.SetReturnType(GeoTypes::POINT_2D()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecutePoint); @@ -5001,8 +5009,16 @@ struct ST_GeomFromWKB { FunctionBuilder::RegisterScalar(loader, "ST_LineString2DFromWKB", [](ScalarFunctionBuilder &builder) { builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("linestring", GeoTypes::LINESTRING_2D()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("wkb", GeoTypes::WKB_BLOB()); + variant.SetReturnType(GeoTypes::LINESTRING_2D()); + + variant.SetInit(LocalState::Init); + variant.SetFunction(ExecuteLineString); + }); + + builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("blob", LogicalType::BLOB); + variant.SetReturnType(GeoTypes::LINESTRING_2D()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteLineString); @@ -5016,8 +5032,15 @@ struct ST_GeomFromWKB { FunctionBuilder::RegisterScalar(loader, "ST_Polygon2DFromWKB", [](ScalarFunctionBuilder &builder) { builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("polygon", GeoTypes::POLYGON_2D()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("wkb", GeoTypes::WKB_BLOB()); + variant.SetReturnType(GeoTypes::POLYGON_2D()); + + variant.SetInit(LocalState::Init); + 
variant.SetFunction(ExecutePolygon); + }); + builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { + variant.AddParameter("blob", LogicalType::BLOB); + variant.SetReturnType(GeoTypes::POLYGON_2D()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecutePolygon); diff --git a/test/sql/geometry/st_2d_fromwkb.test b/test/sql/geometry/st_2d_fromwkb.test new file mode 100644 index 00000000..539734f5 --- /dev/null +++ b/test/sql/geometry/st_2d_fromwkb.test @@ -0,0 +1,19 @@ +# name: test/sql/geometry/st_2d_fromwkb.test +# group: [geometry] + +require spatial + +query I +select ST_Point2DFromWKB(ST_AsWKB(ST_Point(1, 2))); +---- +POINT (1 2) + +query I +SELECT ST_Linestring2DFromWKB(ST_AsWKB(ST_GeomFromText('LINESTRING(0 0, 1 1, 2 2)'))); +---- +LINESTRING (0 0, 1 1, 2 2) + +query I +SELECT ST_Polygon2DFromWKB(ST_AsWKB(ST_GeomFromText('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'))); +---- +POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0)) \ No newline at end of file From 25838f33da78b053daa38e71a7d437611ac1fddd Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Fri, 31 Oct 2025 13:26:00 +0100 Subject: [PATCH 14/41] fix reused rtree nodes not being zeroed --- src/spatial/index/rtree/rtree.cpp | 7 ++- .../modules/main/spatial_functions_scalar.cpp | 45 +++++++++---------- test/sql/geometry/st_expand.test | 3 ++ 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/spatial/index/rtree/rtree.cpp b/src/spatial/index/rtree/rtree.cpp index 0b1e9b69..324d22ec 100644 --- a/src/spatial/index/rtree/rtree.cpp +++ b/src/spatial/index/rtree/rtree.cpp @@ -31,6 +31,11 @@ RTreePointer RTree::MakePage(RTreeNodeType type) const { auto &alloc = type == RTreeNodeType::LEAF_PAGE ? 
*leaf_allocator : *node_allocator; pointer = alloc.New(); pointer.SetMetadata(static_cast(type)); + + // zero-initialize the node count + auto &node = RefMutable(pointer); + node.Clear(); + return pointer; } @@ -655,4 +660,4 @@ void RTree::Print() const { Printer::Print(ToString()); } -} // namespace duckdb \ No newline at end of file +} // namespace duckdb diff --git a/src/spatial/modules/main/spatial_functions_scalar.cpp b/src/spatial/modules/main/spatial_functions_scalar.cpp index 6c108267..96d04f18 100644 --- a/src/spatial/modules/main/spatial_functions_scalar.cpp +++ b/src/spatial/modules/main/spatial_functions_scalar.cpp @@ -3003,7 +3003,6 @@ struct ST_Dump { } }; - //====================================================================================================================== // ST_Expand //====================================================================================================================== @@ -3016,28 +3015,29 @@ struct ST_Expand { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { auto &lstate = LocalState::ResetAndGet(state); - BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), [&](const string_t &blob, double distance) { - sgl::geometry geom; - lstate.Deserialize(blob, geom); - auto bbox = sgl::extent_xy::smallest(); - - if (sgl::ops::get_total_extent_xy(geom, bbox) == 0) { - const sgl::geometry empty(sgl::geometry_type::GEOMETRY_COLLECTION, false, false); - return lstate.Serialize(result, empty); - } else { - sgl::geometry expanded(sgl::geometry_type::POLYGON, false, false); - const auto min_x = bbox.min.x - distance; - const auto min_y = bbox.min.y - distance; - const auto max_x = bbox.max.x + distance; - const auto max_y = bbox.max.y + distance; - const double buffer[10] = {min_x, min_y, min_x, max_y, max_x, max_y, max_x, min_y, min_x, min_y}; + BinaryExecutor::Execute( + args.data[0], args.data[1], result, args.size(), [&](const string_t &blob, double distance) { + 
sgl::geometry geom; + lstate.Deserialize(blob, geom); + auto bbox = sgl::extent_xy::smallest(); - sgl::geometry ring(sgl::geometry_type::LINESTRING, false, false); - ring.set_vertex_array(buffer, 5); - expanded.append_part(&ring); - return lstate.Serialize(result, expanded); - } - }); + if (sgl::ops::get_total_extent_xy(geom, bbox) == 0) { + const sgl::geometry empty(sgl::geometry_type::GEOMETRY_COLLECTION, false, false); + return lstate.Serialize(result, empty); + } else { + sgl::geometry expanded(sgl::geometry_type::POLYGON, false, false); + const auto min_x = bbox.min.x - distance; + const auto min_y = bbox.min.y - distance; + const auto max_x = bbox.max.x + distance; + const auto max_y = bbox.max.y + distance; + const double buffer[10] = {min_x, min_y, min_x, max_y, max_x, max_y, max_x, min_y, min_x, min_y}; + + sgl::geometry ring(sgl::geometry_type::LINESTRING, false, false); + ring.set_vertex_array(buffer, 5); + expanded.append_part(&ring); + return lstate.Serialize(result, expanded); + } + }); } //------------------------------------------------------------------------------------------------------------------ @@ -3079,7 +3079,6 @@ struct ST_Expand { } }; - //====================================================================================================================== // ST_Extent //====================================================================================================================== diff --git a/test/sql/geometry/st_expand.test b/test/sql/geometry/st_expand.test index 9f2c1f7d..209d58bc 100644 --- a/test/sql/geometry/st_expand.test +++ b/test/sql/geometry/st_expand.test @@ -1,3 +1,6 @@ +# name: test/sql/geometry/st_expand.test +# group: [geometry] + require spatial query I From 7d2a97d672363ca7fda4c6c19f7e75eb7f076aca Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Tue, 11 Nov 2025 15:11:51 +0100 Subject: [PATCH 15/41] update to duckdb v1.5, apply patches --- duckdb | 2 +- src/spatial/index/rtree/rtree_index.cpp | 8 ++++++-- 
src/spatial/index/rtree/rtree_index.hpp | 9 +++++++-- src/spatial/modules/main/spatial_functions_scalar.cpp | 5 +++++ src/spatial/spatial_types.hpp | 3 ++- 5 files changed, 21 insertions(+), 6 deletions(-) diff --git a/duckdb b/duckdb index 9069f536..39f5583f 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 9069f5363cac06a1172dfeb36f5f2cd2a97bf37b +Subproject commit 39f5583f99dd7a921a1d9824ba4434061b8d91d7 diff --git a/src/spatial/index/rtree/rtree_index.cpp b/src/spatial/index/rtree/rtree_index.cpp index a9257826..b1d8f9da 100644 --- a/src/spatial/index/rtree/rtree_index.cpp +++ b/src/spatial/index/rtree/rtree_index.cpp @@ -284,8 +284,12 @@ bool RTreeIndex::MergeIndexes(IndexLock &state, BoundIndex &other_index) { void RTreeIndex::Vacuum(IndexLock &state) { } -string RTreeIndex::VerifyAndToString(IndexLock &state, const bool only_verify) { - throw NotImplementedException("RTreeIndex::VerifyAndToString() not implemented"); +void RTreeIndex::Verify(IndexLock &l) { + throw NotImplementedException("RTreeIndex::Verify() not implemented"); +} + +string RTreeIndex::ToString(IndexLock &l, bool display_ascii) { + throw NotImplementedException("RTreeIndex::ToString() not implemented"); } void RTreeIndex::VerifyAllocations(IndexLock &state) { diff --git a/src/spatial/index/rtree/rtree_index.hpp b/src/spatial/index/rtree/rtree_index.hpp index 202a6c72..243a4769 100644 --- a/src/spatial/index/rtree/rtree_index.hpp +++ b/src/spatial/index/rtree/rtree_index.hpp @@ -61,8 +61,13 @@ class RTreeIndex final : public BoundIndex { //! Traverses an RTreeIndex and vacuums the qualifying nodes. The lock obtained from InitializeLock must be held void Vacuum(IndexLock &state) override; - //! Returns the string representation of the RTreeIndex, or only traverses and verifies the index - string VerifyAndToString(IndexLock &state, const bool only_verify) override; + //! Traverses and verifies the index. + //! Currently not implemented. 
+ void Verify(IndexLock &l) override; + + //! Returns the string representation of an index. + //! Currently not implemented. + string ToString(IndexLock &l, bool display_ascii = false) override; //! Ensures that the node allocation counts match the node counts. void VerifyAllocations(IndexLock &state) override; diff --git a/src/spatial/modules/main/spatial_functions_scalar.cpp b/src/spatial/modules/main/spatial_functions_scalar.cpp index 96d04f18..468dcaf1 100644 --- a/src/spatial/modules/main/spatial_functions_scalar.cpp +++ b/src/spatial/modules/main/spatial_functions_scalar.cpp @@ -9342,6 +9342,11 @@ struct ST_MMin : VertexAggFunctionBase { static constexpr auto ORDINATE = VertexOrdinate::M; }; +constexpr const char * ST_M::NAME; +constexpr const char * ST_X::NAME; +constexpr const char * ST_Y::NAME; +constexpr const char * ST_Z::NAME; + } // namespace // Helper to access the constant distance from the bind data diff --git a/src/spatial/spatial_types.hpp b/src/spatial/spatial_types.hpp index 4ff859c8..a149e230 100644 --- a/src/spatial/spatial_types.hpp +++ b/src/spatial/spatial_types.hpp @@ -18,9 +18,10 @@ struct GeoTypes { static LogicalType POLYGON_3D(); static LogicalType BOX_2D(); static LogicalType BOX_2DF(); - static LogicalType GEOMETRY(); static LogicalType WKB_BLOB(); + static LogicalType GEOMETRY(); + static void Register(ExtensionLoader &loader); static LogicalType CreateEnumType(const string &name, const vector &members); From 273b1a22aa3300a30802884f3e6e219ca7865167 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Tue, 11 Nov 2025 15:25:41 +0100 Subject: [PATCH 16/41] use duckdb native geometry type instead --- src/spatial/CMakeLists.txt | 1 - src/spatial/geometry/geometry_processor.hpp | 26 +- .../geometry/geometry_serialization.cpp | 4 +- src/spatial/geometry/geometry_type.hpp | 46 ++-- src/spatial/geometry/vertex.hpp | 54 ---- .../rtree/rtree_index_create_logical.cpp | 10 +- .../rtree/rtree_index_create_physical.cpp | 2 - 
.../index/rtree/rtree_index_plan_scan.cpp | 4 +- src/spatial/modules/gdal/gdal_module.cpp | 8 +- src/spatial/modules/geos/geos_module.cpp | 232 +++++++++--------- src/spatial/modules/geos/geos_serde.cpp | 6 +- .../main/spatial_functions_aggregate.cpp | 2 +- .../modules/main/spatial_functions_cast.cpp | 42 ++-- .../modules/main/spatial_functions_scalar.cpp | 216 ++++++++-------- src/spatial/modules/mvt/mvt_module.cpp | 6 +- src/spatial/modules/proj/proj_module.cpp | 14 +- .../modules/shapefile/shapefile_module.cpp | 4 +- .../operators/spatial_join_optimizer.cpp | 4 +- .../operators/spatial_join_physical.cpp | 8 +- src/spatial/spatial_extension.cpp | 2 - src/spatial/spatial_geoarrow.cpp | 10 +- src/spatial/spatial_types.cpp | 4 +- src/spatial/spatial_types.hpp | 3 +- 23 files changed, 325 insertions(+), 383 deletions(-) diff --git a/src/spatial/CMakeLists.txt b/src/spatial/CMakeLists.txt index f765cdab..1c72211c 100644 --- a/src/spatial/CMakeLists.txt +++ b/src/spatial/CMakeLists.txt @@ -8,5 +8,4 @@ set(EXTENSION_SOURCES ${EXTENSION_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/spatial_extension.cpp ${CMAKE_CURRENT_SOURCE_DIR}/spatial_types.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/spatial_geoarrow.cpp PARENT_SCOPE) \ No newline at end of file diff --git a/src/spatial/geometry/geometry_processor.hpp b/src/spatial/geometry/geometry_processor.hpp index a807a019..394e03e8 100644 --- a/src/spatial/geometry/geometry_processor.hpp +++ b/src/spatial/geometry/geometry_processor.hpp @@ -105,8 +105,8 @@ class GeometryProcessor { bool has_z = false; bool has_m = false; uint32_t nesting_level = 0; - GeometryType current_type = GeometryType::POINT; - GeometryType parent_type = GeometryType::POINT; + LegacyGeometryType current_type = LegacyGeometryType::POINT; + LegacyGeometryType parent_type = LegacyGeometryType::POINT; protected: bool HasZ() const { @@ -121,10 +121,10 @@ class GeometryProcessor { uint32_t NestingLevel() const { return nesting_level; } - GeometryType CurrentType() const { + 
LegacyGeometryType CurrentType() const { return current_type; } - GeometryType ParentType() const { + LegacyGeometryType ParentType() const { return parent_type; } @@ -225,11 +225,11 @@ class GeometryProcessor { has_m = props.HasM(); nesting_level = 0; current_type = geom.GetType(); - parent_type = GeometryType::POINT; + parent_type = LegacyGeometryType::POINT; Cursor cursor(geom); - cursor.Skip(); + cursor.Skip(); cursor.Skip(); cursor.Skip(); cursor.Skip(); @@ -247,25 +247,25 @@ class GeometryProcessor { auto type = cursor.Peek(); switch (type) { case SerializedGeometryType::POINT: - current_type = GeometryType::POINT; + current_type = LegacyGeometryType::POINT; return ReadPoint(cursor, args...); case SerializedGeometryType::LINESTRING: - current_type = GeometryType::LINESTRING; + current_type = LegacyGeometryType::LINESTRING; return ReadLineString(cursor, args...); case SerializedGeometryType::POLYGON: - current_type = GeometryType::POLYGON; + current_type = LegacyGeometryType::POLYGON; return ReadPolygon(cursor, args...); case SerializedGeometryType::MULTIPOINT: - current_type = GeometryType::MULTIPOINT; + current_type = LegacyGeometryType::MULTIPOINT; return ReadCollection(cursor, args...); case SerializedGeometryType::MULTILINESTRING: - current_type = GeometryType::MULTILINESTRING; + current_type = LegacyGeometryType::MULTILINESTRING; return ReadCollection(cursor, args...); case SerializedGeometryType::MULTIPOLYGON: - current_type = GeometryType::MULTIPOLYGON; + current_type = LegacyGeometryType::MULTIPOLYGON; return ReadCollection(cursor, args...); case SerializedGeometryType::GEOMETRYCOLLECTION: - current_type = GeometryType::GEOMETRYCOLLECTION; + current_type = LegacyGeometryType::GEOMETRYCOLLECTION; return ReadCollection(cursor, args...); default: throw SerializationException("Unknown geometry type (%ud)", static_cast(type)); diff --git a/src/spatial/geometry/geometry_serialization.cpp b/src/spatial/geometry/geometry_serialization.cpp index 
87cc403c..c9b03936 100644 --- a/src/spatial/geometry/geometry_serialization.cpp +++ b/src/spatial/geometry/geometry_serialization.cpp @@ -141,7 +141,7 @@ static void SerializeRecursive(BinaryWriter &cursor, const sgl::geometry *geom, throw InvalidInputException("Cannot serialize geometry of type %d", static_cast(type)); } - // The GeometryType enum used to start with POINT = 0 + // The LegacyGeometryType enum used to start with POINT = 0 // but now it starts with INVALID = 0, so we need to subtract 1 cursor.Write(static_cast(type) - 1); @@ -213,7 +213,7 @@ void Serde::Serialize(const sgl::geometry &geom, char *buffer, size_t buffer_siz throw InvalidInputException("Cannot serialize geometry of type INVALID"); } - // The GeometryType enum used to start with POINT = 0 + // The LegacyGeometryType enum used to start with POINT = 0 // but now it starts with INVALID = 0, so we need to subtract 1 cursor.Write(static_cast(type) - 1); cursor.Write(flags); diff --git a/src/spatial/geometry/geometry_type.hpp b/src/spatial/geometry/geometry_type.hpp index 74c34ca4..8ce41a99 100644 --- a/src/spatial/geometry/geometry_type.hpp +++ b/src/spatial/geometry/geometry_type.hpp @@ -11,7 +11,7 @@ namespace duckdb { -enum class GeometryType : uint8_t { +enum class LegacyGeometryType : uint8_t { POINT = 0, LINESTRING, POLYGON, @@ -21,37 +21,37 @@ enum class GeometryType : uint8_t { GEOMETRYCOLLECTION }; -struct GeometryTypes { - static bool IsSinglePart(GeometryType type) { - return type == GeometryType::POINT || type == GeometryType::LINESTRING; +struct LegacyGeometryTypes { + static bool IsSinglePart(LegacyGeometryType type) { + return type == LegacyGeometryType::POINT || type == LegacyGeometryType::LINESTRING; } - static bool IsMultiPart(GeometryType type) { - return type == GeometryType::POLYGON || type == GeometryType::MULTIPOINT || - type == GeometryType::MULTILINESTRING || type == GeometryType::MULTIPOLYGON || - type == GeometryType::GEOMETRYCOLLECTION; + static bool 
IsMultiPart(LegacyGeometryType type) { + return type == LegacyGeometryType::POLYGON || type == LegacyGeometryType::MULTIPOINT || + type == LegacyGeometryType::MULTILINESTRING || type == LegacyGeometryType::MULTIPOLYGON || + type == LegacyGeometryType::GEOMETRYCOLLECTION; } - static bool IsCollection(GeometryType type) { - return type == GeometryType::MULTIPOINT || type == GeometryType::MULTILINESTRING || - type == GeometryType::MULTIPOLYGON || type == GeometryType::GEOMETRYCOLLECTION; + static bool IsCollection(LegacyGeometryType type) { + return type == LegacyGeometryType::MULTIPOINT || type == LegacyGeometryType::MULTILINESTRING || + type == LegacyGeometryType::MULTIPOLYGON || type == LegacyGeometryType::GEOMETRYCOLLECTION; } - static string ToString(GeometryType type) { + static string ToString(LegacyGeometryType type) { switch (type) { - case GeometryType::POINT: + case LegacyGeometryType::POINT: return "POINT"; - case GeometryType::LINESTRING: + case LegacyGeometryType::LINESTRING: return "LINESTRING"; - case GeometryType::POLYGON: + case LegacyGeometryType::POLYGON: return "POLYGON"; - case GeometryType::MULTIPOINT: + case LegacyGeometryType::MULTIPOINT: return "MULTIPOINT"; - case GeometryType::MULTILINESTRING: + case LegacyGeometryType::MULTILINESTRING: return "MULTILINESTRING"; - case GeometryType::MULTIPOLYGON: + case LegacyGeometryType::MULTIPOLYGON: return "MULTIPOLYGON"; - case GeometryType::GEOMETRYCOLLECTION: + case LegacyGeometryType::GEOMETRYCOLLECTION: return "GEOMETRYCOLLECTION"; default: return StringUtil::Format("UNKNOWN(%d)", static_cast(type)); @@ -85,9 +85,9 @@ class geometry_t { return data; } - GeometryType GetType() const { + LegacyGeometryType GetType() const { // return the type - const auto type = Load(const_data_ptr_cast(data.GetPrefix())); + const auto type = Load(const_data_ptr_cast(data.GetPrefix())); const auto props = Load(const_data_ptr_cast(data.GetPrefix() + 1)); props.CheckVersion(); return type; @@ -104,7 +104,7 @@ class 
geometry_t { Cursor cursor(data); // Read the header - auto header_type = cursor.Read(); + auto header_type = cursor.Read(); auto properties = cursor.Read(); auto hash = cursor.Read(); (void)hash; @@ -123,7 +123,7 @@ class geometry_t { return true; } - if (header_type == GeometryType::POINT) { + if (header_type == LegacyGeometryType::POINT) { cursor.Skip(4); // skip padding // Read the point diff --git a/src/spatial/geometry/vertex.hpp b/src/spatial/geometry/vertex.hpp index 0f79b71f..1fd2e0f3 100644 --- a/src/spatial/geometry/vertex.hpp +++ b/src/spatial/geometry/vertex.hpp @@ -147,58 +147,4 @@ struct PointXYZM : PointXYZ { } }; -// TODO: Deprecate these for the generic PointXY, PointXYZ, PointXYM, PointXYZM instead -enum class VertexType : uint8_t { XY, XYZ, XYM, XYZM }; -struct VertexXY : public PointXY { - static const constexpr VertexType TYPE = VertexType::XY; - static const constexpr bool IS_VERTEX = true; - static const constexpr bool HAS_Z = false; - static const constexpr bool HAS_M = false; - - VertexXY() = default; - explicit VertexXY(double val) : PointXY(val) { - } - VertexXY(double x, double y) : PointXY(x, y) { - } -}; - -struct VertexXYZ : public PointXYZ { - static const constexpr VertexType TYPE = VertexType::XYZ; - static const constexpr bool IS_VERTEX = true; - static const constexpr bool HAS_Z = true; - static const constexpr bool HAS_M = false; - - VertexXYZ() = default; - explicit VertexXYZ(double val) : PointXYZ(val) { - } - VertexXYZ(double x, double y, double z) : PointXYZ(x, y, z) { - } -}; - -struct VertexXYM : public PointXYM { - static const constexpr VertexType TYPE = VertexType::XYM; - static const constexpr bool IS_VERTEX = true; - static const constexpr bool HAS_Z = false; - static const constexpr bool HAS_M = true; - - VertexXYM() = default; - explicit VertexXYM(double val) : PointXYM(val) { - } - VertexXYM(double x, double y, double m) : PointXYM(x, y, m) { - } -}; - -struct VertexXYZM : public PointXYZM { - static const 
constexpr VertexType TYPE = VertexType::XYZM; - static const constexpr bool IS_VERTEX = true; - static const constexpr bool HAS_Z = true; - static const constexpr bool HAS_M = true; - - VertexXYZM() = default; - explicit VertexXYZM(double val) : PointXYZM(val) { - } - VertexXYZM(double x, double y, double z, double m) : PointXYZM(x, y, z, m) { - } -}; - } // namespace duckdb \ No newline at end of file diff --git a/src/spatial/index/rtree/rtree_index_create_logical.cpp b/src/spatial/index/rtree/rtree_index_create_logical.cpp index 15bfb0ac..263d4136 100644 --- a/src/spatial/index/rtree/rtree_index_create_logical.cpp +++ b/src/spatial/index/rtree/rtree_index_create_logical.cpp @@ -54,7 +54,7 @@ static PhysicalOperator &CreateNullFilter(PhysicalPlanGenerator &generator, cons auto &is_empty_entry = catalog.GetEntry(context, CatalogType::SCALAR_FUNCTION_ENTRY, DEFAULT_SCHEMA, "ST_IsEmpty") .Cast(); - auto is_empty_func = is_empty_entry.functions.GetFunctionByArguments(context, {GeoTypes::GEOMETRY()}); + auto is_empty_func = is_empty_entry.functions.GetFunctionByArguments(context, {LogicalType::GEOMETRY()}); vector> is_empty_args; is_empty_args.push_back(std::move(bound_ref)); auto is_empty_expr = make_uniq_base(LogicalType::BOOLEAN, is_empty_func, @@ -79,9 +79,9 @@ static PhysicalOperator &CreateBoundingBoxProjection(PhysicalPlanGenerator &plan auto &bbox_func_entry = catalog.GetEntry(context, CatalogType::SCALAR_FUNCTION_ENTRY, DEFAULT_SCHEMA, "ST_Extent_Approx") .Cast(); - auto bbox_func = bbox_func_entry.functions.GetFunctionByArguments(context, {GeoTypes::GEOMETRY()}); + auto bbox_func = bbox_func_entry.functions.GetFunctionByArguments(context, {LogicalType::GEOMETRY()}); - auto geom_ref_expr = make_uniq_base(GeoTypes::GEOMETRY(), 0); + auto geom_ref_expr = make_uniq_base(LogicalType::GEOMETRY(), 0); vector> bbox_args; bbox_args.push_back(std::move(geom_ref_expr)); @@ -151,7 +151,7 @@ PhysicalOperator &RTreeIndex::CreatePlan(PlanIndexInput &input) { auto &expr = 
op.unbound_expressions[0]; // Validate that we have the right type of expression (float array) - if (expr->return_type != GeoTypes::GEOMETRY()) { + if (expr->return_type != LogicalType::GEOMETRY()) { throw BinderException("RTree indexes can only be created over GEOMETRY columns."); } @@ -218,7 +218,7 @@ PhysicalOperator &LogicalCreateRTreeIndex::CreatePlan(ClientContext &context, Ph auto &expr = op.unbound_expressions[0]; // Validate that we have the right type of expression (float array) - if (expr->return_type != GeoTypes::GEOMETRY()) { + if (expr->return_type != LogicalType::GEOMETRY()) { throw BinderException("RTree indexes can only be created over GEOMETRY columns."); } diff --git a/src/spatial/index/rtree/rtree_index_create_physical.cpp b/src/spatial/index/rtree/rtree_index_create_physical.cpp index fd3e5c1b..69232a5f 100644 --- a/src/spatial/index/rtree/rtree_index_create_physical.cpp +++ b/src/spatial/index/rtree/rtree_index_create_physical.cpp @@ -7,10 +7,8 @@ #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp" #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" #include "duckdb/common/exception/transaction_exception.hpp" -#include "duckdb/main/attached_database.hpp" #include "duckdb/storage/storage_manager.hpp" #include "duckdb/storage/table_io_manager.hpp" -#include "duckdb/common/sort/sort.hpp" #include "duckdb/parallel/base_pipeline_event.hpp" namespace duckdb { diff --git a/src/spatial/index/rtree/rtree_index_plan_scan.cpp b/src/spatial/index/rtree/rtree_index_plan_scan.cpp index 33bae7e9..42a19567 100644 --- a/src/spatial/index/rtree/rtree_index_plan_scan.cpp +++ b/src/spatial/index/rtree/rtree_index_plan_scan.cpp @@ -99,11 +99,11 @@ class RTreeIndexScanOptimizer : public OptimizerExtension { // We can only optimize if there are two children return false; } - if (function.arguments[0] != GeoTypes::GEOMETRY()) { + if (function.arguments[0] != LogicalType::GEOMETRY()) { // We can only optimize if the first child is a GEOMETRY 
return false; } - if (function.arguments[1] != GeoTypes::GEOMETRY()) { + if (function.arguments[1] != LogicalType::GEOMETRY()) { // We can only optimize if the second child is a GEOMETRY return false; } diff --git a/src/spatial/modules/gdal/gdal_module.cpp b/src/spatial/modules/gdal/gdal_module.cpp index f24f0498..3f016acb 100644 --- a/src/spatial/modules/gdal/gdal_module.cpp +++ b/src/spatial/modules/gdal/gdal_module.cpp @@ -728,7 +728,7 @@ struct ST_Read : ArrowTableFunction { if (result->keep_wkb) { return_types.emplace_back(GeoTypes::WKB_BLOB()); } else { - return_types.emplace_back(GeoTypes::GEOMETRY()); + return_types.emplace_back(LogicalType::GEOMETRY()); if (column_name == "wkb_geometry") { column_name = "geom"; } @@ -979,7 +979,7 @@ struct ST_Read : ArrowTableFunction { // Found a geometry column // Convert the WKB columns to a geometry column - Vector geom_vec(GeoTypes::GEOMETRY(), output_size); + Vector geom_vec(LogicalType::GEOMETRY(), output_size); state.ConvertWKB(output.data[col_idx], geom_vec, output_size); output.data[col_idx].ReferenceAndSetType(geom_vec); @@ -1616,7 +1616,7 @@ struct ST_Write { }; static bool IsGeometryType(const LogicalType &type) { - return type == GeoTypes::WKB_BLOB() || type == GeoTypes::POINT_2D() || type == GeoTypes::GEOMETRY(); + return type == GeoTypes::WKB_BLOB() || type == GeoTypes::POINT_2D() || type == LogicalType::GEOMETRY(); } static unique_ptr OGRFieldTypeFromLogicalType(const string &name, const LogicalType &type) { @@ -1798,7 +1798,7 @@ struct ST_Write { return OGRGeometryUniquePtr(ptr); } - if (type == GeoTypes::GEOMETRY()) { + if (type == LogicalType::GEOMETRY()) { const auto blob = value.GetValueUnsafe(); uint32_t size; const auto wkb = WKBWriter::Write(blob, &size, arena); diff --git a/src/spatial/modules/geos/geos_module.cpp b/src/spatial/modules/geos/geos_module.cpp index d5b2556f..27a666a0 100644 --- a/src/spatial/modules/geos/geos_module.cpp +++ b/src/spatial/modules/geos/geos_module.cpp @@ -401,42 
+401,42 @@ struct ST_AsMVTGeom { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_AsMVTGeom", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("bounds", GeoTypes::BOX_2D()); variant.AddParameter("extent", LogicalType::BIGINT); variant.AddParameter("buffer", LogicalType::BIGINT); variant.AddParameter("clip_geom", LogicalType::BOOLEAN); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); variant.SetBind(Bind); }); func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("bounds", GeoTypes::BOX_2D()); variant.AddParameter("extent", LogicalType::BIGINT); variant.AddParameter("buffer", LogicalType::BIGINT); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); variant.SetBind(Bind); }); func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("bounds", GeoTypes::BOX_2D()); variant.AddParameter("extent", LogicalType::BIGINT); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); variant.SetBind(Bind); }); func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("bounds", GeoTypes::BOX_2D()); - variant.SetReturnType(GeoTypes::GEOMETRY()); 
+ variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -474,8 +474,8 @@ struct ST_Boundary { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Boundary", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -572,32 +572,32 @@ struct ST_Buffer { FunctionBuilder::RegisterScalar(loader, "ST_Buffer", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("distance", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); }); func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("distance", LogicalType::DOUBLE); variant.AddParameter("num_triangles", LogicalType::INTEGER); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteWithSegments); }); func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("distance", LogicalType::DOUBLE); variant.AddParameter("num_triangles", LogicalType::INTEGER); variant.AddParameter("cap_style", LogicalType::VARCHAR); variant.AddParameter("join_style", 
LogicalType::VARCHAR); variant.AddParameter("mitre_limit", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteWithStyle); @@ -631,8 +631,8 @@ struct ST_BuildArea { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_BuildArea", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -656,8 +656,8 @@ struct ST_Contains : AsymmetricPreparedBinaryFunction { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Contains", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + variant.AddParameter("geom2", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetInit(LocalState::Init); @@ -690,8 +690,8 @@ struct ST_ContainsProperly : AsymmetricPreparedBinaryFunction { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_WithinProperly", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + variant.AddParameter("geom2", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetInit(LocalState::Init); @@ -759,10 +759,10 @@ struct ST_ConcaveHull { static void 
Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_ConcaveHull", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("ratio", LogicalType::DOUBLE); variant.AddParameter("allowHoles", LogicalType::BOOLEAN); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -793,8 +793,8 @@ struct ST_ConvexHull { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_ConvexHull", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -871,17 +871,17 @@ struct ST_CoverageInvalidEdges { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_CoverageInvalidEdges", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geoms", LogicalType::LIST(GeoTypes::GEOMETRY())); + variant.AddParameter("geoms", LogicalType::LIST(LogicalType::GEOMETRY())); variant.AddParameter("tolerance", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); }); func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geoms", LogicalType::LIST(GeoTypes::GEOMETRY())); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geoms", LogicalType::LIST(LogicalType::GEOMETRY())); + 
variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetBind(Bind); @@ -956,19 +956,19 @@ struct ST_CoverageSimplify { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_CoverageSimplify", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geoms", LogicalType::LIST(GeoTypes::GEOMETRY())); + variant.AddParameter("geoms", LogicalType::LIST(LogicalType::GEOMETRY())); variant.AddParameter("tolerance", LogicalType::DOUBLE); variant.AddParameter("simplify_boundary", LogicalType::BOOLEAN); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); }); func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geoms", LogicalType::LIST(GeoTypes::GEOMETRY())); + variant.AddParameter("geoms", LogicalType::LIST(LogicalType::GEOMETRY())); variant.AddParameter("tolerance", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetBind(Bind); @@ -1032,8 +1032,8 @@ struct ST_CoverageUnion { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_CoverageUnion", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geoms", LogicalType::LIST(GeoTypes::GEOMETRY())); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geoms", LogicalType::LIST(LogicalType::GEOMETRY())); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -1059,8 +1059,8 @@ struct ST_CoveredBy : AsymmetricPreparedBinaryFunction { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_CoveredBy", [](ScalarFunctionBuilder &func) 
{ func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + variant.AddParameter("geom2", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetInit(LocalState::Init); @@ -1084,8 +1084,8 @@ struct ST_Covers : AsymmetricPreparedBinaryFunction { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Covers", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + variant.AddParameter("geom2", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetInit(LocalState::Init); @@ -1109,8 +1109,8 @@ struct ST_Crosses : SymmetricPreparedBinaryFunction { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Crosses", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + variant.AddParameter("geom2", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetInit(LocalState::Init); @@ -1140,9 +1140,9 @@ struct ST_Difference { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Difference", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + 
variant.AddParameter("geom2", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -1165,8 +1165,8 @@ struct ST_Disjoint : SymmetricPreparedBinaryFunction { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Disjoint", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + variant.AddParameter("geom2", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetInit(LocalState::Init); @@ -1190,8 +1190,8 @@ struct ST_Distance : SymmetricPreparedBinaryFunction { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Distance_GEOS", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + variant.AddParameter("geom2", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::DOUBLE); variant.SetInit(LocalState::Init); @@ -1280,8 +1280,8 @@ struct ST_DistanceWithin { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_DWithin_GEOS", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + variant.AddParameter("geom2", LogicalType::GEOMETRY()); variant.AddParameter("distance", LogicalType::DOUBLE); variant.SetReturnType(LogicalType::BOOLEAN); @@ -1313,8 +1313,8 @@ struct ST_Equals { static void Register(ExtensionLoader &loader) { 
FunctionBuilder::RegisterScalar(loader, "ST_Equals", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + variant.AddParameter("geom2", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetInit(LocalState::Init); @@ -1342,8 +1342,8 @@ struct ST_Envelope { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Envelope", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -1372,9 +1372,9 @@ struct ST_Intersection { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Intersection", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + variant.AddParameter("geom2", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -1399,8 +1399,8 @@ struct ST_Intersects : SymmetricPreparedBinaryFunction { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Intersects", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + 
variant.AddParameter("geom1", LogicalType::GEOMETRY()); + variant.AddParameter("geom2", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetInit(LocalState::Init); @@ -1426,7 +1426,7 @@ struct ST_IsRing { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_IsRing", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetInit(LocalState::Init); @@ -1452,7 +1452,7 @@ struct ST_IsSimple { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_IsSimple", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetInit(LocalState::Init); @@ -1484,7 +1484,7 @@ struct ST_IsValid { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_IsValid", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetInit(LocalState::Init); @@ -1524,16 +1524,16 @@ struct ST_LineMerge { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_LineMerge", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); 
}); func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("preserve_direction", LogicalType::BOOLEAN); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteWithDirection); }); @@ -1559,8 +1559,8 @@ struct ST_MakeValid { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_MakeValid", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -1636,18 +1636,18 @@ struct ST_MaximumInscribedCircle { static void Register(ExtensionLoader &loader) { const auto result_type = LogicalType::STRUCT( - {{"center", GeoTypes::GEOMETRY()}, {"nearest", GeoTypes::GEOMETRY()}, {"radius", LogicalType::DOUBLE}}); + {{"center", LogicalType::GEOMETRY()}, {"nearest", LogicalType::GEOMETRY()}, {"radius", LogicalType::DOUBLE}}); FunctionBuilder::RegisterScalar(loader, "ST_MaximumInscribedCircle", [&](ScalarFunctionBuilder &func) { func.AddVariant([&](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(result_type); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); }); func.AddVariant([&](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("tolerance", LogicalType::DOUBLE); variant.SetReturnType(result_type); variant.SetInit(LocalState::Init); @@ -1690,8 
+1690,8 @@ struct ST_MinimumRotatedRectangle { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_MinimumRotatedRectangle", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -1732,8 +1732,8 @@ struct ST_Node { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Node", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -1759,8 +1759,8 @@ struct ST_Normalize { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Normalize", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -1784,8 +1784,8 @@ struct ST_Overlaps : SymmetricPreparedBinaryFunction { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Overlaps", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + 
variant.AddParameter("geom2", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetInit(LocalState::Init); @@ -1813,8 +1813,8 @@ struct ST_PointOnSurface { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_PointOnSurface", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -1872,8 +1872,8 @@ struct ST_Polygonize { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Polygonize", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geometries", LogicalType::LIST(GeoTypes::GEOMETRY())); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geometries", LogicalType::LIST(LogicalType::GEOMETRY())); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -1909,9 +1909,9 @@ struct ST_ReducePrecision { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_ReducePrecision", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("precision", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -1949,17 +1949,17 @@ struct ST_RemoveRepeatedPoints { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_RemoveRepeatedPoints", 
[](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); }); func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("tolerance", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteWithTolerance); @@ -1986,8 +1986,8 @@ struct ST_Reverse { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Reverse", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -2015,9 +2015,9 @@ struct ST_ShortestLine { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_ShortestLine", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + variant.AddParameter("geom2", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -2043,9 +2043,9 @@ struct ST_Simplify { static void Register(ExtensionLoader 
&loader) { FunctionBuilder::RegisterScalar(loader, "ST_Simplify", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("tolerance", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -2072,9 +2072,9 @@ struct ST_SimplifyPreserveTopology { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_SimplifyPreserveTopology", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("tolerance", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -2097,8 +2097,8 @@ struct ST_Touches : SymmetricPreparedBinaryFunction { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Touches", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + variant.AddParameter("geom2", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetInit(LocalState::Init); @@ -2127,9 +2127,9 @@ struct ST_Union { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Union", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", 
GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + variant.AddParameter("geom2", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -2156,8 +2156,8 @@ struct ST_VoronoiDiagram { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_VoronoiDiagram", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -2180,8 +2180,8 @@ struct ST_Within : AsymmetricPreparedBinaryFunction { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Within", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + variant.AddParameter("geom2", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetInit(LocalState::Init); @@ -2310,7 +2310,7 @@ struct ST_MemUnion_Agg : GeosUnaryAggFunction { static void Register(ExtensionLoader &loader) { const auto agg = AggregateFunction::UnaryAggregateDestructor( - GeoTypes::GEOMETRY(), GeoTypes::GEOMETRY()); + LogicalType::GEOMETRY(), LogicalType::GEOMETRY()); FunctionBuilder::RegisterAggregate(loader, "ST_MemUnion_Agg", [&](AggregateFunctionBuilder &func) { func.SetFunction(agg); @@ -2335,7 +2335,7 @@ struct ST_Intersection_Agg : GeosUnaryAggFunction { static void Register(ExtensionLoader &loader) { const auto agg = AggregateFunction::UnaryAggregateDestructor( - 
GeoTypes::GEOMETRY(), GeoTypes::GEOMETRY()); + LogicalType::GEOMETRY(), LogicalType::GEOMETRY()); FunctionBuilder::RegisterAggregate(loader, "ST_Intersection_Agg", [&](AggregateFunctionBuilder &func) { func.SetFunction(agg); @@ -2526,7 +2526,7 @@ struct ST_Union_Agg { } static void Register(ExtensionLoader &loader) { - AggregateFunction agg({GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), StateSize, Initialize, Update, Combine, + AggregateFunction agg({LogicalType::GEOMETRY()}, LogicalType::GEOMETRY(), StateSize, Initialize, Update, Combine, Finalize, nullptr, nullptr, Destroy); FunctionBuilder::RegisterAggregate(loader, "ST_Union_Agg", [&](AggregateFunctionBuilder &func) { @@ -2791,7 +2791,7 @@ struct ST_CoverageSimplify_Agg : GEOSCoverageAggFunction { static void Register(ExtensionLoader &loader) { using SELF = ST_CoverageSimplify_Agg; - AggregateFunction agg({GeoTypes::GEOMETRY(), LogicalType::DOUBLE}, GeoTypes::GEOMETRY(), StateSize, Initialize, + AggregateFunction agg({LogicalType::GEOMETRY(), LogicalType::DOUBLE}, LogicalType::GEOMETRY(), StateSize, Initialize, Update, Combine, Finalize, nullptr, Bind, Destroy); FunctionBuilder::RegisterAggregate(loader, "ST_CoverageSimplify_Agg", [&](AggregateFunctionBuilder &func) { @@ -2862,7 +2862,7 @@ struct ST_CoverageUnion_Agg : GEOSCoverageAggFunction { static void Register(ExtensionLoader &loader) { using SELF = ST_CoverageUnion_Agg; - const AggregateFunction agg({GeoTypes::GEOMETRY()}, GeoTypes::GEOMETRY(), StateSize, Initialize, Update, + const AggregateFunction agg({LogicalType::GEOMETRY()}, LogicalType::GEOMETRY(), StateSize, Initialize, Update, Combine, Finalize, nullptr, nullptr, Destroy); FunctionBuilder::RegisterAggregate(loader, "ST_CoverageUnion_Agg", [&](AggregateFunctionBuilder &func) { @@ -2952,7 +2952,7 @@ struct ST_CoverageInvalidEdges_Agg : GEOSCoverageAggFunction { static void Register(ExtensionLoader &loader) { using SELF = ST_CoverageInvalidEdges_Agg; - AggregateFunction agg({GeoTypes::GEOMETRY()}, 
GeoTypes::GEOMETRY(), StateSize, Initialize, Update, Combine, + AggregateFunction agg({LogicalType::GEOMETRY()}, LogicalType::GEOMETRY(), StateSize, Initialize, Update, Combine, Finalize, nullptr, Bind, Destroy, nullptr); FunctionBuilder::RegisterAggregate(loader, "ST_CoverageInvalidEdges_Agg", [&](AggregateFunctionBuilder &func) { diff --git a/src/spatial/modules/geos/geos_serde.cpp b/src/spatial/modules/geos/geos_serde.cpp index 93ad4aa9..642336f3 100644 --- a/src/spatial/modules/geos/geos_serde.cpp +++ b/src/spatial/modules/geos/geos_serde.cpp @@ -470,13 +470,13 @@ class GEOSDeserializer final : GeometryProcessor { GEOSGeometry *ProcessCollection(CollectionState &state) override { GEOSGeomTypes collection_type = GEOS_GEOMETRYCOLLECTION; switch (CurrentType()) { - case GeometryType::MULTIPOINT: + case LegacyGeometryType::MULTIPOINT: collection_type = GEOS_MULTIPOINT; break; - case GeometryType::MULTILINESTRING: + case LegacyGeometryType::MULTILINESTRING: collection_type = GEOS_MULTILINESTRING; break; - case GeometryType::MULTIPOLYGON: + case LegacyGeometryType::MULTIPOLYGON: collection_type = GEOS_MULTIPOLYGON; break; default: diff --git a/src/spatial/modules/main/spatial_functions_aggregate.cpp b/src/spatial/modules/main/spatial_functions_aggregate.cpp index 5e59bf8b..c361ba66 100644 --- a/src/spatial/modules/main/spatial_functions_aggregate.cpp +++ b/src/spatial/modules/main/spatial_functions_aggregate.cpp @@ -143,7 +143,7 @@ void RegisterSpatialAggregateFunctions(ExtensionLoader &loader) { // TODO: Dont use geometry_t here const auto agg = AggregateFunction::UnaryAggregate( - GeoTypes::GEOMETRY(), GeoTypes::GEOMETRY()); + LogicalType::GEOMETRY(), LogicalType::GEOMETRY()); FunctionBuilder::RegisterAggregate(loader, "ST_Extent_Agg", [&](AggregateFunctionBuilder &func) { func.SetFunction(agg); diff --git a/src/spatial/modules/main/spatial_functions_cast.cpp b/src/spatial/modules/main/spatial_functions_cast.cpp index ef960c0d..7d67a6fd 100644 --- 
a/src/spatial/modules/main/spatial_functions_cast.cpp +++ b/src/spatial/modules/main/spatial_functions_cast.cpp @@ -171,7 +171,7 @@ struct GeometryCasts { //------------------------------------------------------------------------------------------------------------------ static void Register(ExtensionLoader &loader) { const auto wkb_type = GeoTypes::WKB_BLOB(); - const auto geom_type = GeoTypes::GEOMETRY(); + const auto geom_type = LogicalType::GEOMETRY(); // VARCHAR -> Geometry is explicitly castable loader.RegisterCastFunction(geom_type, LogicalType::VARCHAR, BoundCastInfo(ToVarcharCast), 1); @@ -392,16 +392,16 @@ struct PointCasts { // POINT_3D -> VARCHAR loader.RegisterCastFunction(GeoTypes::POINT_3D(), LogicalType::VARCHAR, BoundCastInfo(ToVarcharCast3D), 1); // POINT_2D -> GEOMETRY - loader.RegisterCastFunction(GeoTypes::POINT_2D(), GeoTypes::GEOMETRY(), + loader.RegisterCastFunction(GeoTypes::POINT_2D(), LogicalType::GEOMETRY(), BoundCastInfo(ToGeometryCast, nullptr, LocalState::InitCast), 1); // POINT_3D -> GEOMETRY - loader.RegisterCastFunction(GeoTypes::POINT_3D(), GeoTypes::GEOMETRY(), + loader.RegisterCastFunction(GeoTypes::POINT_3D(), LogicalType::GEOMETRY(), BoundCastInfo(ToGeometryCast3D, nullptr, LocalState::InitCast), 1); // GEOMETRY -> POINT_2D - loader.RegisterCastFunction(GeoTypes::GEOMETRY(), GeoTypes::POINT_2D(), + loader.RegisterCastFunction(LogicalType::GEOMETRY(), GeoTypes::POINT_2D(), BoundCastInfo(FromGeometryCast, nullptr, LocalState::InitCast), 1); // GEOMETRY -> POINT_3D - loader.RegisterCastFunction(GeoTypes::GEOMETRY(), GeoTypes::POINT_3D(), + loader.RegisterCastFunction(LogicalType::GEOMETRY(), GeoTypes::POINT_3D(), BoundCastInfo(FromGeometryCast3D, nullptr, LocalState::InitCast), 1); // POINT_3D -> POINT_2D loader.RegisterCastFunction(GeoTypes::POINT_3D(), GeoTypes::POINT_2D(), ToPoint2DCast, 1); @@ -411,10 +411,10 @@ struct PointCasts { // POINT_4D -> POINT_2D loader.RegisterCastFunction(GeoTypes::POINT_4D(), GeoTypes::POINT_2D(), 
ToPoint2DCast, 1); // POINT_4D -> GEOMETRY - loader.RegisterCastFunction(GeoTypes::POINT_4D(), GeoTypes::GEOMETRY(), + loader.RegisterCastFunction(GeoTypes::POINT_4D(), LogicalType::GEOMETRY(), BoundCastInfo(ToGeometryCast4D, nullptr, LocalState::InitCast), 1); // GEOMETRY -> POINT_4D - loader.RegisterCastFunction(GeoTypes::GEOMETRY(), GeoTypes::POINT_4D(), + loader.RegisterCastFunction(LogicalType::GEOMETRY(), GeoTypes::POINT_4D(), BoundCastInfo(FromGeometryCast4D, nullptr, LocalState::InitCast), 1); } }; @@ -607,16 +607,16 @@ struct LinestringCasts { // LINESTRING_3D -> VARCHAR loader.RegisterCastFunction(GeoTypes::LINESTRING_3D(), LogicalType::VARCHAR, BoundCastInfo(ToVarcharCast3D), 1); // LINESTRING_2D -> GEOMETRY - loader.RegisterCastFunction(GeoTypes::LINESTRING_2D(), GeoTypes::GEOMETRY(), + loader.RegisterCastFunction(GeoTypes::LINESTRING_2D(), LogicalType::GEOMETRY(), BoundCastInfo(ToGeometryCast, nullptr, LocalState::InitCast), 1); // LINESTRING_3D -> GEOMETRY - loader.RegisterCastFunction(GeoTypes::LINESTRING_3D(), GeoTypes::GEOMETRY(), + loader.RegisterCastFunction(GeoTypes::LINESTRING_3D(), LogicalType::GEOMETRY(), BoundCastInfo(ToGeometryCast3D, nullptr, LocalState::InitCast), 1); // GEOMETRY -> LINESTRING_2D - loader.RegisterCastFunction(GeoTypes::GEOMETRY(), GeoTypes::LINESTRING_2D(), + loader.RegisterCastFunction(LogicalType::GEOMETRY(), GeoTypes::LINESTRING_2D(), BoundCastInfo(FromGeometryCast, nullptr, LocalState::InitCast), 1); // GEOMETRY -> LINESTRING_3D - loader.RegisterCastFunction(GeoTypes::GEOMETRY(), GeoTypes::LINESTRING_3D(), + loader.RegisterCastFunction(LogicalType::GEOMETRY(), GeoTypes::LINESTRING_3D(), BoundCastInfo(FromGeometryCast3D, nullptr, LocalState::InitCast), 1); // LINESTRING_3D -> LINESTRING_2D loader.RegisterCastFunction(GeoTypes::LINESTRING_3D(), GeoTypes::LINESTRING_2D(), ToLine2DCast, 1); @@ -871,16 +871,16 @@ struct PolygonCasts { // POLYGON_3D -> VARCHAR loader.RegisterCastFunction(GeoTypes::POLYGON_3D(), 
LogicalType::VARCHAR, BoundCastInfo(ToVarcharCast3D), 1); // POLYGON_2D -> GEOMETRY - loader.RegisterCastFunction(GeoTypes::POLYGON_2D(), GeoTypes::GEOMETRY(), + loader.RegisterCastFunction(GeoTypes::POLYGON_2D(), LogicalType::GEOMETRY(), BoundCastInfo(ToGeometryCast, nullptr, LocalState::InitCast), 1); // POLYGON_3D -> GEOMETRY - loader.RegisterCastFunction(GeoTypes::POLYGON_3D(), GeoTypes::GEOMETRY(), + loader.RegisterCastFunction(GeoTypes::POLYGON_3D(), LogicalType::GEOMETRY(), BoundCastInfo(ToGeometryCast3D, nullptr, LocalState::InitCast), 1); // GEOMETRY -> POLYGON_2D - loader.RegisterCastFunction(GeoTypes::GEOMETRY(), GeoTypes::POLYGON_2D(), + loader.RegisterCastFunction(LogicalType::GEOMETRY(), GeoTypes::POLYGON_2D(), BoundCastInfo(FromGeometryCast, nullptr, LocalState::InitCast), 1); // GEOMETRY -> POLYGON_3D - loader.RegisterCastFunction(GeoTypes::GEOMETRY(), GeoTypes::POLYGON_3D(), + loader.RegisterCastFunction(LogicalType::GEOMETRY(), GeoTypes::POLYGON_3D(), BoundCastInfo(FromGeometryCast3D, nullptr, LocalState::InitCast), 1); // POLYGON_3D -> POLYGON_2D loader.RegisterCastFunction(GeoTypes::POLYGON_3D(), GeoTypes::POLYGON_2D(), ToPolygon2DCast, 1); @@ -954,11 +954,11 @@ struct BoxCasts { loader.RegisterCastFunction(GeoTypes::BOX_2D(), LogicalType::VARCHAR, BoundCastInfo(ToVarcharCast), 1); // BOX_2D -> GEOMETRY - loader.RegisterCastFunction(GeoTypes::BOX_2D(), GeoTypes::GEOMETRY(), + loader.RegisterCastFunction(GeoTypes::BOX_2D(), LogicalType::GEOMETRY(), BoundCastInfo(ToGeometryCast2D, nullptr, LocalState::InitCast), 1); // BOX_2F -> GEOMETRY - loader.RegisterCastFunction(GeoTypes::BOX_2DF(), GeoTypes::GEOMETRY(), + loader.RegisterCastFunction(GeoTypes::BOX_2DF(), LogicalType::GEOMETRY(), BoundCastInfo(ToGeometryCast2F, nullptr, LocalState::InitCast), 1); } }; @@ -1327,19 +1327,19 @@ class GeometryTextProcessor final : GeometryProcessor { void ProcessCollection(CollectionState &state, bool) override { bool collection_is_typed = false; switch 
(CurrentType()) { - case GeometryType::MULTIPOINT: + case LegacyGeometryType::MULTIPOINT: text += "MULTIPOINT"; collection_is_typed = true; break; - case GeometryType::MULTILINESTRING: + case LegacyGeometryType::MULTILINESTRING: text += "MULTILINESTRING"; collection_is_typed = true; break; - case GeometryType::MULTIPOLYGON: + case LegacyGeometryType::MULTIPOLYGON: text += "MULTIPOLYGON"; collection_is_typed = true; break; - case GeometryType::GEOMETRYCOLLECTION: + case LegacyGeometryType::GEOMETRYCOLLECTION: text += "GEOMETRYCOLLECTION"; collection_is_typed = false; break; diff --git a/src/spatial/modules/main/spatial_functions_scalar.cpp b/src/spatial/modules/main/spatial_functions_scalar.cpp index 468dcaf1..f36fc614 100644 --- a/src/spatial/modules/main/spatial_functions_scalar.cpp +++ b/src/spatial/modules/main/spatial_functions_scalar.cpp @@ -222,7 +222,7 @@ struct ST_Affine { FunctionBuilder::RegisterScalar(loader, "ST_Affine", [](ScalarFunctionBuilder &func) { // GEOMETRY (3D) func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("a", LogicalType::DOUBLE); variant.AddParameter("b", LogicalType::DOUBLE); variant.AddParameter("c", LogicalType::DOUBLE); @@ -235,7 +235,7 @@ struct ST_Affine { variant.AddParameter("xoff", LogicalType::DOUBLE); variant.AddParameter("yoff", LogicalType::DOUBLE); variant.AddParameter("zoff", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute3D); @@ -243,14 +243,14 @@ struct ST_Affine { // GEOMETRY (2D) func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("a", LogicalType::DOUBLE); variant.AddParameter("b", LogicalType::DOUBLE); 
variant.AddParameter("d", LogicalType::DOUBLE); variant.AddParameter("e", LogicalType::DOUBLE); variant.AddParameter("xoff", LogicalType::DOUBLE); variant.AddParameter("yoff", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute2D); @@ -537,7 +537,7 @@ struct ST_Area { FunctionBuilder::RegisterScalar(loader, "ST_Area", [](ScalarFunctionBuilder &func) { // GEOMETRY func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::DOUBLE); variant.SetInit(LocalState::Init); @@ -873,7 +873,7 @@ struct ST_AsGeoJSON { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_AsGeoJSON", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::JSON()); variant.SetInit(LocalState::Init); @@ -967,7 +967,7 @@ struct ST_AsText { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_AsText", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::VARCHAR); variant.SetFunction(ExecuteGeometry); @@ -1041,7 +1041,7 @@ struct ST_AsWKB { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_AsWKB", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(GeoTypes::WKB_BLOB()); 
variant.SetFunction(Execute); @@ -1108,7 +1108,7 @@ struct ST_AsHEXWKB { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_AsHEXWKB", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::VARCHAR); variant.SetFunction(Execute); @@ -1345,7 +1345,7 @@ struct ST_AsSVG { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_AsSVG", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("relative", LogicalType::BOOLEAN); variant.AddParameter("precision", LogicalType::INTEGER); @@ -1595,8 +1595,8 @@ struct ST_Centroid { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Centroid", [&](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteGeometry); @@ -1795,8 +1795,8 @@ struct ST_Collect { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Collect", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geoms", LogicalType::LIST(GeoTypes::GEOMETRY())); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geoms", LogicalType::LIST(LogicalType::GEOMETRY())); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -1986,17 
+1986,17 @@ struct ST_CollectionExtract { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_CollectionExtract", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("type", LogicalType::INTEGER); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteTyped); }); func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteAuto); @@ -2215,7 +2215,7 @@ struct ST_Dimension { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Dimension", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::INTEGER); variant.SetInit(LocalState::Init); @@ -2346,8 +2346,8 @@ struct ST_Azimuth { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Azimuth", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("origin", GeoTypes::GEOMETRY()); - variant.AddParameter("target", GeoTypes::GEOMETRY()); + variant.AddParameter("origin", LogicalType::GEOMETRY()); + variant.AddParameter("target", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::DOUBLE); variant.SetInit(LocalState::Init); @@ -2628,8 +2628,8 @@ struct ST_Distance { }); func.AddVariant([](ScalarFunctionVariantBuilder 
&variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + variant.AddParameter("geom2", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::DOUBLE); variant.SetInit(LocalState::Init); @@ -2792,8 +2792,8 @@ struct ST_DistanceWithin { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_DWithin", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + variant.AddParameter("geom2", LogicalType::GEOMETRY()); variant.AddParameter("distance", LogicalType::DOUBLE); variant.SetReturnType(LogicalType::BOOLEAN); @@ -2985,10 +2985,10 @@ struct ST_Dump { FunctionBuilder::RegisterScalar(loader, "ST_Dump", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::LIST(LogicalType::STRUCT( - {{"geom", GeoTypes::GEOMETRY()}, {"path", LogicalType::LIST(LogicalType::INTEGER)}}))); + {{"geom", LogicalType::GEOMETRY()}, {"path", LogicalType::LIST(LogicalType::INTEGER)}}))); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -3062,9 +3062,9 @@ struct ST_Expand { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Expand", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("distance", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); 
variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -3210,7 +3210,7 @@ struct ST_Extent { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Extent", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(GeoTypes::BOX_2D()); variant.SetInit(LocalState::Init); @@ -3296,7 +3296,7 @@ struct ST_Extent_Approx { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Extent_Approx", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(GeoTypes::BOX_2DF()); variant.SetFunction(Execute); @@ -3410,7 +3410,7 @@ struct Op_IntersectApprox { FunctionBuilder::RegisterScalar(loader, "&&", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { variant.AddParameter("box", GeoTypes::BOX_2D()); - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetFunction(Execute); @@ -3562,8 +3562,8 @@ struct ST_ExteriorRing { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_ExteriorRing", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteGeometry); @@ -3768,8 +3768,8 @@ struct ST_FlipCoordinates { static void Register(ExtensionLoader &loader) { 
FunctionBuilder::RegisterScalar(loader, "ST_FlipCoordinates", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteGeometry); @@ -3880,7 +3880,7 @@ struct ST_ForceBase { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, IMPL::NAME, [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); if (IMPL::HAS_Z) { variant.AddParameter("z", LogicalType::DOUBLE); @@ -3889,7 +3889,7 @@ struct ST_ForceBase { variant.AddParameter("m", LogicalType::DOUBLE); } - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -3988,7 +3988,7 @@ struct ST_GeometryType { static unique_ptr Bind(ClientContext &context, ScalarFunction &bound_function, vector> &arguments) { // Create an enum type for all geometry types - // Ensure that these are in the same order as the GeometryType enum + // Ensure that these are in the same order as the LegacyGeometryType enum const vector enum_values = {"POINT", "LINESTRING", "POLYGON", "MULTIPOINT", "MULTILINESTRING", "MULTIPOLYGON", "GEOMETRYCOLLECTION", // or... 
@@ -4095,7 +4095,7 @@ struct ST_GeometryType { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_GeometryType", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalTypeId::ANY); variant.SetBind(Bind); @@ -4229,7 +4229,7 @@ struct ST_GeomFromHEXWKB { FunctionBuilder::RegisterScalar(loader, alias, [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { variant.AddParameter("hexwkb", LogicalType::VARCHAR); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -4643,7 +4643,7 @@ struct ST_GeomFromGeoJSON { FunctionBuilder::RegisterScalar(loader, "ST_GeomFromGeoJSON", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { variant.AddParameter("geojson", LogicalType::JSON()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -4651,7 +4651,7 @@ struct ST_GeomFromGeoJSON { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { variant.AddParameter("geojson", LogicalType::VARCHAR); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -4772,7 +4772,7 @@ struct ST_GeomFromText { FunctionBuilder::RegisterScalar(loader, "ST_GeomFromText", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { variant.AddParameter("wkt", LogicalType::VARCHAR); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetBind(Bind); @@ -4782,7 
+4782,7 @@ struct ST_GeomFromText { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { variant.AddParameter("wkt", LogicalType::VARCHAR); variant.AddParameter("ignore_invalid", LogicalType::BOOLEAN); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetBind(Bind); variant.SetInit(LocalState::Init); @@ -5131,7 +5131,7 @@ struct ST_GeomFromWKB { FunctionBuilder::RegisterScalar(loader, "ST_GeomFromWKB", [](ScalarFunctionBuilder &builder) { builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { variant.AddParameter("wkb", GeoTypes::WKB_BLOB()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteGeometry); @@ -5139,7 +5139,7 @@ struct ST_GeomFromWKB { builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { variant.AddParameter("blob", LogicalType::BLOB); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteGeometry); @@ -5223,7 +5223,7 @@ struct ST_HasZ { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_HasZ", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetInit(LocalState::Init); @@ -5316,7 +5316,7 @@ struct ST_HasM { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_HasM", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetInit(LocalState::Init); @@ -5385,9 +5385,9 @@ struct 
ST_LineInterpolatePoint { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_LineInterpolatePoint", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("line", GeoTypes::GEOMETRY()); + variant.AddParameter("line", LogicalType::GEOMETRY()); variant.AddParameter("fraction", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteGeometry); @@ -5461,10 +5461,10 @@ struct ST_LineInterpolatePoints { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_LineInterpolatePoints", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("line", GeoTypes::GEOMETRY()); + variant.AddParameter("line", LogicalType::GEOMETRY()); variant.AddParameter("fraction", LogicalType::DOUBLE); variant.AddParameter("repeat", LogicalType::BOOLEAN); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetFunction(ExecuteGeometry); variant.SetInit(LocalState::Init); @@ -5533,8 +5533,8 @@ struct ST_LineLocatePoint { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_LineLocatePoint", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("line", GeoTypes::GEOMETRY()); - variant.AddParameter("point", GeoTypes::GEOMETRY()); + variant.AddParameter("line", LogicalType::GEOMETRY()); + variant.AddParameter("point", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::DOUBLE); variant.SetInit(LocalState::Init); @@ -5593,10 +5593,10 @@ struct ST_LineSubstring { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_LineSubstring", [](ScalarFunctionBuilder &func) { 
func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("line", GeoTypes::GEOMETRY()); + variant.AddParameter("line", LogicalType::GEOMETRY()); variant.AddParameter("start_fraction", LogicalType::DOUBLE); variant.AddParameter("end_fraction", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetFunction(ExecuteGeometry); variant.SetInit(LocalState::Init); @@ -5681,10 +5681,10 @@ struct ST_LocateAlong { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_LocateAlong", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("line", GeoTypes::GEOMETRY()); + variant.AddParameter("line", LogicalType::GEOMETRY()); variant.AddParameter("measure", LogicalType::DOUBLE); variant.AddParameter("offset", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetBind(Bind); variant.SetInit(LocalState::Init); @@ -5692,9 +5692,9 @@ struct ST_LocateAlong { }); func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("line", GeoTypes::GEOMETRY()); + variant.AddParameter("line", LogicalType::GEOMETRY()); variant.AddParameter("measure", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetBind(Bind); variant.SetInit(LocalState::Init); @@ -5816,11 +5816,11 @@ struct ST_LocateBetween { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_LocateBetween", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("line", GeoTypes::GEOMETRY()); + variant.AddParameter("line", LogicalType::GEOMETRY()); variant.AddParameter("start_measure", LogicalType::DOUBLE); variant.AddParameter("end_measure", LogicalType::DOUBLE); 
variant.AddParameter("offset", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetBind(Bind); variant.SetInit(LocalState::Init); @@ -5828,10 +5828,10 @@ struct ST_LocateBetween { }); func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("line", GeoTypes::GEOMETRY()); + variant.AddParameter("line", LogicalType::GEOMETRY()); variant.AddParameter("start_measure", LogicalType::DOUBLE); variant.AddParameter("end_measure", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetBind(Bind); variant.SetInit(LocalState::Init); @@ -5944,7 +5944,7 @@ struct ST_ZMFlag { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_ZMFlag", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::UTINYINT); variant.SetInit(LocalState::Init); @@ -6040,8 +6040,8 @@ struct ST_Distance_Sphere { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Distance_Sphere", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + variant.AddParameter("geom2", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::DOUBLE); variant.SetInit(LocalState::Init); @@ -6215,7 +6215,7 @@ struct ST_Hilbert { }); func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("bounds", GeoTypes::BOX_2D()); 
variant.SetReturnType(LogicalType::UINTEGER); @@ -6224,7 +6224,7 @@ struct ST_Hilbert { }); func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::UINTEGER); variant.SetFunction(ExecuteGeometry); @@ -6318,8 +6318,8 @@ struct ST_InterpolatePoint { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_InterpolatePoint", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("line", GeoTypes::GEOMETRY()); - variant.AddParameter("point", GeoTypes::GEOMETRY()); + variant.AddParameter("line", LogicalType::GEOMETRY()); + variant.AddParameter("point", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::DOUBLE); variant.SetInit(LocalState::Init); @@ -6436,8 +6436,8 @@ struct ST_IntersectsExtent { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Intersects_Extent", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom1", GeoTypes::GEOMETRY()); - variant.AddParameter("geom2", GeoTypes::GEOMETRY()); + variant.AddParameter("geom1", LogicalType::GEOMETRY()); + variant.AddParameter("geom2", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetInit(LocalState::Init); @@ -6494,7 +6494,7 @@ struct ST_IsClosed { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_IsClosed", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetInit(LocalState::Init); @@ -6560,7 +6560,7 @@ struct ST_IsEmpty { static void Register(ExtensionLoader &loader) { 
FunctionBuilder::RegisterScalar(loader, "ST_IsEmpty", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::BOOLEAN); variant.SetInit(LocalState::Init); @@ -6659,7 +6659,7 @@ struct ST_Length { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Length", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::DOUBLE); variant.SetInit(LocalState::Init); @@ -6742,7 +6742,7 @@ struct ST_MakeEnvelope { variant.AddParameter("min_y", LogicalType::DOUBLE); variant.AddParameter("max_x", LogicalType::DOUBLE); variant.AddParameter("max_y", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -6942,8 +6942,8 @@ struct ST_MakeLine { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_MakeLine", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geoms", LogicalType::LIST(GeoTypes::GEOMETRY())); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geoms", LogicalType::LIST(LogicalType::GEOMETRY())); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteList); @@ -6953,9 +6953,9 @@ struct ST_MakeLine { }); func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("start", GeoTypes::GEOMETRY()); - variant.AddParameter("end", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("start", 
LogicalType::GEOMETRY()); + variant.AddParameter("end", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteBinary); @@ -7092,8 +7092,8 @@ struct ST_MakePolygon { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_MakePolygon", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("shell", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("shell", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteFromShell); @@ -7105,9 +7105,9 @@ struct ST_MakePolygon { }); func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("shell", GeoTypes::GEOMETRY()); - variant.AddParameter("holes", LogicalType::LIST(GeoTypes::GEOMETRY())); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("shell", LogicalType::GEOMETRY()); + variant.AddParameter("holes", LogicalType::LIST(LogicalType::GEOMETRY())); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteFromRings); @@ -7207,8 +7207,8 @@ struct ST_MakeBox2D { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_MakeBox2D", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("point1", GeoTypes::GEOMETRY()); - variant.AddParameter("point2", GeoTypes::GEOMETRY()); + variant.AddParameter("point1", LogicalType::GEOMETRY()); + variant.AddParameter("point2", LogicalType::GEOMETRY()); variant.SetReturnType(GeoTypes::BOX_2D()); variant.SetInit(LocalState::Init); @@ -7294,8 +7294,8 @@ struct ST_Multi { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Multi", [](ScalarFunctionBuilder &func) 
{ func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -7364,7 +7364,7 @@ struct ST_NGeometries { for (auto &alias : {"ST_NumGeometries", "ST_NGeometries"}) { FunctionBuilder::RegisterScalar(loader, alias, [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::INTEGER); variant.SetInit(LocalState::Init); @@ -7437,7 +7437,7 @@ struct ST_NInteriorRings { for (auto &alias : {"ST_NumInteriorRings", "ST_NInteriorRings"}) { FunctionBuilder::RegisterScalar(loader, alias, [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::INTEGER); variant.SetInit(LocalState::Init); @@ -7553,7 +7553,7 @@ struct ST_NPoints { for (const auto &alias : {"ST_NumPoints", "ST_NPoints"}) { FunctionBuilder::RegisterScalar(loader, alias, [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::UINTEGER); variant.SetInit(LocalState::Init); @@ -7686,7 +7686,7 @@ struct ST_Perimeter { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Perimeter", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", 
LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::DOUBLE); variant.SetInit(LocalState::Init); @@ -7858,7 +7858,7 @@ struct ST_Point { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { variant.AddParameter("x", LogicalType::DOUBLE); variant.AddParameter("y", LogicalType::DOUBLE); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetFunction(ExecuteGeometry); variant.SetInit(LocalState::Init); @@ -8082,9 +8082,9 @@ struct ST_PointN { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_PointN", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("index", LogicalType::INTEGER); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -8155,8 +8155,8 @@ struct ST_Points { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Points", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(Execute); @@ -8291,7 +8291,7 @@ struct ST_QuadKey { }); func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("point", GeoTypes::GEOMETRY()); + variant.AddParameter("point", LogicalType::GEOMETRY()); variant.AddParameter("level", LogicalType::INTEGER); variant.SetReturnType(LogicalType::VARCHAR); variant.SetFunction(ExecuteGeometry); @@ -8703,8 +8703,8 @@ struct ST_StartPoint { static void Register(ExtensionLoader &loader) { 
FunctionBuilder::RegisterScalar(loader, "ST_StartPoint", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteGeometry); @@ -8827,8 +8827,8 @@ struct ST_EndPoint { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_EndPoint", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteGeometry); @@ -8967,7 +8967,7 @@ struct PointAccessFunctionBase { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, OP::NAME, [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::DOUBLE); variant.SetInit(LocalState::Init); @@ -9204,7 +9204,7 @@ struct VertexAggFunctionBase { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, OP::NAME, [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::DOUBLE); variant.SetInit(LocalState::Init); diff --git a/src/spatial/modules/mvt/mvt_module.cpp b/src/spatial/modules/mvt/mvt_module.cpp index ef636733..b4b25d19 100644 --- a/src/spatial/modules/mvt/mvt_module.cpp +++ 
b/src/spatial/modules/mvt/mvt_module.cpp @@ -155,7 +155,7 @@ struct ST_TileEnvelope { variant.AddParameter("tile_zoom", LogicalType::INTEGER); variant.AddParameter("tile_x", LogicalType::INTEGER); variant.AddParameter("tile_y", LogicalType::INTEGER); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(LocalState::Init); variant.SetFunction(ExecuteWebMercator); }); @@ -925,7 +925,7 @@ struct ST_AsMVT { // Look for the first geometry column for (idx_t i = 0; i < StructType::GetChildCount(row_type); i++) { auto &child = StructType::GetChildType(row_type, i); - if (child == GeoTypes::GEOMETRY()) { + if (child == LogicalType::GEOMETRY()) { if (geom_idx != optional_idx::Invalid()) { throw InvalidInputException("ST_AsMVT: only one geometry column is allowed in the input row"); } @@ -937,7 +937,7 @@ struct ST_AsMVT { for (idx_t i = 0; i < StructType::GetChildCount(row_type); i++) { auto &child = StructType::GetChildType(row_type, i); auto &child_name = StructType::GetChildName(row_type, i); - if (child == GeoTypes::GEOMETRY() && child_name == geom_name) { + if (child == LogicalType::GEOMETRY() && child_name == geom_name) { if (geom_idx != optional_idx::Invalid()) { throw InvalidInputException("ST_AsMVT: only one geometry column is allowed in the input row"); } diff --git a/src/spatial/modules/proj/proj_module.cpp b/src/spatial/modules/proj/proj_module.cpp index e9db3032..59659a90 100644 --- a/src/spatial/modules/proj/proj_module.cpp +++ b/src/spatial/modules/proj/proj_module.cpp @@ -457,10 +457,10 @@ struct ST_Transform { }); func.AddVariant([&](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("source_crs", LogicalType::VARCHAR); variant.AddParameter("target_crs", LogicalType::VARCHAR); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); 
variant.SetInit(ProjFunctionLocalState::Init); variant.SetBind(Bind); @@ -468,11 +468,11 @@ struct ST_Transform { }); func.AddVariant([&](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.AddParameter("source_crs", LogicalType::VARCHAR); variant.AddParameter("target_crs", LogicalType::VARCHAR); variant.AddParameter("always_xy", LogicalType::BOOLEAN); - variant.SetReturnType(GeoTypes::GEOMETRY()); + variant.SetReturnType(LogicalType::GEOMETRY()); variant.SetInit(ProjFunctionLocalState::Init); variant.SetBind(Bind); @@ -684,7 +684,7 @@ struct ST_Area_Spheroid { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Area_Spheroid", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::DOUBLE); variant.SetInit(GeodesicLocalState::InitPolygon); @@ -840,7 +840,7 @@ struct ST_Perimeter_Spheroid { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Perimeter_Spheroid", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::DOUBLE); variant.SetInit(GeodesicLocalState::InitPolygon); @@ -974,7 +974,7 @@ struct ST_Length_Spheroid { static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Length_Spheroid", [](ScalarFunctionBuilder &func) { func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", GeoTypes::GEOMETRY()); + variant.AddParameter("geom", LogicalType::GEOMETRY()); variant.SetReturnType(LogicalType::DOUBLE); variant.SetInit(GeodesicLocalState::InitLine); 
diff --git a/src/spatial/modules/shapefile/shapefile_module.cpp b/src/spatial/modules/shapefile/shapefile_module.cpp index 12d4f3d4..4ff966c2 100644 --- a/src/spatial/modules/shapefile/shapefile_module.cpp +++ b/src/spatial/modules/shapefile/shapefile_module.cpp @@ -403,7 +403,7 @@ struct ST_ReadSHP { } // Always return geometry last - return_types.push_back(GeoTypes::GEOMETRY()); + return_types.push_back(LogicalType::GEOMETRY()); names.push_back("geom"); // Deduplicate field names if necessary @@ -829,7 +829,7 @@ struct ST_ReadSHP { const auto projected_col_idx = gstate.column_ids[col_idx]; auto &col_vec = output.data[col_idx]; - if (col_vec.GetType() == GeoTypes::GEOMETRY()) { + if (col_vec.GetType() == LogicalType::GEOMETRY()) { ConvertGeometryVector(col_vec, record_start, output_size, gstate.shp_handle.get(), gstate.arena, bind_data.shape_type); } else { diff --git a/src/spatial/operators/spatial_join_optimizer.cpp b/src/spatial/operators/spatial_join_optimizer.cpp index cf261946..a57af821 100644 --- a/src/spatial/operators/spatial_join_optimizer.cpp +++ b/src/spatial/operators/spatial_join_optimizer.cpp @@ -141,8 +141,8 @@ static void InsertSpatialJoin(OptimizerExtensionInput &input, unique_ptrreturn_type != GeoTypes::GEOMETRY() || - func.children[1]->return_type != GeoTypes::GEOMETRY()) { + if (func.children[0]->return_type != LogicalType::GEOMETRY() || + func.children[1]->return_type != LogicalType::GEOMETRY()) { extra_predicates.push_back(std::move(expr)); continue; } diff --git a/src/spatial/operators/spatial_join_physical.cpp b/src/spatial/operators/spatial_join_physical.cpp index 50e0eb67..b18c6f48 100644 --- a/src/spatial/operators/spatial_join_physical.cpp +++ b/src/spatial/operators/spatial_join_physical.cpp @@ -483,12 +483,12 @@ class SpatialJoinLocalState final : public LocalSinkState { auto &catalog = Catalog::GetSystemCatalog(context); auto &entry = catalog.GetEntry(context, DEFAULT_SCHEMA, "ST_IsEmpty"); - auto func = 
entry.functions.GetFunctionByArguments(context, {GeoTypes::GEOMETRY()}); + auto func = entry.functions.GetFunctionByArguments(context, {LogicalType::GEOMETRY()}); auto is_empty_expr = make_uniq(LogicalTypeId::BOOLEAN, func, vector> {}, nullptr); is_empty_expr->children.push_back( - make_uniq_base(GeoTypes::GEOMETRY(), 0)); + make_uniq_base(LogicalType::GEOMETRY(), 0)); auto is_not_empty_expr = make_uniq(ExpressionType::OPERATOR_NOT, LogicalTypeId::BOOLEAN); @@ -497,7 +497,7 @@ class SpatialJoinLocalState final : public LocalSinkState { auto is_not_null_expr = make_uniq(ExpressionType::OPERATOR_IS_NOT_NULL, LogicalTypeId::BOOLEAN); is_not_null_expr->children.push_back( - make_uniq_base(GeoTypes::GEOMETRY(), 0)); + make_uniq_base(LogicalType::GEOMETRY(), 0)); auto filter_expr = make_uniq_base( ExpressionType::CONJUNCTION_AND, std::move(is_not_empty_expr), std::move(is_not_null_expr)); @@ -612,7 +612,7 @@ SinkFinalizeType PhysicalSpatialJoin::Finalize(Pipeline &pipeline, Event &event, Vector row_pointer_vector(LogicalType::POINTER, reinterpret_cast(rows_ptr)); auto &sel = *FlatVector::IncrementalSelectionVector(); - Vector geom_vec(GeoTypes::GEOMETRY()); + Vector geom_vec(LogicalType::GEOMETRY()); auto &validity = FlatVector::Validity(geom_vec); do { diff --git a/src/spatial/spatial_extension.cpp b/src/spatial/spatial_extension.cpp index 891d29b0..d398e615 100644 --- a/src/spatial/spatial_extension.cpp +++ b/src/spatial/spatial_extension.cpp @@ -16,7 +16,6 @@ #include "spatial/modules/shapefile/shapefile_module.hpp" #include "spatial/operators/spatial_operator_extension.hpp" #include "spatial/operators/spatial_join_optimizer.hpp" -#include "spatial/spatial_geoarrow.hpp" #include "spatial/spatial_types.hpp" namespace duckdb { @@ -31,7 +30,6 @@ static void LoadInternal(ExtensionLoader &loader) { RegisterSpatialAggregateFunctions(loader); RegisterSpatialTableFunctions(loader); SpatialJoinOptimizer::Register(loader); - GeoArrow::Register(loader); 
RegisterProjModule(loader); RegisterGDALModule(loader); diff --git a/src/spatial/spatial_geoarrow.cpp b/src/spatial/spatial_geoarrow.cpp index 83cbf333..7c1cb82a 100644 --- a/src/spatial/spatial_geoarrow.cpp +++ b/src/spatial/spatial_geoarrow.cpp @@ -43,13 +43,13 @@ struct GeoArrowWKB { const auto format = string(schema.format); if (format == "z") { - return make_uniq(GeoTypes::GEOMETRY(), + return make_uniq(, make_uniq(ArrowVariableSizeType::NORMAL)); } else if (format == "Z") { - return make_uniq(GeoTypes::GEOMETRY(), + return make_uniq(LogicalType::GEOMETRY(), make_uniq(ArrowVariableSizeType::SUPER_SIZE)); } else if (format == "vz") { - return make_uniq(GeoTypes::GEOMETRY(), make_uniq(ArrowVariableSizeType::VIEW)); + return make_uniq(LogicalType::GEOMETRY(), make_uniq(ArrowVariableSizeType::VIEW)); } throw InvalidInputException("Arrow extension type \"%s\" not supported for geoarrow.wkb", format.c_str()); } @@ -115,7 +115,7 @@ struct GeoArrowWKB { void RegisterArrowExtensions(DBConfig &config) { config.RegisterArrowExtension( {"geoarrow.wkb", GeoArrowWKB::PopulateSchema, GeoArrowWKB::GetType, - make_shared_ptr(GeoTypes::GEOMETRY(), LogicalType::BLOB, GeoArrowWKB::ArrowToDuck, + make_shared_ptr(LogicalType::GEOMETRY(), LogicalType::BLOB, GeoArrowWKB::ArrowToDuck, GeoArrowWKB::DuckToArrow)}); } @@ -140,7 +140,7 @@ void GeoArrowRegisterScan(ClientContext &context, TableFunctionInput &data_p, Da } DBConfig &config = DatabaseInstance::GetDatabase(context).config; - if (config.HasArrowExtension(GeoTypes::GEOMETRY())) { + if (config.HasArrowExtension(LogicalType::GEOMETRY())) { output.SetValue(0, 0, false); } else { RegisterArrowExtensions(config); diff --git a/src/spatial/spatial_types.cpp b/src/spatial/spatial_types.cpp index 5dde96e2..9d54d251 100644 --- a/src/spatial/spatial_types.cpp +++ b/src/spatial/spatial_types.cpp @@ -70,7 +70,7 @@ LogicalType GeoTypes::POLYGON_3D() { return type; } -LogicalType GeoTypes::GEOMETRY() { +LogicalType GeoTypes::LEGACY_GEOMETRY() 
{ auto blob_type = LogicalType(LogicalTypeId::BLOB); blob_type.SetAlias("GEOMETRY"); return blob_type; @@ -124,7 +124,7 @@ void GeoTypes::Register(ExtensionLoader &loader) { loader.RegisterType("BOX_2DF", GeoTypes::BOX_2DF()); // GEOMETRY - loader.RegisterType("GEOMETRY", GeoTypes::GEOMETRY()); + loader.RegisterType("GEOMETRY", GeoTypes::LEGACY_GEOMETRY()); // WKB_BLOB loader.RegisterType("WKB_BLOB", GeoTypes::WKB_BLOB()); diff --git a/src/spatial/spatial_types.hpp b/src/spatial/spatial_types.hpp index a149e230..d968777f 100644 --- a/src/spatial/spatial_types.hpp +++ b/src/spatial/spatial_types.hpp @@ -20,7 +20,8 @@ struct GeoTypes { static LogicalType BOX_2DF(); static LogicalType WKB_BLOB(); - static LogicalType GEOMETRY(); + // Old geometry type (pre v1.5) + static LogicalType LEGACY_GEOMETRY(); static void Register(ExtensionLoader &loader); From 077249963fa84f9d75338adc09f19f41e930eb8e Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Wed, 12 Nov 2025 10:44:03 +0100 Subject: [PATCH 17/41] adjust serialization, cleanup old code --- duckdb | 2 +- src/sgl/sgl.cpp | 80 +++ src/sgl/sgl.hpp | 4 + src/spatial/CMakeLists.txt | 5 +- src/spatial/geometry/CMakeLists.txt | 7 +- src/spatial/geometry/geometry_processor.cpp | 7 - src/spatial/geometry/geometry_processor.hpp | 337 ---------- src/spatial/geometry/geometry_properties.hpp | 62 -- .../geometry/geometry_serialization.cpp | 622 +++++++----------- .../geometry/geometry_serialization.hpp | 5 + src/spatial/geometry/geometry_type.hpp | 159 ----- src/spatial/geometry/vertex.hpp | 2 +- src/spatial/geometry/wkb_writer.cpp | 194 ------ src/spatial/geometry/wkb_writer.hpp | 24 - src/spatial/index/rtree/rtree_index.cpp | 10 +- .../index/rtree/rtree_index_plan_scan.cpp | 10 +- src/spatial/modules/gdal/gdal_module.cpp | 7 +- src/spatial/modules/geos/geos_module.cpp | 17 +- src/spatial/modules/geos/geos_serde.cpp | 508 +++++--------- .../modules/main/spatial_functions.hpp | 1 - .../main/spatial_functions_aggregate.cpp | 1 - 
.../modules/main/spatial_functions_cast.cpp | 264 +------- .../modules/main/spatial_functions_scalar.cpp | 53 +- .../operators/spatial_join_physical.cpp | 11 +- src/spatial/spatial_geoarrow.cpp | 161 ----- src/spatial/spatial_geoarrow.hpp | 11 - src/spatial/util/binary_reader.hpp | 7 +- 27 files changed, 551 insertions(+), 2020 deletions(-) delete mode 100644 src/spatial/geometry/geometry_processor.cpp delete mode 100644 src/spatial/geometry/geometry_processor.hpp delete mode 100644 src/spatial/geometry/geometry_properties.hpp delete mode 100644 src/spatial/geometry/geometry_type.hpp delete mode 100644 src/spatial/geometry/wkb_writer.cpp delete mode 100644 src/spatial/geometry/wkb_writer.hpp delete mode 100644 src/spatial/spatial_geoarrow.cpp delete mode 100644 src/spatial/spatial_geoarrow.hpp diff --git a/duckdb b/duckdb index 39f5583f..6c5e16c2 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 39f5583f99dd7a921a1d9824ba4434061b8d91d7 +Subproject commit 6c5e16c2fb342c3218e5a4a59b6292dae3326e83 diff --git a/src/sgl/sgl.cpp b/src/sgl/sgl.cpp index c9612e5d..7b08e721 100644 --- a/src/sgl/sgl.cpp +++ b/src/sgl/sgl.cpp @@ -2382,6 +2382,86 @@ bool ops::get_euclidean_distance(const geometry &lhs_geom, const geometry &rhs_g return found; } +// Visit all non-collection geometries +template +void visit_leaf_pairs(const geometry &lhs, const geometry &rhs, CALLBACK &&callback) { + const auto lhs_root = lhs.get_parent(); + auto lhs_part = &lhs; + + while (true) { + switch (lhs_part->get_type()) { + case geometry_type::POINT: + case geometry_type::LINESTRING: + case geometry_type::POLYGON: { + + const auto rhs_root = rhs.get_parent(); + auto rhs_part = &rhs; + + while (true) { + switch (rhs_part->get_type()) { + case geometry_type::POINT: + case geometry_type::LINESTRING: + case geometry_type::POLYGON: { + // Found a pair of leaf geometries + // If the callback returns true, we stop visiting + if (callback(*lhs_part, *rhs_part)) { + return; + } + } break; + case 
geometry_type::MULTI_POINT: + case geometry_type::MULTI_LINESTRING: + case geometry_type::MULTI_POLYGON: + case geometry_type::GEOMETRY_COLLECTION: + if (rhs_part->is_empty()) { + break; + } + rhs_part = rhs_part->get_first_part(); + continue; + default: + break; + } + + while (true) { + const auto parent = rhs_part->get_parent(); + if (parent == rhs_root) { + break; + } + if (rhs_part != parent->get_last_part()) { + rhs_part = rhs_part->get_next(); + break; + } + rhs_part = parent; + } + } + + } break; + case geometry_type::MULTI_POINT: + case geometry_type::MULTI_LINESTRING: + case geometry_type::MULTI_POLYGON: + case geometry_type::GEOMETRY_COLLECTION: + if (lhs_part->is_empty()) { + break; + } + lhs_part = lhs_part->get_first_part(); + continue; + default: + break; + } + + while (true) { + const auto parent = lhs_part->get_parent(); + if (parent == lhs_root) { + return; + } + if (lhs_part != parent->get_last_part()) { + lhs_part = lhs_part->get_next(); + break; + } + lhs_part = parent; + } + } +} + } // namespace sgl //====================================================================================================================== diff --git a/src/sgl/sgl.hpp b/src/sgl/sgl.hpp index 29f2af62..f08b4961 100644 --- a/src/sgl/sgl.hpp +++ b/src/sgl/sgl.hpp @@ -139,6 +139,10 @@ struct vertex_xyzm { return dummy; } } + + bool all_nan() const { + return std::isnan(x) && std::isnan(y) && std::isnan(z) && std::isnan(m); + } }; } // namespace sgl diff --git a/src/spatial/CMakeLists.txt b/src/spatial/CMakeLists.txt index 1c72211c..46595f45 100644 --- a/src/spatial/CMakeLists.txt +++ b/src/spatial/CMakeLists.txt @@ -5,7 +5,6 @@ add_subdirectory(index) add_subdirectory(operators) set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/spatial_extension.cpp + ${EXTENSION_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/spatial_extension.cpp ${CMAKE_CURRENT_SOURCE_DIR}/spatial_types.cpp -PARENT_SCOPE) \ No newline at end of file + PARENT_SCOPE) diff --git 
a/src/spatial/geometry/CMakeLists.txt b/src/spatial/geometry/CMakeLists.txt index 9c582ff3..c816b8d9 100644 --- a/src/spatial/geometry/CMakeLists.txt +++ b/src/spatial/geometry/CMakeLists.txt @@ -1,6 +1,3 @@ set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/geometry_processor.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/wkb_writer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/geometry_serialization.cpp - PARENT_SCOPE) \ No newline at end of file + ${EXTENSION_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/geometry_serialization.cpp + PARENT_SCOPE) diff --git a/src/spatial/geometry/geometry_processor.cpp b/src/spatial/geometry/geometry_processor.cpp deleted file mode 100644 index d663b8b1..00000000 --- a/src/spatial/geometry/geometry_processor.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include "spatial/geometry/geometry_processor.hpp" - -namespace duckdb { - -constexpr double VertexData::EMPTY_DATA; - -} // namespace duckdb diff --git a/src/spatial/geometry/geometry_processor.hpp b/src/spatial/geometry/geometry_processor.hpp deleted file mode 100644 index 394e03e8..00000000 --- a/src/spatial/geometry/geometry_processor.hpp +++ /dev/null @@ -1,337 +0,0 @@ -#pragma once - -#include "spatial/util/cursor.hpp" -#include "spatial/geometry/geometry_type.hpp" - -namespace duckdb { - -//------------------------------------------------------------------------ -// GeometryProcessor -//------------------------------------------------------------------------ -// The GeometryProcessor class is used to process a serialized geometry. -// By subclassing and overriding the appropriate methods an algorithm -// can be implemented to process the geometry in a streaming fashion. 
-//------------------------------------------------------------------------ - -//------------------------------------------------------------------------ -// VertexData -//------------------------------------------------------------------------ -class VertexData { -private: - static const constexpr double EMPTY_DATA = 0; - -public: - // The M axis is always in the fourth position and the Z axis is always in the third position - const_data_ptr_t data[4] = {const_data_ptr_cast(&EMPTY_DATA), const_data_ptr_cast(&EMPTY_DATA), - const_data_ptr_cast(&EMPTY_DATA), const_data_ptr_cast(&EMPTY_DATA)}; - ptrdiff_t stride[4] = {0, 0, 0, 0}; - uint32_t count = 0; - - VertexData(const_data_ptr_t data_ptr, uint32_t count, bool has_z, bool has_m) : count(count) { - // Get the data at the current cursor position - - // TODO: These calculations are all constant, we could move it to Execute() instead. - // TODO: Add GetX, GetY, GetZ, GetM methods - data[0] = data_ptr; - data[1] = data_ptr + sizeof(double); - if (has_z && has_m) { - data[2] = data_ptr + 2 * sizeof(double); - data[3] = data_ptr + 3 * sizeof(double); - } else if (has_z) { - data[2] = data_ptr + 2 * sizeof(double); - } else if (has_m) { - data[3] = data_ptr + 2 * sizeof(double); - } - - auto vertex_size = static_cast(sizeof(double) * (2 + (has_z ? 1 : 0) + (has_m ? 1 : 0))); - - stride[0] = vertex_size; - stride[1] = vertex_size; - stride[2] = has_z ? vertex_size : 0; - stride[3] = has_m ? 
vertex_size : 0; - } - - bool IsEmpty() const { - return count == 0; - } - - uint32_t ByteSize() const { - return count * sizeof(double) * (2 + (stride[2] != 0) + (stride[3] != 0)); - } -}; - -//------------------------------------------------------------------------ -// Helper so that we can return void from functions generically -//------------------------------------------------------------------------ -template -class ResultWrapper { -private: - RESULT tmp; - -public: - template - explicit ResultWrapper(F &&f) : tmp(std::move(f())) { - } - ResultWrapper(const ResultWrapper &other) = delete; - ResultWrapper &operator=(const ResultWrapper &other) = delete; - ResultWrapper(ResultWrapper &&other) = delete; - ResultWrapper &operator=(ResultWrapper &&other) = delete; - RESULT &&ReturnAndDestroy() { - return std::move(tmp); - } -}; - -template <> -class ResultWrapper { -public: - template - explicit ResultWrapper(F &&f) { - f(); - } - ResultWrapper(const ResultWrapper &other) = delete; - ResultWrapper &operator=(const ResultWrapper &other) = delete; - ResultWrapper(ResultWrapper &&other) = delete; - ResultWrapper &operator=(ResultWrapper &&other) = delete; - void ReturnAndDestroy() { - } -}; - -//------------------------------------------------------------------------ -// GeometryProcessor -//------------------------------------------------------------------------ -template -class GeometryProcessor { -private: - bool has_z = false; - bool has_m = false; - uint32_t nesting_level = 0; - LegacyGeometryType current_type = LegacyGeometryType::POINT; - LegacyGeometryType parent_type = LegacyGeometryType::POINT; - -protected: - bool HasZ() const { - return has_z; - } - bool HasM() const { - return has_m; - } - bool IsNested() const { - return nesting_level > 0; - } - uint32_t NestingLevel() const { - return nesting_level; - } - LegacyGeometryType CurrentType() const { - return current_type; - } - LegacyGeometryType ParentType() const { - return parent_type; - } - - class 
CollectionState { - private: - friend class GeometryProcessor; - uint32_t item_count; - uint32_t current_item; - GeometryProcessor &processor; - Cursor &cursor; - CollectionState(uint32_t item_count, GeometryProcessor &processor, Cursor &cursor) - : item_count(item_count), current_item(0), processor(processor), cursor(cursor) { - } - - public: - CollectionState(const CollectionState &other) = delete; - CollectionState &operator=(const CollectionState &other) = delete; - CollectionState(CollectionState &&other) = delete; - CollectionState &operator=(CollectionState &&other) = delete; - - uint32_t ItemCount() const { - return item_count; - } - bool IsDone() const { - return current_item >= item_count; - } - - // NOLINTNEXTLINE - RESULT Next(ARGS... args) { - // Save parent type and increment nesting - auto prev_parent_type = processor.parent_type; - processor.parent_type = processor.current_type; - processor.nesting_level++; - // NOLINTNEXTLINE - ResultWrapper result([&]() { return processor.ReadGeometry(cursor, args...); }); - - // Restore parent type and decrement nesting - processor.current_type = processor.parent_type; - processor.parent_type = prev_parent_type; - processor.nesting_level--; - - // Also move state forwards - current_item++; - - return result.ReturnAndDestroy(); - } - }; - - class PolygonState { - private: - friend class GeometryProcessor; - uint32_t ring_count; - uint32_t current_ring; - const_data_ptr_t count_ptr; - const_data_ptr_t data_ptr; - GeometryProcessor &processor; - explicit PolygonState(uint32_t ring_count, const_data_ptr_t count_ptr, const_data_ptr_t data_ptr, - GeometryProcessor &processor) - : ring_count(ring_count), current_ring(0), count_ptr(count_ptr), data_ptr(data_ptr), processor(processor) { - } - - public: - PolygonState(const PolygonState &other) = delete; - PolygonState &operator=(const PolygonState &other) = delete; - PolygonState(PolygonState &&other) = delete; - PolygonState &operator=(PolygonState &&other) = delete; - - 
uint32_t RingCount() const { - return ring_count; - } - bool IsDone() const { - return current_ring == ring_count; - } - VertexData Next() { - auto count = Load(count_ptr); - VertexData data(data_ptr, count, processor.HasZ(), processor.HasM()); - current_ring++; - count_ptr += sizeof(uint32_t); - data_ptr += count * sizeof(double) * (2 + (processor.HasZ() ? 1 : 0) + (processor.HasM() ? 1 : 0)); - return data; - } - }; - - virtual RESULT ProcessPoint(const VertexData &vertices, ARGS... args) = 0; - virtual RESULT ProcessLineString(const VertexData &vertices, ARGS... args) = 0; - virtual RESULT ProcessPolygon(PolygonState &state, ARGS... args) = 0; - virtual RESULT ProcessCollection(CollectionState &state, ARGS... args) = 0; - -public: - RESULT Process(const geometry_t &geom, ARGS... args) { - - const auto props = geom.GetProperties(); - - // Check the version - props.CheckVersion(); - - has_z = props.HasZ(); - has_m = props.HasM(); - nesting_level = 0; - current_type = geom.GetType(); - parent_type = LegacyGeometryType::POINT; - - Cursor cursor(geom); - - cursor.Skip(); - cursor.Skip(); - cursor.Skip(); - cursor.Skip(); - - auto dims = 2 + (has_z ? 1 : 0) + (has_m ? 1 : 0); - auto has_bbox = geom.GetProperties().HasBBox(); - auto bbox_size = has_bbox ? dims * 2 * sizeof(float) : 0; - cursor.Skip(bbox_size); - - return ReadGeometry(cursor, args...); - } - -private: - RESULT ReadGeometry(Cursor &cursor, ARGS... 
args) { - auto type = cursor.Peek(); - switch (type) { - case SerializedGeometryType::POINT: - current_type = LegacyGeometryType::POINT; - return ReadPoint(cursor, args...); - case SerializedGeometryType::LINESTRING: - current_type = LegacyGeometryType::LINESTRING; - return ReadLineString(cursor, args...); - case SerializedGeometryType::POLYGON: - current_type = LegacyGeometryType::POLYGON; - return ReadPolygon(cursor, args...); - case SerializedGeometryType::MULTIPOINT: - current_type = LegacyGeometryType::MULTIPOINT; - return ReadCollection(cursor, args...); - case SerializedGeometryType::MULTILINESTRING: - current_type = LegacyGeometryType::MULTILINESTRING; - return ReadCollection(cursor, args...); - case SerializedGeometryType::MULTIPOLYGON: - current_type = LegacyGeometryType::MULTIPOLYGON; - return ReadCollection(cursor, args...); - case SerializedGeometryType::GEOMETRYCOLLECTION: - current_type = LegacyGeometryType::GEOMETRYCOLLECTION; - return ReadCollection(cursor, args...); - default: - throw SerializationException("Unknown geometry type (%ud)", static_cast(type)); - } - } - - RESULT ReadPoint(Cursor &cursor, ARGS... args) { - auto type = cursor.Read(); - D_ASSERT(type == SerializedGeometryType::POINT); - (void)type; - auto count = cursor.Read(); - VertexData data(cursor.GetPtr(), count, HasZ(), HasM()); - cursor.Skip(data.ByteSize()); - return ProcessPoint(data, args...); - } - - RESULT ReadLineString(Cursor &cursor, ARGS... args) { - auto type = cursor.Read(); - D_ASSERT(type == SerializedGeometryType::LINESTRING); - (void)type; - auto count = cursor.Read(); - VertexData data(cursor.GetPtr(), count, HasZ(), HasM()); - cursor.Skip(data.ByteSize()); - return ProcessLineString(data, args...); - } - - RESULT ReadPolygon(Cursor &cursor, ARGS... 
args) { - auto type = cursor.Read(); - D_ASSERT(type == SerializedGeometryType::POLYGON); - (void)type; - auto ring_count = cursor.Read(); - auto count_ptr = cursor.GetPtr(); - cursor.Skip(ring_count * sizeof(uint32_t) + ((ring_count % 2) * sizeof(uint32_t))); - PolygonState state(ring_count, count_ptr, cursor.GetPtr(), *this); - - ResultWrapper result([&]() { return ProcessPolygon(state, args...); }); - - if (IsNested()) { - // Consume the rest of the polygon so we can continue processing the parent - while (!state.IsDone()) { - state.Next(); - } - } - cursor.SetPtr(const_cast(state.data_ptr)); - - return result.ReturnAndDestroy(); - } - - // NOLINTNEXTLINE - RESULT ReadCollection(Cursor &cursor, ARGS... args) { - auto type = cursor.Read(); - (void)type; - auto count = cursor.Read(); - CollectionState state(count, *this, cursor); - - ResultWrapper result([&]() { return ProcessCollection(state, args...); }); - - if (IsNested()) { - // Consume the rest of the collection so we can continue processing the parent - while (!state.IsDone()) { - state.Next(args...); - } - } - - return result.ReturnAndDestroy(); - } -}; - -} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/geometry/geometry_properties.hpp b/src/spatial/geometry/geometry_properties.hpp deleted file mode 100644 index 971b4f1d..00000000 --- a/src/spatial/geometry/geometry_properties.hpp +++ /dev/null @@ -1,62 +0,0 @@ -#pragma once - -namespace duckdb { - -static constexpr const uint8_t GEOMETRY_VERSION = 0; - -struct GeometryProperties { -private: - static constexpr const uint8_t Z = 0x01; - static constexpr const uint8_t M = 0x02; - static constexpr const uint8_t BBOX = 0x04; - // Example of other useful properties: - // static constexpr const uint8_t EMPTY = 0x08; - // static constexpr const uint8_t GEODETIC = 0x10; - // static constexpr const uint8_t SOLID = 0x20; - static constexpr const uint8_t VERSION_1 = 0x40; - static constexpr const uint8_t VERSION_0 = 0x80; - uint8_t flags = 
0; - -public: - explicit GeometryProperties(uint8_t flags = 0) : flags(flags) { - } - GeometryProperties(bool has_z, bool has_m) { - SetZ(has_z); - SetM(has_m); - } - - inline void CheckVersion() const { - const auto v0 = (flags & VERSION_0); - const auto v1 = (flags & VERSION_1); - if ((v1 | v0) != GEOMETRY_VERSION) { - throw NotImplementedException( - "This geometry seems to be written with a newer version of the DuckDB spatial library that is not " - "compatible with this version. Please upgrade your DuckDB installation."); - } - } - - inline bool HasZ() const { - return (flags & Z) != 0; - } - inline bool HasM() const { - return (flags & M) != 0; - } - inline bool HasBBox() const { - return (flags & BBOX) != 0; - } - inline void SetZ(bool value) { - flags = value ? (flags | Z) : (flags & ~Z); - } - inline void SetM(bool value) { - flags = value ? (flags | M) : (flags & ~M); - } - inline void SetBBox(bool value) { - flags = value ? (flags | BBOX) : (flags & ~BBOX); - } - - uint32_t VertexSize() const { - return sizeof(double) * (2 + HasZ() + HasM()); - } -}; - -} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/geometry/geometry_serialization.cpp b/src/spatial/geometry/geometry_serialization.cpp index c9b03936..a803f893 100644 --- a/src/spatial/geometry/geometry_serialization.cpp +++ b/src/spatial/geometry/geometry_serialization.cpp @@ -5,421 +5,301 @@ #include "spatial/geometry/sgl.hpp" #include "duckdb/common/exception.hpp" +#include "duckdb/common/types/geometry.hpp" +#include "duckdb/common/types/string_type.hpp" #include "duckdb/storage/arena_allocator.hpp" namespace duckdb { -// TODO: Make non-recursive - -static size_t GetRequiredSizeInternal(const sgl::geometry *geom) { - const auto vertex_width = geom->get_vertex_width(); +size_t Serde::GetRequiredSize(const sgl::geometry &geom) { - switch (geom->get_type()) { - case sgl::geometry_type::POINT: - case sgl::geometry_type::LINESTRING: { - // 4 bytes for the type - // 4 bytes for the 
length - // sizeof(vertex) * count; - const auto vertex_count = geom->get_vertex_count(); - return 4 + 4 + vertex_count * vertex_width; - } - case sgl::geometry_type::POLYGON: { - // Polygons are special because they may pad between the rings and the ring data - // 4 bytes for the type - // 4 bytes for the length - // sizeof(vertex) * count; - size_t size = 4 + 4; - - const auto part_count = geom->get_part_count(); - const auto tail = geom->get_last_part(); - if (!tail) { - return size; + const auto root = geom.get_parent(); + auto part = &geom; + + size_t total_size = 0; + + while (true) { + total_size += sizeof(uint8_t); // LE/BE byte + total_size += sizeof(uint32_t); // type id + switch (part->get_type()) { + case sgl::geometry_type::POINT: { + total_size += part->get_vertex_width(); + } break; + case sgl::geometry_type::LINESTRING: { + total_size += sizeof(uint32_t) + (part->get_vertex_width() * part->get_vertex_count()); + } break; + case sgl::geometry_type::POLYGON: { + total_size += sizeof(uint32_t); // ring count + const auto tail = part->get_last_part(); + if (tail) { + auto ring = tail; + do { + ring = ring->get_next(); + total_size += sizeof(uint32_t) + (ring->get_vertex_width() * ring->get_vertex_count()); + } while (ring != tail); + } + } break; + case sgl::geometry_type::MULTI_POINT: + case sgl::geometry_type::MULTI_LINESTRING: + case sgl::geometry_type::MULTI_POLYGON: + case sgl::geometry_type::GEOMETRY_COLLECTION: { + total_size += sizeof(uint32_t); // part count + if (part->is_empty()) { + break; + } + part = part->get_first_part(); + continue; } - auto part = tail; - do { - part = part->get_next(); - size += 4 + part->get_vertex_count() * vertex_width; - } while (part != tail); - - if (part_count % 2 == 1) { - size += 4; + default: { + throw InvalidInputException("Cannot serialize geometry of type %d", static_cast(part->get_type())); } - return size; - } - case sgl::geometry_type::MULTI_POINT: - case sgl::geometry_type::MULTI_LINESTRING: - case 
sgl::geometry_type::MULTI_POLYGON: - case sgl::geometry_type::GEOMETRY_COLLECTION: { - // 4 bytes for the type - // 4 bytes for the length - // recursive call for each part - size_t size = 4 + 4; - const auto tail = geom->get_last_part(); - if (!tail) { - return size; } - auto part = tail; - do { - part = part->get_next(); - size += GetRequiredSizeInternal(part); - } while (part != tail); - return size; - } - default: - D_ASSERT(false); - return 0; - } -} - -size_t Serde::GetRequiredSize(const sgl::geometry &geom) { - const auto type = geom.get_type(); - - const auto has_bbox = type != sgl::geometry_type::POINT && !geom.is_empty(); - const auto has_z = geom.has_z(); - const auto has_m = geom.has_m(); - - const auto dims = 2 + (has_z ? 1 : 0) + (has_m ? 1 : 0); - - const auto head_size = 4 + 4; // type + props + padding - const auto geom_size = GetRequiredSizeInternal(&geom); - const auto bbox_size = has_bbox ? dims * sizeof(float) * 2 : 0; - - const auto full_size = head_size + geom_size + bbox_size; - - // Check that the size is a multiple of 8 - D_ASSERT(full_size % 8 == 0); - - return full_size; -} - -static void SerializeVertices(BinaryWriter &cursor, const sgl::geometry *geom, const uint32_t count, const bool has_z, - const bool has_m, const bool has_bbox, const uint32_t vsize, sgl::extent_xyzm &bbox) { - const auto verts = geom->get_vertex_array(); - - // Copy the vertices to the cursor - const auto dst = cursor.Reserve(count * vsize); - - if (!has_bbox) { - // Fast path, issue on memcpy to the cursor - if (count * vsize != 0) { - memcpy(dst, verts, count * vsize); + while (true) { + const auto parent = part->get_parent(); + if (parent == root) { + return total_size; + } + if (part != parent->get_last_part()) { + part = part->get_next(); + break; + } + part = parent; } - return; } +} - sgl::vertex_xyzm vertex = {0}; - for (uint32_t i = 0; i < count; i++) { - - // Load the vertex from the geometry - memcpy(&vertex, verts + i * vsize, vsize); - - // Copy the 
vertex to the cursor - memcpy(dst + i * vsize, &vertex, vsize); - - bbox.min.x = std::min(bbox.min.x, vertex.x); - bbox.min.y = std::min(bbox.min.y, vertex.y); - bbox.max.x = std::max(bbox.max.x, vertex.x); - bbox.max.y = std::max(bbox.max.y, vertex.y); - - if (has_z) { - bbox.min.z = std::min(bbox.min.z, vertex.z); - bbox.max.z = std::max(bbox.max.z, vertex.z); +void Serde::Serialize(const sgl::geometry &geom, char *buffer, size_t buffer_size) { + const auto root = geom.get_parent(); + auto part = &geom; + + BinaryWriter writer(buffer, buffer_size); + + while (true) { + writer.Write(1); // Little Endian + + // Also write type + auto type_id = static_cast(part->get_type()); + type_id += part->has_z() * 1000; + type_id += part->has_m() * 2000; + writer.Write(type_id); + + switch (part->get_type()) { + case sgl::geometry_type::POINT: { + constexpr auto nan = std::numeric_limits::quiet_NaN(); + const auto vert_empty = sgl::vertex_xyzm {nan, nan, nan, nan}; + const auto vert_array = + part->is_empty() ? 
reinterpret_cast(&vert_empty) : part->get_vertex_array(); + const auto vert_width = part->get_vertex_width(); + + writer.Copy(vert_array, vert_width); + } break; + case sgl::geometry_type::LINESTRING: { + + const auto vert_array = part->get_vertex_array(); + const auto vert_width = part->get_vertex_width(); + const auto vert_count = part->get_vertex_count(); + + writer.Write(vert_count); + writer.Copy(vert_array, vert_width * vert_count); + } break; + case sgl::geometry_type::POLYGON: { + const auto ring_count = part->get_part_count(); + writer.Write(ring_count); + const auto tail = part->get_last_part(); + if (tail) { + auto ring = tail; + do { + ring = ring->get_next(); + + const auto vert_array = ring->get_vertex_array(); + const auto vert_width = ring->get_vertex_width(); + const auto vert_count = ring->get_vertex_count(); + + writer.Write(vert_count); + writer.Copy(vert_array, vert_width * vert_count); + + } while (ring != tail); + } + } break; + case sgl::geometry_type::MULTI_POINT: + case sgl::geometry_type::MULTI_LINESTRING: + case sgl::geometry_type::MULTI_POLYGON: + case sgl::geometry_type::GEOMETRY_COLLECTION: { + const auto part_count = part->get_part_count(); + writer.Write(part_count); + if (part->is_empty()) { + break; + } + part = part->get_first_part(); + continue; } - if (has_m) { - bbox.min.m = std::min(bbox.min.m, vertex.m); - bbox.max.m = std::max(bbox.max.m, vertex.m); + default: { + throw InvalidInputException("Cannot serialize geometry of type %d", static_cast(part->get_type())); } - } -} - -static void SerializeRecursive(BinaryWriter &cursor, const sgl::geometry *geom, const bool has_z, const bool has_m, - const bool has_bbox, const uint32_t vsize, sgl::extent_xyzm &bbox) { - const auto type = geom->get_type(); - - if (type < sgl::geometry_type::POINT || type > sgl::geometry_type::GEOMETRY_COLLECTION) { - throw InvalidInputException("Cannot serialize geometry of type %d", static_cast(type)); - } - - // The LegacyGeometryType enum used to 
start with POINT = 0 - // but now it starts with INVALID = 0, so we need to subtract 1 - cursor.Write(static_cast(type) - 1); - - switch (type) { - case sgl::geometry_type::POINT: - case sgl::geometry_type::LINESTRING: { - const auto count = geom->get_vertex_count(); - cursor.Write(count); - SerializeVertices(cursor, geom, count, has_z, has_m, has_bbox, vsize, bbox); - } break; - case sgl::geometry_type::POLYGON: { - const auto count = geom->get_part_count(); - cursor.Write(count); - auto ring_cursor = cursor; - cursor.Skip((count * 4) + (count % 2 == 1 ? 4 : 0), true); - - const auto tail = geom->get_last_part(); - if (!tail) { - break; } - auto ring = tail; - do { - ring = ring->get_next(); - ring_cursor.Write(ring->get_vertex_count()); - SerializeVertices(cursor, ring, ring->get_vertex_count(), has_z, has_m, has_bbox, vsize, bbox); - } while (ring != tail); - - } break; - case sgl::geometry_type::MULTI_POINT: - case sgl::geometry_type::MULTI_LINESTRING: - case sgl::geometry_type::MULTI_POLYGON: - case sgl::geometry_type::GEOMETRY_COLLECTION: { - const auto count = geom->get_part_count(); - cursor.Write(count); - - const auto tail = geom->get_last_part(); - if (!tail) { - break; + while (true) { + const auto parent = part->get_parent(); + if (parent == root) { + return; + } + if (part != parent->get_last_part()) { + part = part->get_next(); + break; + } + part = parent; } - - auto part = tail; - do { - part = part->get_next(); - SerializeRecursive(cursor, part, has_z, has_m, has_bbox, vsize, bbox); - } while (part != tail); - } break; - default: - D_ASSERT(false); } } -void Serde::Serialize(const sgl::geometry &geom, char *buffer, size_t buffer_size) { - const auto type = geom.get_type(); - - const auto has_bbox = type != sgl::geometry_type::POINT && !geom.is_empty(); - const auto has_z = geom.has_z(); - const auto has_m = geom.has_m(); - - // Set flags - uint8_t flags = 0; - flags |= has_z ? 0x01 : 0; - flags |= has_m ? 0x02 : 0; - flags |= has_bbox ? 
0x04 : 0; - - BinaryWriter cursor(buffer, buffer_size); - - if (type == sgl::geometry_type::INVALID) { - throw InvalidInputException("Cannot serialize geometry of type INVALID"); - } - - // The LegacyGeometryType enum used to start with POINT = 0 - // but now it starts with INVALID = 0, so we need to subtract 1 - cursor.Write(static_cast(type) - 1); - cursor.Write(flags); - cursor.Write(0); // unused for now - cursor.Write(0); // padding - - const auto dims = 2 + (has_z ? 1 : 0) + (has_m ? 1 : 0); - const auto vert_size = dims * sizeof(double); - const auto bbox_size = has_bbox ? dims * sizeof(float) * 2 : 0; +template +void Prepare(GEOM_TYPE &type, ArenaAllocator &allocator) { +} - // Setup a bbox to store the min/max values - sgl::extent_xyzm bbox = sgl::extent_xyzm::smallest(); +// Specialize for prepared_geometry +template <> +void Prepare(sgl::prepared_geometry &type, ArenaAllocator &allocator) { + GeometryAllocator alloc(allocator); + type.build(alloc); + type.set_prepared(true); +} - auto bbox_cursor = cursor; - cursor.Skip(bbox_size, true); +template +static void DeserializeInternal(sgl::geometry &result, ArenaAllocator &arena, const char *buffer, size_t buffer_size) { + BinaryReader reader(buffer, buffer_size); - SerializeRecursive(cursor, &geom, has_z, has_m, has_bbox, vert_size, bbox); + uint32_t stack[32]; + uint32_t depth = 0; - if (has_bbox) { - bbox_cursor.Write(MathUtil::DoubleToFloatDown(bbox.min.x)); // xmin - bbox_cursor.Write(MathUtil::DoubleToFloatDown(bbox.min.y)); // ymin - bbox_cursor.Write(MathUtil::DoubleToFloatUp(bbox.max.x)); // xmax - bbox_cursor.Write(MathUtil::DoubleToFloatUp(bbox.max.y)); // ymax + auto geom = &result; - if (has_z) { - bbox_cursor.Write(MathUtil::DoubleToFloatDown(bbox.min.z)); // zmin - bbox_cursor.Write(MathUtil::DoubleToFloatUp(bbox.max.z)); // zmax + while (true) { + const auto le = reader.Read() == 1; + if (!le) { + throw InvalidInputException("Only little-endian WKB is supported"); } - - if (has_m) { - 
bbox_cursor.Write(MathUtil::DoubleToFloatDown(bbox.min.m)); // mmin - bbox_cursor.Write(MathUtil::DoubleToFloatUp(bbox.max.m)); // mmax + const auto meta = reader.Read(); + const auto type = static_cast((meta & 0x0000FFFF) % 1000); + const auto flag = (meta & 0x0000FFFF) / 1000; + const auto has_z = (flag & 0x01) != 0; + const auto has_m = (flag & 0x02) != 0; + + geom->set_type(type); + geom->set_z(has_z); + geom->set_m(has_m); + + const auto vert_width = geom->get_vertex_width(); + switch (type) { + case sgl::geometry_type::POINT: { + constexpr auto nan = std::numeric_limits::quiet_NaN(); + const auto vert_array = reader.Reserve(vert_width); + auto vert_empty = sgl::vertex_xyzm {nan, nan, nan, nan}; + memcpy(&vert_empty, vert_array, vert_width); + if (vert_empty.all_nan()) { + geom->set_vertex_array(nullptr, 0); + } else { + geom->set_vertex_array(vert_array, 1); + } + } break; + case sgl::geometry_type::LINESTRING: { + const auto vert_count = reader.Read(); + const auto vert_array = reader.Reserve(vert_count * vert_width); + geom->set_vertex_array(vert_array, vert_count); + } break; + case sgl::geometry_type::POLYGON: { + const auto ring_count = reader.Read(); + if (ring_count == 0) { + break; + } + for (uint32_t i = 0; i < ring_count; i++) { + auto ring_mem = arena.AllocateAligned(sizeof(sgl::geometry)); + const auto ring = new (ring_mem) sgl::geometry(sgl::geometry_type::LINESTRING, has_z, has_m); + + const auto vert_count = reader.Read(); + const auto vert_array = reader.Reserve(vert_count * ring->get_vertex_width()); + ring->set_vertex_array(vert_array, vert_count); + + Prepare(*ring, arena); + + geom->append_part(ring); + } + } break; + case sgl::geometry_type::MULTI_POINT: + case sgl::geometry_type::MULTI_LINESTRING: + case sgl::geometry_type::MULTI_POLYGON: + case sgl::geometry_type::GEOMETRY_COLLECTION: { + if (depth >= 32) { + throw InvalidInputException("Geometry is too deeply nested to deserialize"); + } + + const auto part_count = reader.Read(); + if 
(part_count == 0) { + break; + } + + stack[depth++] = part_count; + + // Make a new part + const auto part_mem = arena.AllocateAligned(sizeof(sgl::geometry)); + const auto part_ptr = new (part_mem) sgl::geometry(sgl::geometry_type::INVALID, has_z, has_m); + + geom->append_part(part_ptr); + geom = part_ptr; + + // Continue to next iteration + continue; } - } -} - -static void DeserializeRecursive(BinaryReader &cursor, sgl::geometry &geom, const bool has_z, const bool has_m, - ArenaAllocator &arena) { - const auto count = cursor.Read(); - switch (geom.get_type()) { - case sgl::geometry_type::POINT: - case sgl::geometry_type::LINESTRING: { - const auto verts = cursor.Reserve(count * geom.get_vertex_width()); - geom.set_vertex_array(verts, count); - } break; - case sgl::geometry_type::POLYGON: { - auto ring_cursor = cursor; - cursor.Skip((count * 4) + (count % 2 == 1 ? 4 : 0)); - for (uint32_t i = 0; i < count; i++) { - const auto ring_count = ring_cursor.Read(); - const auto verts = cursor.Reserve(ring_count * geom.get_vertex_width()); - - auto ring_mem = arena.AllocateAligned(sizeof(sgl::geometry)); - const auto ring = new (ring_mem) sgl::geometry(sgl::geometry_type::LINESTRING, has_z, has_m); - ring->set_vertex_array(verts, ring_count); - - geom.append_part(ring); + default: { + throw InvalidInputException("Cannot deserialize geometry of type %d", static_cast(type)); } - } break; - case sgl::geometry_type::MULTI_POINT: - case sgl::geometry_type::MULTI_LINESTRING: - case sgl::geometry_type::MULTI_POLYGON: - case sgl::geometry_type::GEOMETRY_COLLECTION: { - for (uint32_t i = 0; i < count; i++) { - const auto part_type = static_cast(cursor.Read() + 1); - auto part_mem = arena.AllocateAligned(sizeof(sgl::geometry)); - const auto part = new (part_mem) sgl::geometry(part_type, has_z, has_m); - DeserializeRecursive(cursor, *part, has_z, has_m, arena); - - geom.append_part(part); } - } break; - default: - break; - } -} -void Serde::Deserialize(sgl::geometry &result, 
ArenaAllocator &arena, const char *buffer, size_t buffer_size) { + // Inner loop + while (true) { + const auto parent = geom->get_parent(); - BinaryReader cursor(buffer, buffer_size); + if (depth == 0) { + return; + } - const auto type = static_cast(cursor.Read() + 1); - const auto flags = cursor.Read(); - cursor.Skip(sizeof(uint16_t)); - cursor.Skip(sizeof(uint32_t)); // padding + stack[depth - 1]--; + if (stack[depth - 1] > 0) { + const auto part_mem = arena.AllocateAligned(sizeof(sgl::geometry)); + const auto part_ptr = new (part_mem) sgl::geometry(sgl::geometry_type::INVALID, has_z, has_m); - // Parse flags - const auto has_z = (flags & 0x01) != 0; - const auto has_m = (flags & 0x02) != 0; - const auto has_bbox = (flags & 0x04) != 0; + parent->append_part(part_ptr); - const auto format_v1 = (flags & 0x40) != 0; - const auto format_v0 = (flags & 0x80) != 0; + geom = part_ptr; + break; + } - if (format_v1 || format_v0) { - // Unsupported version, throw an error - throw NotImplementedException( - "This geometry seems to be written with a newer version of the DuckDB spatial library that is not " - "compatible with this version. 
Please upgrade your DuckDB installation."); - } - - if (has_bbox) { - // Skip past bbox if present - cursor.Skip(sizeof(float) * 2 * (2 + has_z + has_m)); + geom = parent; + depth--; + } } - - // Create root geometry - result.set_type(type); - result.set_z(has_z); - result.set_m(has_m); - - // Read the first type - cursor.Read(); - - // Deserialize the geometry - DeserializeRecursive(cursor, result, has_z, has_m, arena); } -static void DeserializePreparedRecursive(BinaryReader &cursor, sgl::prepared_geometry &geom, const bool has_z, - const bool has_m, GeometryAllocator &alloc) { - const auto count = cursor.Read(); - switch (geom.get_type()) { - case sgl::geometry_type::POINT: - case sgl::geometry_type::LINESTRING: { - const auto verts = cursor.Reserve(count * geom.get_vertex_width()); - geom.set_vertex_array(verts, count); - } break; - case sgl::geometry_type::POLYGON: { - auto ring_cursor = cursor; - cursor.Skip((count * 4) + (count % 2 == 1 ? 4 : 0)); - for (uint32_t i = 0; i < count; i++) { - const auto ring_count = ring_cursor.Read(); - const auto verts = cursor.Reserve(ring_count * geom.get_vertex_width()); - - auto ring_mem = alloc.alloc(sizeof(sgl::prepared_geometry)); - const auto ring = new (ring_mem) sgl::prepared_geometry(sgl::geometry_type::LINESTRING, has_z, has_m); - - ring->set_vertex_array(verts, ring_count); - - ring->build(alloc); - ring->set_prepared(true); - - geom.append_part(ring); - } - } break; - case sgl::geometry_type::MULTI_POINT: - case sgl::geometry_type::MULTI_LINESTRING: - case sgl::geometry_type::MULTI_POLYGON: - case sgl::geometry_type::GEOMETRY_COLLECTION: { - for (uint32_t i = 0; i < count; i++) { - const auto part_type = static_cast(cursor.Read() + 1); - auto part_mem = alloc.alloc(sizeof(sgl::prepared_geometry)); - const auto part = new (part_mem) sgl::prepared_geometry(part_type, has_z, has_m); - - DeserializePreparedRecursive(cursor, *part, has_z, has_m, alloc); - - geom.append_part(part); - } - } break; - default: - break; - 
} +void Serde::Deserialize(sgl::geometry &result, ArenaAllocator &arena, const char *buffer, size_t buffer_size) { + DeserializeInternal(result, arena, buffer, buffer_size); } void Serde::DeserializePrepared(sgl::prepared_geometry &result, ArenaAllocator &arena, const char *buffer, size_t buffer_size) { + DeserializeInternal(result, arena, buffer, buffer_size); +} - BinaryReader cursor(buffer, buffer_size); - - const auto type = static_cast(cursor.Read() + 1); - const auto flags = cursor.Read(); - cursor.Skip(sizeof(uint16_t)); - cursor.Skip(sizeof(uint32_t)); // padding - - // Parse flags - const auto has_z = (flags & 0x01) != 0; - const auto has_m = (flags & 0x02) != 0; - const auto has_bbox = (flags & 0x04) != 0; - - const auto format_v1 = (flags & 0x40) != 0; - const auto format_v0 = (flags & 0x80) != 0; - - if (format_v1 || format_v0) { - // Unsupported version, throw an error - throw NotImplementedException( - "This geometry seems to be written with a newer version of the DuckDB spatial library that is not " - "compatible with this version. 
Please upgrade your DuckDB installation."); - } - - if (has_bbox) { - // Skip past bbox if present - cursor.Skip(sizeof(float) * 2 * (2 + has_z + has_m)); +uint32_t Serde::TryGetBounds(const string_t &blob, Box2D &bbox) { + GeometryExtent extent; + const auto count = Geometry::GetExtent(blob, extent); + if (count == 0) { + return 0; } - - // Create root geometry - result.set_type(type); - result.set_z(has_z); - result.set_m(has_m); - - // Read the first type - cursor.Read(); - - GeometryAllocator alloc(arena); - - // Deserialize the geometry - DeserializePreparedRecursive(cursor, result, has_z, has_m, alloc); + bbox.min.x = MathUtil::DoubleToFloatDown(extent.x_min); + bbox.min.y = MathUtil::DoubleToFloatDown(extent.y_min); + bbox.max.x = MathUtil::DoubleToFloatUp(extent.x_max); + bbox.max.y = MathUtil::DoubleToFloatUp(extent.y_max); + return count; } } // namespace duckdb diff --git a/src/spatial/geometry/geometry_serialization.hpp b/src/spatial/geometry/geometry_serialization.hpp index 7feadd0a..d82e45fb 100644 --- a/src/spatial/geometry/geometry_serialization.hpp +++ b/src/spatial/geometry/geometry_serialization.hpp @@ -1,6 +1,9 @@ #pragma once +#include "bbox.hpp" + #include +#include namespace sgl { class geometry; @@ -18,6 +21,8 @@ struct Serde { static void Deserialize(sgl::geometry &result, ArenaAllocator &arena, const char *buffer, size_t buffer_size); static void DeserializePrepared(sgl::prepared_geometry &result, ArenaAllocator &arena, const char *buffer, size_t buffer_size); + + static uint32_t TryGetBounds(const string_t &blob, Box2D &bbox); }; } // namespace duckdb diff --git a/src/spatial/geometry/geometry_type.hpp b/src/spatial/geometry/geometry_type.hpp deleted file mode 100644 index 8ce41a99..00000000 --- a/src/spatial/geometry/geometry_type.hpp +++ /dev/null @@ -1,159 +0,0 @@ -#pragma once - -#include "duckdb/common/string_util.hpp" -#include "duckdb/common/types/string_type.hpp" -#include "spatial/geometry/bbox.hpp" -#include 
"spatial/geometry/geometry_properties.hpp" -#include "spatial/util/cursor.hpp" - -#include "duckdb/common/type_util.hpp" -#include "spatial/util/math.hpp" - -namespace duckdb { - -enum class LegacyGeometryType : uint8_t { - POINT = 0, - LINESTRING, - POLYGON, - MULTIPOINT, - MULTILINESTRING, - MULTIPOLYGON, - GEOMETRYCOLLECTION -}; - -struct LegacyGeometryTypes { - static bool IsSinglePart(LegacyGeometryType type) { - return type == LegacyGeometryType::POINT || type == LegacyGeometryType::LINESTRING; - } - - static bool IsMultiPart(LegacyGeometryType type) { - return type == LegacyGeometryType::POLYGON || type == LegacyGeometryType::MULTIPOINT || - type == LegacyGeometryType::MULTILINESTRING || type == LegacyGeometryType::MULTIPOLYGON || - type == LegacyGeometryType::GEOMETRYCOLLECTION; - } - - static bool IsCollection(LegacyGeometryType type) { - return type == LegacyGeometryType::MULTIPOINT || type == LegacyGeometryType::MULTILINESTRING || - type == LegacyGeometryType::MULTIPOLYGON || type == LegacyGeometryType::GEOMETRYCOLLECTION; - } - - static string ToString(LegacyGeometryType type) { - switch (type) { - case LegacyGeometryType::POINT: - return "POINT"; - case LegacyGeometryType::LINESTRING: - return "LINESTRING"; - case LegacyGeometryType::POLYGON: - return "POLYGON"; - case LegacyGeometryType::MULTIPOINT: - return "MULTIPOINT"; - case LegacyGeometryType::MULTILINESTRING: - return "MULTILINESTRING"; - case LegacyGeometryType::MULTIPOLYGON: - return "MULTIPOLYGON"; - case LegacyGeometryType::GEOMETRYCOLLECTION: - return "GEOMETRYCOLLECTION"; - default: - return StringUtil::Format("UNKNOWN(%d)", static_cast(type)); - } - } -}; - -enum class SerializedGeometryType : uint32_t { - POINT = 0, - LINESTRING, - POLYGON, - MULTIPOINT, - MULTILINESTRING, - MULTIPOLYGON, - GEOMETRYCOLLECTION -}; - -// A serialized geometry -class geometry_t { -private: - string_t data; - -public: - geometry_t() = default; - // NOLINTNEXTLINE - explicit geometry_t(string_t data) : 
data(data) { - } - - // NOLINTNEXTLINE - operator string_t() const { - return data; - } - - LegacyGeometryType GetType() const { - // return the type - const auto type = Load(const_data_ptr_cast(data.GetPrefix())); - const auto props = Load(const_data_ptr_cast(data.GetPrefix() + 1)); - props.CheckVersion(); - return type; - } - - GeometryProperties GetProperties() const { - const auto props = Load(const_data_ptr_cast(data.GetPrefix() + 1)); - // Check the version - props.CheckVersion(); - return props; - } - - bool TryGetCachedBounds(Box2D &bbox) const { - Cursor cursor(data); - - // Read the header - auto header_type = cursor.Read(); - auto properties = cursor.Read(); - auto hash = cursor.Read(); - (void)hash; - - // Check the version - properties.CheckVersion(); - - if (properties.HasBBox()) { - cursor.Skip(4); // skip padding - - // Now set the bounding box - bbox.min.x = cursor.Read(); - bbox.min.y = cursor.Read(); - bbox.max.x = cursor.Read(); - bbox.max.y = cursor.Read(); - return true; - } - - if (header_type == LegacyGeometryType::POINT) { - cursor.Skip(4); // skip padding - - // Read the point - auto type = cursor.Read(); - D_ASSERT(type == SerializedGeometryType::POINT); - (void)type; - - auto count = cursor.Read(); - if (count == 0) { - // If the point is empty, there is no bounding box - return false; - } - - const auto x = cursor.Read(); - const auto y = cursor.Read(); - bbox.min.x = MathUtil::DoubleToFloatDown(x); - bbox.min.y = MathUtil::DoubleToFloatDown(y); - bbox.max.x = MathUtil::DoubleToFloatUp(x); - bbox.max.y = MathUtil::DoubleToFloatUp(y); - return true; - } - return false; - } -}; - -template <> -inline PhysicalType GetTypeId() { - return PhysicalType::VARCHAR; -} - -static_assert(sizeof(geometry_t) == sizeof(string_t), "geometry_t should be the same size as string_t"); - -} // namespace duckdb diff --git a/src/spatial/geometry/vertex.hpp b/src/spatial/geometry/vertex.hpp index 1fd2e0f3..c1399498 100644 --- a/src/spatial/geometry/vertex.hpp 
+++ b/src/spatial/geometry/vertex.hpp @@ -147,4 +147,4 @@ struct PointXYZM : PointXYZ { } }; -} // namespace duckdb \ No newline at end of file +} // namespace duckdb diff --git a/src/spatial/geometry/wkb_writer.cpp b/src/spatial/geometry/wkb_writer.cpp deleted file mode 100644 index 58aecc34..00000000 --- a/src/spatial/geometry/wkb_writer.cpp +++ /dev/null @@ -1,194 +0,0 @@ -#include "spatial/geometry/wkb_writer.hpp" -#include "spatial/geometry/geometry_processor.hpp" - -#include "duckdb/common/types/vector.hpp" - -namespace duckdb { - -namespace { - -//------------------------------------------------------------------------------ -// Size Calculator -//------------------------------------------------------------------------------ -class WKBSizeCalculator final : GeometryProcessor { - uint32_t ProcessPoint(const VertexData &vertices) override { - // + + + (+ + ) - // WKB Points always write points even if empty - return sizeof(uint8_t) + sizeof(uint32_t) + sizeof(double) * (2 + (HasZ() ? 1 : 0) + (HasM() ? 
1 : 0)); - } - - uint32_t ProcessLineString(const VertexData &vertices) override { - // + + + - return sizeof(uint8_t) + sizeof(uint32_t) + sizeof(uint32_t) + vertices.ByteSize(); - } - - uint32_t ProcessPolygon(PolygonState &state) override { - // + + + - uint32_t size = sizeof(uint8_t) + sizeof(uint32_t) + sizeof(uint32_t); - while (!state.IsDone()) { - // + - size += sizeof(uint32_t) + state.Next().ByteSize(); - } - return size; - } - - uint32_t ProcessCollection(CollectionState &state) override { - // + + - uint32_t size = sizeof(uint8_t) + sizeof(uint32_t) + sizeof(uint32_t); - while (!state.IsDone()) { - // + - size += state.Next(); - } - return size; - } - -public: - virtual ~WKBSizeCalculator() = default; - - uint32_t Execute(const geometry_t &geometry) { - return Process(geometry); - } -}; - -//------------------------------------------------------------------------------ -// Serializer -//------------------------------------------------------------------------------ -class WKBSerializer final : GeometryProcessor { - - void WriteHeader(Cursor &cursor) { - // - cursor.Write(1); - uint32_t type_id = static_cast(CurrentType()) + 1; - if (HasZ()) { - type_id += 1000; - } - if (HasM()) { - type_id += 2000; - } - // - cursor.Write(type_id); - } - - void ProcessPoint(const VertexData &vertices, Cursor &cursor) override { - WriteHeader(cursor); - if (vertices.IsEmpty()) { - cursor.Write(std::numeric_limits::quiet_NaN()); - cursor.Write(std::numeric_limits::quiet_NaN()); - if (HasZ()) { - cursor.Write(std::numeric_limits::quiet_NaN()); - } - if (HasM()) { - cursor.Write(std::numeric_limits::quiet_NaN()); - } - } else { - cursor.Write(Load(vertices.data[0])); - cursor.Write(Load(vertices.data[1])); - if (HasZ()) { - cursor.Write(Load(vertices.data[2])); - } - if (HasM()) { - cursor.Write(Load(vertices.data[3])); - } - } - } - - void ProcessVertices(const VertexData &vertices, Cursor &cursor) { - bool has_z = HasZ(); - bool has_m = HasM(); - for (uint32_t i = 0; i < 
vertices.count; i++) { - cursor.Write(Load(vertices.data[0] + i * vertices.stride[0])); - cursor.Write(Load(vertices.data[1] + i * vertices.stride[1])); - if (has_z) { - cursor.Write(Load(vertices.data[2] + i * vertices.stride[2])); - } - if (has_m) { - cursor.Write(Load(vertices.data[3] + i * vertices.stride[3])); - } - } - } - - void ProcessLineString(const VertexData &vertices, Cursor &cursor) override { - WriteHeader(cursor); - cursor.Write(vertices.count); - ProcessVertices(vertices, cursor); - } - - void ProcessPolygon(PolygonState &state, Cursor &cursor) override { - WriteHeader(cursor); - cursor.Write(state.RingCount()); - while (!state.IsDone()) { - auto vertices = state.Next(); - cursor.Write(vertices.count); - ProcessVertices(vertices, cursor); - } - } - - void ProcessCollection(CollectionState &state, Cursor &cursor) override { - WriteHeader(cursor); - cursor.Write(state.ItemCount()); - while (!state.IsDone()) { - state.Next(cursor); - } - } - -public: - virtual ~WKBSerializer() = default; - void Execute(const geometry_t &geometry, data_ptr_t start, data_ptr_t end) { - Cursor cursor(start, end); - Process(geometry, cursor); - } - void Execute(const geometry_t &geometry, string_t &blob) { - Cursor cursor(blob); - Process(geometry, cursor); - blob.Finalize(); - } -}; - -} // namespace - -//------------------------------------------------------------------------------ -// WKB Writer -//------------------------------------------------------------------------------ -string_t WKBWriter::Write(const geometry_t &geometry, Vector &result) { - WKBSizeCalculator size_processor; - WKBSerializer serializer; - auto size = size_processor.Execute(geometry); - auto blob = StringVector::EmptyString(result, size); - serializer.Execute(geometry, blob); - return blob; -} - -string_t WKBWriter::Write(const string_t &geometry, Vector &result) { - const geometry_t geom(geometry); - return Write(geom, result); -} - -void WKBWriter::Write(const geometry_t &geometry, vector 
&buffer) { - WKBSizeCalculator size_processor; - WKBSerializer serializer; - auto size = size_processor.Execute(geometry); - buffer.resize(size); - serializer.Execute(geometry, buffer.data(), buffer.data() + size); -} - -void WKBWriter::Write(const string_t &geometry, vector &buffer) { - const geometry_t geom(geometry); - Write(geom, buffer); -} - -const_data_ptr_t WKBWriter::Write(const geometry_t &geometry, uint32_t *size, ArenaAllocator &allocator) { - WKBSizeCalculator size_processor; - WKBSerializer serializer; - auto blob_size = size_processor.Execute(geometry); - auto blob = allocator.AllocateAligned(blob_size); - serializer.Execute(geometry, blob, blob + blob_size); - *size = blob_size; - return blob; -} - -const_data_ptr_t WKBWriter::Write(const string_t &geometry, uint32_t *size, ArenaAllocator &allocator) { - const geometry_t geom(geometry); - return WKBWriter::Write(geom, size, allocator); -} - -} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/geometry/wkb_writer.hpp b/src/spatial/geometry/wkb_writer.hpp deleted file mode 100644 index 55d64e77..00000000 --- a/src/spatial/geometry/wkb_writer.hpp +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include "spatial/geometry/geometry_type.hpp" -#include "duckdb/common/types/string_type.hpp" - -namespace duckdb { - -class ArenaAllocator; - -struct WKBWriter { - // Write a geometry to a WKB blob attached to a vector - static string_t Write(const geometry_t &geometry, Vector &result); - static string_t Write(const string_t &geometry, Vector &result); - - // Write a geometry to a WKB blob into a buffer - static void Write(const geometry_t &geometry, vector &buffer); - static void Write(const string_t &geometry, vector &buffer); - - // Write a geometry to a WKB blob into an arena allocator - static const_data_ptr_t Write(const geometry_t &geometry, uint32_t *size, ArenaAllocator &allocator); - static const_data_ptr_t Write(const string_t &geometry, uint32_t *size, ArenaAllocator 
&allocator); -}; - -} // namespace duckdb \ No newline at end of file diff --git a/src/spatial/index/rtree/rtree_index.cpp b/src/spatial/index/rtree/rtree_index.cpp index b1d8f9da..eafc28c9 100644 --- a/src/spatial/index/rtree/rtree_index.cpp +++ b/src/spatial/index/rtree/rtree_index.cpp @@ -5,8 +5,8 @@ #include "duckdb/execution/index/fixed_size_allocator.hpp" #include "duckdb/storage/table/scan_state.hpp" #include "duckdb/main/database.hpp" +#include "spatial/geometry/geometry_serialization.hpp" -#include "spatial/geometry/geometry_type.hpp" #include "spatial/index/rtree/rtree_module.hpp" #include "spatial/index/rtree/rtree_node.hpp" #include "spatial/index/rtree/rtree_scanner.hpp" @@ -148,7 +148,7 @@ ErrorData RTreeIndex::Insert(IndexLock &lock, DataChunk &input, Vector &rowid_ve input.Flatten(); auto &geom_vec = input.data[0]; - const auto &geom_data = FlatVector::GetData(geom_vec); + const auto &geom_data = FlatVector::GetData(geom_vec); const auto &rowid_data = FlatVector::GetData(rowid_vec); if (geom_data == nullptr || rowid_data == nullptr) { @@ -167,7 +167,7 @@ ErrorData RTreeIndex::Insert(IndexLock &lock, DataChunk &input, Vector &rowid_ve const auto rowid = rowid_data[i]; Box2D bbox; - if (!geom_data[i].TryGetCachedBounds(bbox)) { + if (!Serde::TryGetBounds(geom_data[i], bbox)) { valid_buffer[i] = false; continue; } @@ -218,11 +218,11 @@ void RTreeIndex::Delete(IndexLock &lock, DataChunk &input, Vector &rowid_vec) { continue; } - auto &geom = UnifiedVectorFormat::GetData(geom_format)[geom_idx]; + auto &geom = UnifiedVectorFormat::GetData(geom_format)[geom_idx]; auto &rowid = UnifiedVectorFormat::GetData(rowid_format)[rowid_idx]; Box2D approx_bounds; - if (!geom.TryGetCachedBounds(approx_bounds)) { + if (!Serde::TryGetBounds(geom, approx_bounds)) { continue; } diff --git a/src/spatial/index/rtree/rtree_index_plan_scan.cpp b/src/spatial/index/rtree/rtree_index_plan_scan.cpp index 42a19567..e0cdf5bf 100644 --- 
a/src/spatial/index/rtree/rtree_index_plan_scan.cpp +++ b/src/spatial/index/rtree/rtree_index_plan_scan.cpp @@ -19,12 +19,12 @@ #include "duckdb/main/database.hpp" #include "spatial/geometry/bbox.hpp" -#include "spatial/geometry/geometry_type.hpp" #include "spatial/index/rtree/rtree_index.hpp" #include "spatial/index/rtree/rtree_index_create_logical.hpp" #include "spatial/index/rtree/rtree_index_scan.hpp" #include "spatial/index/rtree/rtree_module.hpp" #include "spatial/spatial_types.hpp" +#include "spatial/geometry/geometry_serialization.hpp" #include "spatial/util/math.hpp" namespace duckdb { @@ -115,12 +115,8 @@ class RTreeIndexScanOptimizer : public OptimizerExtension { } static bool TryGetBoundingBox(const Value &value, Box2D &bbox) { - const auto str = value.GetValueUnsafe(); - const geometry_t blob(str); - if (!blob.TryGetCachedBounds(bbox)) { - return false; - } - return true; + const auto blob = value.GetValueUnsafe(); + return Serde::TryGetBounds(blob, bbox) != 0; } static bool TryOptimize(Binder &binder, ClientContext &context, unique_ptr &plan, diff --git a/src/spatial/modules/gdal/gdal_module.cpp b/src/spatial/modules/gdal/gdal_module.cpp index 3f016acb..1b1761aa 100644 --- a/src/spatial/modules/gdal/gdal_module.cpp +++ b/src/spatial/modules/gdal/gdal_module.cpp @@ -3,7 +3,6 @@ // Spatial #include "spatial/spatial_types.hpp" #include "spatial/geometry/sgl.hpp" -#include "spatial/geometry/wkb_writer.hpp" #include "spatial/geometry/geometry_serialization.hpp" #include "spatial/util/function_builder.hpp" @@ -1800,10 +1799,10 @@ struct ST_Write { if (type == LogicalType::GEOMETRY()) { const auto blob = value.GetValueUnsafe(); - uint32_t size; - const auto wkb = WKBWriter::Write(blob, &size, arena); + uint32_t size = blob.GetSize(); OGRGeometry *ptr; - const auto ok = OGRGeometryFactory::createFromWkb(wkb, nullptr, &ptr, size, wkbVariantIso); + // TODO: Fix this + const auto ok = OGRGeometryFactory::createFromWkb(blob.GetData(), nullptr, &ptr, size, 
wkbVariantIso); if (ok != OGRERR_NONE) { throw IOException("Could not parse WKB"); } diff --git a/src/spatial/modules/geos/geos_module.cpp b/src/spatial/modules/geos/geos_module.cpp index 27a666a0..448c4a7a 100644 --- a/src/spatial/modules/geos/geos_module.cpp +++ b/src/spatial/modules/geos/geos_module.cpp @@ -1635,8 +1635,9 @@ struct ST_MaximumInscribedCircle { static void Register(ExtensionLoader &loader) { - const auto result_type = LogicalType::STRUCT( - {{"center", LogicalType::GEOMETRY()}, {"nearest", LogicalType::GEOMETRY()}, {"radius", LogicalType::DOUBLE}}); + const auto result_type = LogicalType::STRUCT({{"center", LogicalType::GEOMETRY()}, + {"nearest", LogicalType::GEOMETRY()}, + {"radius", LogicalType::DOUBLE}}); FunctionBuilder::RegisterScalar(loader, "ST_MaximumInscribedCircle", [&](ScalarFunctionBuilder &func) { func.AddVariant([&](ScalarFunctionVariantBuilder &variant) { @@ -2526,8 +2527,8 @@ struct ST_Union_Agg { } static void Register(ExtensionLoader &loader) { - AggregateFunction agg({LogicalType::GEOMETRY()}, LogicalType::GEOMETRY(), StateSize, Initialize, Update, Combine, - Finalize, nullptr, nullptr, Destroy); + AggregateFunction agg({LogicalType::GEOMETRY()}, LogicalType::GEOMETRY(), StateSize, Initialize, Update, + Combine, Finalize, nullptr, nullptr, Destroy); FunctionBuilder::RegisterAggregate(loader, "ST_Union_Agg", [&](AggregateFunctionBuilder &func) { func.SetFunction(agg); @@ -2791,8 +2792,8 @@ struct ST_CoverageSimplify_Agg : GEOSCoverageAggFunction { static void Register(ExtensionLoader &loader) { using SELF = ST_CoverageSimplify_Agg; - AggregateFunction agg({LogicalType::GEOMETRY(), LogicalType::DOUBLE}, LogicalType::GEOMETRY(), StateSize, Initialize, - Update, Combine, Finalize, nullptr, Bind, Destroy); + AggregateFunction agg({LogicalType::GEOMETRY(), LogicalType::DOUBLE}, LogicalType::GEOMETRY(), StateSize, + Initialize, Update, Combine, Finalize, nullptr, Bind, Destroy); FunctionBuilder::RegisterAggregate(loader, 
"ST_CoverageSimplify_Agg", [&](AggregateFunctionBuilder &func) { func.SetFunction(agg); @@ -2952,8 +2953,8 @@ struct ST_CoverageInvalidEdges_Agg : GEOSCoverageAggFunction { static void Register(ExtensionLoader &loader) { using SELF = ST_CoverageInvalidEdges_Agg; - AggregateFunction agg({LogicalType::GEOMETRY()}, LogicalType::GEOMETRY(), StateSize, Initialize, Update, Combine, - Finalize, nullptr, Bind, Destroy, nullptr); + AggregateFunction agg({LogicalType::GEOMETRY()}, LogicalType::GEOMETRY(), StateSize, Initialize, Update, + Combine, Finalize, nullptr, Bind, Destroy, nullptr); FunctionBuilder::RegisterAggregate(loader, "ST_CoverageInvalidEdges_Agg", [&](AggregateFunctionBuilder &func) { func.SetFunction(agg); diff --git a/src/spatial/modules/geos/geos_serde.cpp b/src/spatial/modules/geos/geos_serde.cpp index 642336f3..633020d2 100644 --- a/src/spatial/modules/geos/geos_serde.cpp +++ b/src/spatial/modules/geos/geos_serde.cpp @@ -2,31 +2,36 @@ #include "duckdb/common/typedefs.hpp" #include "geos_c.h" +#include "duckdb/common/types/geometry.hpp" +#include "sgl/sgl.hpp" -#include -#include -#include -#include "spatial/geometry/geometry_processor.hpp" +#include "duckdb/common/assert.hpp" +#include "spatial/util/binary_writer.hpp" +#include "spatial/util/math.hpp" +#include "spatial/util/binary_reader.hpp" +namespace sgl { +enum class geometry_type : uint8_t; +} namespace duckdb { template static T StorageTypeFromGEOS(int type) { switch (type) { case GEOS_POINT: - return static_cast(0); - case GEOS_LINESTRING: return static_cast(1); - case GEOS_POLYGON: + case GEOS_LINESTRING: return static_cast(2); - case GEOS_MULTIPOINT: + case GEOS_POLYGON: return static_cast(3); - case GEOS_MULTILINESTRING: + case GEOS_MULTIPOINT: return static_cast(4); - case GEOS_MULTIPOLYGON: + case GEOS_MULTILINESTRING: return static_cast(5); - case GEOS_GEOMETRYCOLLECTION: + case GEOS_MULTIPOLYGON: return static_cast(6); + case GEOS_GEOMETRYCOLLECTION: + return static_cast(7); default: throw 
InvalidInputException("Unsupported GEOS geometry type %d", type); } @@ -35,105 +40,68 @@ static T StorageTypeFromGEOS(int type) { //---------------------------------------------------------------------------------------------------------------------- // Get Required Size //---------------------------------------------------------------------------------------------------------------------- - static size_t GetCoordSeqLength(const GEOSContextHandle_t ctx, const GEOSCoordSequence *seq) { uint32_t len = 0; GEOSCoordSeq_getSize_r(ctx, seq, &len); return len; } -static size_t GetRequiredSizeInternal(const GEOSContextHandle_t ctx, const GEOSGeometry *geom) { +size_t GeosSerde::GetRequiredSize(GEOSContextHandle_t ctx, const GEOSGeom_t *geom) { const auto type = GEOSGeomTypeId_r(ctx, geom); const bool has_z = GEOSHasZ_r(ctx, geom); const bool has_m = GEOSHasM_r(ctx, geom); - const auto vsize = sizeof(double) * (2 + has_z + has_m); + const auto vert_width = sizeof(double) * (2 + has_z + has_m); + + size_t size = sizeof(uint8_t) + sizeof(uint32_t); // endian + type switch (type) { case GEOS_POINT: { - return 4 + 4 + (GEOSisEmpty_r(ctx, geom) ? 
0 : vsize); - } + size += vert_width; + } break; case GEOS_LINESTRING: { - const auto line_seq = GEOSGeom_getCoordSeq_r(ctx, geom); - uint32_t line_len = 0; - GEOSCoordSeq_getSize_r(ctx, line_seq, &line_len); - return 4 + 4 + line_len * vsize; - } + const auto seq = GEOSGeom_getCoordSeq_r(ctx, geom); + const auto len = GetCoordSeqLength(ctx, seq); + size += sizeof(uint32_t) + (len * vert_width); + } break; case GEOS_POLYGON: { - // 4 bytes for type, - // 4 bytes for num rings - // 4 bytes for num points in shell, - // vertex_size bytes per point in shell, - // 4 bytes for num holes, - // 4 bytes for num points in hole, - // vertex_size bytes per point in hole - // 4 bytes padding if (shell + holes) % 2 == 1 - size_t size = 4 + 4; + + size += sizeof(uint32_t); // num rings + if (GEOSisEmpty_r(ctx, geom)) { + break; + } const auto exterior_ptr = GEOSGetExteriorRing_r(ctx, geom); const auto exterior_seq = GEOSGeom_getCoordSeq_r(ctx, exterior_ptr); - uint32_t exterior_len = 0; - GEOSCoordSeq_getSize_r(ctx, exterior_seq, &exterior_len); - size += 4 + exterior_len * vsize; + const auto exterior_len = GetCoordSeqLength(ctx, exterior_seq); + + size += sizeof(uint32_t); // num points in shell + size += exterior_len * vert_width; // shell points const auto num_rings = GEOSGetNumInteriorRings_r(ctx, geom); for (auto i = 0; i < num_rings; i++) { const auto interior_ptr = GEOSGetInteriorRingN_r(ctx, geom, i); const auto interior_seq = GEOSGeom_getCoordSeq_r(ctx, interior_ptr); - uint32_t interior_len = 0; - GEOSCoordSeq_getSize_r(ctx, interior_seq, &interior_len); - size += 4 + interior_len * vsize; - } - - // We need to count the shell as well - if ((num_rings + 1) % 2 != 0) { - size += 4; + const auto interior_len = GetCoordSeqLength(ctx, interior_seq); + size += sizeof(uint32_t); // num points in hole + size += interior_len * vert_width; // hole points } - return size; - } + } break; case GEOS_MULTIPOINT: case GEOS_MULTILINESTRING: case GEOS_MULTIPOLYGON: case 
GEOS_GEOMETRYCOLLECTION: { - size_t size = 4 + 4; + size += sizeof(uint32_t); // num parts const auto num_items = GEOSGetNumGeometries_r(ctx, geom); for (auto i = 0; i < num_items; i++) { const auto item = GEOSGetGeometryN_r(ctx, geom, i); - const auto item_size = GetRequiredSizeInternal(ctx, item); - if (item_size == 0) { - // Unsupported geometry type - return 0; - } - size += item_size; + size += GetRequiredSize(ctx, item); } - return size; - } + } break; default: - // Unsupported geometry type - return 0; + break; } -} - -size_t GeosSerde::GetRequiredSize(GEOSContextHandle_t ctx, const GEOSGeom_t *geom) { - const auto is_point = (GEOSGeomTypeId_r(ctx, geom) == GEOS_POINT); - const auto is_empty = GEOSisEmpty_r(ctx, geom); - - const auto has_bbox = !is_point && !is_empty; - const auto has_z = GEOSHasZ_r(ctx, geom); - const auto has_m = GEOSHasM_r(ctx, geom); - - const auto dims = 2 + (has_z ? 1 : 0) + (has_m ? 1 : 0); - - const auto head_size = 4 + 4; // type + props + padding - const auto geom_size = GetRequiredSizeInternal(ctx, geom); - const auto bbox_size = has_bbox ? 
dims * sizeof(float) * 2 : 0; - - const auto full_size = head_size + geom_size + bbox_size; - - // Check that the size is a multiple of 8 - D_ASSERT(full_size % 8 == 0); - - return full_size; + return size; } //---------------------------------------------------------------------------------------------------------------------- @@ -151,25 +119,35 @@ static void SerializeInternal(const GEOSContextHandle_t ctx, const GEOSGeometry const bool has_z = GEOSHasZ_r(ctx, geom); const bool has_m = GEOSHasM_r(ctx, geom); - cursor.Write(StorageTypeFromGEOS(type)); + cursor.Write(1); // Little Endian + cursor.Write(StorageTypeFromGEOS(type) + (has_z * 1000) + (has_m * 2000)); switch (type) { - case GEOS_POINT: + case GEOS_POINT: { + if (GEOSisEmpty_r(ctx, geom)) { + // Write NaNs for empty point + constexpr auto nan = std::numeric_limits::quiet_NaN(); + constexpr VertexXYZM empty_point {nan, nan, nan, nan}; + cursor.Copy(reinterpret_cast(&empty_point), sizeof(double) * (2 + has_z + has_m)); + } else { + const auto seq = GEOSGeom_getCoordSeq_r(ctx, geom); + SerializeCoordSeq(ctx, seq, has_z, has_m, 1, cursor); + } + } break; case GEOS_LINESTRING: { if (GEOSisEmpty_r(ctx, geom)) { cursor.Write(0); - return; + break; } const auto seq = GEOSGeom_getCoordSeq_r(ctx, geom); const auto len = GetCoordSeqLength(ctx, seq); cursor.Write(len); SerializeCoordSeq(ctx, seq, has_z, has_m, len, cursor); - return; - } + } break; case GEOS_POLYGON: { if (GEOSisEmpty_r(ctx, geom)) { cursor.Write(0); - return; + break; } const auto num_rings = GEOSGetNumInteriorRings_r(ctx, geom); @@ -180,21 +158,8 @@ static void SerializeInternal(const GEOSContextHandle_t ctx, const GEOSGeometry const auto exterior_seq = GEOSGeom_getCoordSeq_r(ctx, exterior_ptr); const auto exterior_len = GetCoordSeqLength(ctx, exterior_seq); - // Save the cursor position to write the ring lengths later - BinaryWriter len_cursor = cursor; - - // Jump over the ring lengths - cursor.Skip(sizeof(uint32_t) * (num_rings + 1)); - - // 
Add padding if odd number of rings - if ((num_rings + 1) % 2 != 0) { - cursor.Write(0); - } - - // Now write both the length and the coordinates in one pass - // Starting with the exterior ring - len_cursor.Write(exterior_len); + cursor.Write(exterior_len); SerializeCoordSeq(ctx, exterior_seq, has_z, has_m, exterior_len, cursor); // And for each interior ring @@ -202,11 +167,10 @@ static void SerializeInternal(const GEOSContextHandle_t ctx, const GEOSGeometry const auto interior_ptr = GEOSGetInteriorRingN_r(ctx, geom, i); const auto interior_seq = GEOSGeom_getCoordSeq_r(ctx, interior_ptr); const auto interior_len = GetCoordSeqLength(ctx, interior_seq); - len_cursor.Write(interior_len); + cursor.Write(interior_len); SerializeCoordSeq(ctx, interior_seq, has_z, has_m, interior_len, cursor); } - return; - } + } break; case GEOS_MULTIPOINT: case GEOS_MULTILINESTRING: case GEOS_MULTIPOLYGON: @@ -217,8 +181,7 @@ static void SerializeInternal(const GEOSContextHandle_t ctx, const GEOSGeometry const auto item = GEOSGetGeometryN_r(ctx, geom, i); SerializeInternal(ctx, item, cursor); } - return; - } + } break; default: // Unsupported geometry type D_ASSERT(false); @@ -226,134 +189,6 @@ static void SerializeInternal(const GEOSContextHandle_t ctx, const GEOSGeometry } } -namespace { - -struct Point { - double x; - double y; - double z; - double m; -}; - -struct Extent { - Point min; - Point max; -}; - -} // namespace - -inline void GetCoordSeqExtent(const GEOSContextHandle_t ctx, const GEOSCoordSeq_t *geom, bool has_z, bool has_m, - Extent &extent) { - - double x; - double y; - double z; - double m; - - const auto len = GetCoordSeqLength(ctx, geom); - - for (size_t i = 0; i < len; i++) { - GEOSCoordSeq_getXY_r(ctx, geom, i, &x, &y); - extent.min.x = std::min(extent.min.x, x); - extent.min.y = std::min(extent.min.y, y); - extent.max.x = std::max(extent.max.x, x); - extent.max.y = std::max(extent.max.y, y); - } - - if (has_z && has_m) { - for (size_t i = 0; i < len; i++) { - 
GEOSCoordSeq_getZ_r(ctx, geom, i, &z); - GEOSCoordSeq_getOrdinate_r(ctx, geom, i, 3, &m); - extent.min.z = std::min(extent.min.z, z); - extent.min.m = std::min(extent.min.m, m); - extent.max.z = std::max(extent.max.z, z); - extent.max.m = std::max(extent.max.m, m); - } - } else if (has_z) { - for (size_t i = 0; i < len; i++) { - GEOSCoordSeq_getZ_r(ctx, geom, i, &z); - extent.min.z = std::min(extent.min.z, z); - extent.max.z = std::max(extent.max.z, z); - } - } else if (has_m) { - for (size_t i = 0; i < len; i++) { - GEOSCoordSeq_getOrdinate_r(ctx, geom, i, 2, &m); - extent.min.m = std::min(extent.min.m, m); - extent.max.m = std::max(extent.max.m, m); - } - } -} - -inline void GetGeometryExtent(const GEOSContextHandle_t ctx, const GEOSGeometry *geom, bool has_z, bool has_m, - Extent &extent) { - switch (GEOSGeomTypeId_r(ctx, geom)) { - case GEOS_POINT: - case GEOS_LINESTRING: { - if (GEOSisEmpty_r(ctx, geom)) { - return; - } - const auto seq = GEOSGeom_getCoordSeq_r(ctx, geom); - GetCoordSeqExtent(ctx, seq, has_z, has_m, extent); - break; - } - case GEOS_POLYGON: { - // We only need to check the exterior ring - if (GEOSisEmpty_r(ctx, geom)) { - return; - } - const auto exterior_ptr = GEOSGetExteriorRing_r(ctx, geom); - const auto exterior_seq = GEOSGeom_getCoordSeq_r(ctx, exterior_ptr); - GetCoordSeqExtent(ctx, exterior_seq, has_z, has_m, extent); - break; - } - case GEOS_MULTIPOINT: - case GEOS_MULTILINESTRING: - case GEOS_MULTIPOLYGON: - case GEOS_GEOMETRYCOLLECTION: { - const auto num_items = GEOSGetNumGeometries_r(ctx, geom); - for (auto i = 0; i < num_items; i++) { - const auto item = GEOSGetGeometryN_r(ctx, geom, i); - GetGeometryExtent(ctx, item, has_z, has_m, extent); - } - break; - } - default: - // Unsupported geometry type - break; - } -} - -inline void SerializeExtent(const GEOSContextHandle_t ctx, const GEOSGeometry *geom, bool has_z, bool has_m, - BinaryWriter &cursor) { - - Extent extent = {}; - extent.min.x = std::numeric_limits::max(); - 
extent.min.y = std::numeric_limits::max(); - extent.min.z = std::numeric_limits::max(); - extent.min.m = std::numeric_limits::max(); - extent.max.x = std::numeric_limits::lowest(); - extent.max.y = std::numeric_limits::lowest(); - extent.max.z = std::numeric_limits::lowest(); - extent.max.m = std::numeric_limits::lowest(); - - GetGeometryExtent(ctx, geom, has_z, has_m, extent); - - cursor.Write(MathUtil::DoubleToFloatDown(extent.min.x)); - cursor.Write(MathUtil::DoubleToFloatDown(extent.min.y)); - cursor.Write(MathUtil::DoubleToFloatUp(extent.max.x)); - cursor.Write(MathUtil::DoubleToFloatUp(extent.max.y)); - - if (has_z) { - cursor.Write(MathUtil::DoubleToFloatDown(extent.min.z)); - cursor.Write(MathUtil::DoubleToFloatUp(extent.max.z)); - } - - if (has_m) { - cursor.Write(MathUtil::DoubleToFloatDown(extent.min.m)); - cursor.Write(MathUtil::DoubleToFloatUp(extent.max.m)); - } -} - void GeosSerde::Serialize(GEOSContextHandle_t ctx, const GEOSGeom_t *geom, char *buffer, size_t buffer_size) { BinaryWriter cursor(buffer, buffer_size); @@ -363,25 +198,6 @@ void GeosSerde::Serialize(GEOSContextHandle_t ctx, const GEOSGeom_t *geom, char throw InvalidInputException("Unsupported GEOS geometry type %d", type); } - const auto has_bbox = (type != GEOS_POINT && (GEOSisEmpty_r(ctx, geom) == 0)); - const auto has_z = GEOSHasZ_r(ctx, geom); - const auto has_m = GEOSHasM_r(ctx, geom); - - // Set flags - uint8_t flags = 0; - flags |= has_z ? 0x01 : 0; - flags |= has_m ? 0x02 : 0; - flags |= has_bbox ? 
0x04 : 0; - - cursor.Write(StorageTypeFromGEOS(type)); - cursor.Write(flags); - cursor.Write(0); // unused - cursor.Write(0); // padding - - if (has_bbox) { - SerializeExtent(ctx, geom, has_z, has_m, cursor); - } - // Serialize the geometry SerializeInternal(ctx, geom, cursor); } @@ -389,130 +205,118 @@ void GeosSerde::Serialize(GEOSContextHandle_t ctx, const GEOSGeom_t *geom, char //------------------------------------------------------------------------------ // Deserialize //------------------------------------------------------------------------------ -// TODO: Remove the GeometryProcessor from here, come up with something better. - -namespace { - -template -bool IsPointerAligned(const void *ptr) { - auto uintptr = reinterpret_cast(ptr); - return (uintptr % alignof(T)) == 0; -} - -class GEOSDeserializer final : GeometryProcessor { -private: - GEOSContextHandle_t ctx; - vector aligned_buffer; - -private: - GEOSCoordSeq_t *HandleVertexData(const VertexData &vertices) { - auto n_dims = 2 + (HasZ() ? 1 : 0) + (HasM() ? 
1 : 0); - auto vertex_size = sizeof(double) * n_dims; - - // We know that the data is interleaved :^) - auto data = vertices.data[0]; - auto count = vertices.count; +static GEOSGeom_t *DeserializeInternal(BinaryReader &reader, GEOSContextHandle_t ctx); - if (HasZ()) { - // GEOS does a memcpy in this case, so we can pass the buffer directly even if it's not aligned - return GEOSCoordSeq_copyFromBuffer_r(ctx, reinterpret_cast(data), count, HasZ(), HasM()); - } else { - auto data_ptr = data; - auto vertex_data = reinterpret_cast(data_ptr); - if (!IsPointerAligned(data_ptr)) { - // If the pointer is not aligned we need to copy the data to an aligned buffer before passing it to GEOS - aligned_buffer.clear(); - aligned_buffer.resize(count * n_dims); - memcpy(aligned_buffer.data(), data_ptr, count * vertex_size); - vertex_data = aligned_buffer.data(); - } - - return GEOSCoordSeq_copyFromBuffer_r(ctx, vertex_data, count, HasZ(), HasM()); - } - } +template +static GEOSGeom_t *DeserializeTemplated(BinaryReader &reader, GEOSContextHandle_t ctx, sgl::geometry_type type) { + constexpr auto VERTEX_SIZE = V::HAS_Z + V::HAS_M + 2; - GEOSGeometry *ProcessPoint(const VertexData &data) override { - if (data.IsEmpty()) { + switch (type) { + case sgl::geometry_type::POINT: { + auto vert = reader.Read(); + if (vert.AllNan()) { return GEOSGeom_createEmptyPoint_r(ctx); - } else { - auto seq = HandleVertexData(data); - return GEOSGeom_createPoint_r(ctx, seq); } + auto seq = GEOSCoordSeq_copyFromBuffer_r(ctx, reinterpret_cast(&vert), 1, V::HAS_Z, V::HAS_M); + return GEOSGeom_createPoint_r(ctx, seq); } - - GEOSGeometry *ProcessLineString(const VertexData &data) override { - if (data.IsEmpty()) { + case sgl::geometry_type::LINESTRING: { + const auto vert_count = reader.Read(); + if (vert_count == 0) { return GEOSGeom_createEmptyLineString_r(ctx); - } else { - auto seq = HandleVertexData(data); - return GEOSGeom_createLineString_r(ctx, seq); } + auto vert_array = new double[vert_count * 
VERTEX_SIZE]; + auto ptr = reader.Reserve(vert_count * VERTEX_SIZE * sizeof(double)); + memcpy(vert_array, ptr, vert_count * VERTEX_SIZE * sizeof(double)); + auto seq = GEOSCoordSeq_copyFromBuffer_r(ctx, vert_array, vert_count, V::HAS_Z, V::HAS_M); + delete[] vert_array; + return GEOSGeom_createLineString_r(ctx, seq); } - - GEOSGeometry *ProcessPolygon(PolygonState &state) override { - auto num_rings = state.RingCount(); - if (num_rings == 0) { + case sgl::geometry_type::POLYGON: { + const auto ring_count = reader.Read(); + if (ring_count == 0) { return GEOSGeom_createEmptyPolygon_r(ctx); - } else { - // TODO: Make a vector here instead of using new - auto geoms = new GEOSGeometry *[num_rings]; - for (uint32_t i = 0; i < num_rings; i++) { - auto vertices = state.Next(); - auto seq = HandleVertexData(vertices); - geoms[i] = GEOSGeom_createLinearRing_r(ctx, seq); - } - auto result = GEOSGeom_createPolygon_r(ctx, geoms[0], geoms + 1, num_rings - 1); - delete[] geoms; - return result; } + vector rings; + for (uint32_t i = 0; i < ring_count; i++) { + const auto vert_count = reader.Read(); + auto vert_array = new double[vert_count * VERTEX_SIZE]; + auto ptr = reader.Reserve(vert_count * VERTEX_SIZE * sizeof(double)); + memcpy(vert_array, ptr, vert_count * VERTEX_SIZE * sizeof(double)); + auto seq = GEOSCoordSeq_copyFromBuffer_r(ctx, vert_array, vert_count, V::HAS_Z, V::HAS_M); + delete[] vert_array; + rings.push_back(GEOSGeom_createLinearRing_r(ctx, seq)); + } + return GEOSGeom_createPolygon_r(ctx, rings[0], rings.data() + 1, ring_count - 1); } - - GEOSGeometry *ProcessCollection(CollectionState &state) override { - GEOSGeomTypes collection_type = GEOS_GEOMETRYCOLLECTION; - switch (CurrentType()) { - case LegacyGeometryType::MULTIPOINT: - collection_type = GEOS_MULTIPOINT; - break; - case LegacyGeometryType::MULTILINESTRING: - collection_type = GEOS_MULTILINESTRING; - break; - case LegacyGeometryType::MULTIPOLYGON: - collection_type = GEOS_MULTIPOLYGON; - break; - 
default: - break; + case sgl::geometry_type::MULTI_POINT: { + vector rings; + const auto part_count = reader.Read(); + for (uint32_t i = 0; i < part_count; i++) { + rings.push_back(DeserializeInternal(reader, ctx)); } - auto item_count = state.ItemCount(); - if (item_count == 0) { - return GEOSGeom_createEmptyCollection_r(ctx, collection_type); - } else { - auto geoms = new GEOSGeometry *[item_count]; - for (uint32_t i = 0; i < item_count; i++) { - geoms[i] = state.Next(); - } - auto result = GEOSGeom_createCollection_r(ctx, collection_type, geoms, item_count); - delete[] geoms; - return result; + return GEOSGeom_createCollection_r(ctx, GEOS_MULTIPOINT, rings.data(), part_count); + } + case sgl::geometry_type::MULTI_LINESTRING: { + vector rings; + const auto part_count = reader.Read(); + for (uint32_t i = 0; i < part_count; i++) { + rings.push_back(DeserializeInternal(reader, ctx)); } + return GEOSGeom_createCollection_r(ctx, GEOS_MULTILINESTRING, rings.data(), part_count); } - -public: - explicit GEOSDeserializer(GEOSContextHandle_t ctx) : ctx(ctx) { + case sgl::geometry_type::MULTI_POLYGON: { + vector rings; + const auto part_count = reader.Read(); + for (uint32_t i = 0; i < part_count; i++) { + rings.push_back(DeserializeInternal(reader, ctx)); + } + return GEOSGeom_createCollection_r(ctx, GEOS_MULTIPOLYGON, rings.data(), part_count); } - virtual ~GEOSDeserializer() { + case sgl::geometry_type::GEOMETRY_COLLECTION: { + vector rings; + const auto part_count = reader.Read(); + for (uint32_t i = 0; i < part_count; i++) { + rings.push_back(DeserializeInternal(reader, ctx)); + } + return GEOSGeom_createCollection_r(ctx, GEOS_GEOMETRYCOLLECTION, rings.data(), part_count); } - - GEOSGeom_t *Execute(const geometry_t &geom) { - return Process(geom); + default: + throw InvalidInputException("Unsupported geometry type %d", static_cast(type)); } -}; +} + +static GEOSGeom_t *DeserializeInternal(BinaryReader &reader, GEOSContextHandle_t ctx) { -} // namespace + while (true) { 
+ const auto le = reader.Read(); + if (!le) { + throw InvalidInputException("Only little-endian WKB is supported"); + } + + const auto meta = reader.Read(); + const auto type = static_cast((meta & 0x0000FFFF) % 1000); + const auto flag = (meta & 0x0000FFFF) / 1000; + const auto has_z = (flag & 0x01) != 0; + const auto has_m = (flag & 0x02) != 0; + + if (has_z && has_m) { + return DeserializeTemplated(reader, ctx, type); + } + if (has_z) { + return DeserializeTemplated(reader, ctx, type); + } + if (has_m) { + return DeserializeTemplated(reader, ctx, type); + } else { + return DeserializeTemplated(reader, ctx, type); + } + } +} GEOSGeom_t *GeosSerde::Deserialize(GEOSContextHandle_t ctx, const char *buffer, size_t buffer_size) { - geometry_t blob(string_t(buffer, buffer_size)); - GEOSDeserializer deserializer(ctx); - return deserializer.Execute(blob); + BinaryReader reader(buffer, buffer_size); + return DeserializeInternal(reader, ctx); } } // namespace duckdb diff --git a/src/spatial/modules/main/spatial_functions.hpp b/src/spatial/modules/main/spatial_functions.hpp index 7aa5f69d..f4552780 100644 --- a/src/spatial/modules/main/spatial_functions.hpp +++ b/src/spatial/modules/main/spatial_functions.hpp @@ -23,7 +23,6 @@ struct CoreVectorOperations { static void Polygon2DToVarchar(Vector &source, Vector &result, idx_t count); static void Polygon3DToVarchar(Vector &source, Vector &result, idx_t count); static void Box2DToVarchar(Vector &source, Vector &result, idx_t count); - static void GeometryToVarchar(Vector &source, Vector &result, idx_t count); }; } // namespace duckdb diff --git a/src/spatial/modules/main/spatial_functions_aggregate.cpp b/src/spatial/modules/main/spatial_functions_aggregate.cpp index c361ba66..e2ca0760 100644 --- a/src/spatial/modules/main/spatial_functions_aggregate.cpp +++ b/src/spatial/modules/main/spatial_functions_aggregate.cpp @@ -1,7 +1,6 @@ #include "spatial/geometry/bbox.hpp" #include "spatial/geometry/geometry_serialization.hpp" 
#include "spatial/geometry/sgl.hpp" -#include "spatial/geometry/geometry_type.hpp" #include "spatial/modules/main/spatial_functions.hpp" #include "spatial/spatial_types.hpp" #include "spatial/util/function_builder.hpp" diff --git a/src/spatial/modules/main/spatial_functions_cast.cpp b/src/spatial/modules/main/spatial_functions_cast.cpp index 7d67a6fd..36b988d9 100644 --- a/src/spatial/modules/main/spatial_functions_cast.cpp +++ b/src/spatial/modules/main/spatial_functions_cast.cpp @@ -1,10 +1,8 @@ #include "spatial/modules/main/spatial_functions.hpp" -#include "spatial/geometry/geometry_processor.hpp" #include "spatial/geometry/sgl.hpp" #include "spatial/geometry/geometry_serialization.hpp" #include "spatial/spatial_types.hpp" #include "spatial/util/math.hpp" -#include "spatial/geometry/wkb_writer.hpp" #include "duckdb/common/error_data.hpp" #include "duckdb/common/operator/cast_operators.hpp" @@ -79,57 +77,6 @@ string_t LocalState::Serialize(Vector &vector, const sgl::geometry &geom) { struct GeometryCasts { - //------------------------------------------------------------------------------------------------------------------ - // GEOMETRY -> VARCHAR - //------------------------------------------------------------------------------------------------------------------ - static bool ToVarcharCast(Vector &source, Vector &result, idx_t count, CastParameters &) { - CoreVectorOperations::GeometryToVarchar(source, result, count); - return true; - } - - //------------------------------------------------------------------------------------------------------------------ - // VARCHAR -> GEOMETRY - //------------------------------------------------------------------------------------------------------------------ - static bool FromVarcharCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { - auto &lstate = LocalState::ResetAndGet(parameters); - auto &alloc = lstate.GetAllocator(); - - sgl::wkt_reader reader(alloc); - - auto success = true; - - 
UnaryExecutor::ExecuteWithNulls( - source, result, count, [&](const string_t &wkt, ValidityMask &mask, idx_t row_idx) { - const auto wkt_ptr = wkt.GetDataUnsafe(); - const auto wkt_len = wkt.GetSize(); - - sgl::geometry geom; - - if (!reader.try_parse(geom, wkt_ptr, wkt_len)) { - if (success) { - success = false; - const auto error = reader.get_error_message(); - HandleCastError::AssignError(error, parameters.error_message); - } - mask.SetInvalid(row_idx); - return string_t {}; - } - - return lstate.Serialize(result, geom); - }); - - return success; - } - - //------------------------------------------------------------------------------------------------------------------ - // GEOMETRY -> WKB_BLOB - //------------------------------------------------------------------------------------------------------------------ - static bool ToWKBCast(Vector &source, Vector &result, idx_t count, CastParameters &) { - UnaryExecutor::Execute( - source, result, count, [&](const string_t &input) { return WKBWriter::Write(input, result); }); - return true; - } - //------------------------------------------------------------------------------------------------------------------ // WKB_BLOB -> GEOMETRY //------------------------------------------------------------------------------------------------------------------ @@ -173,15 +120,8 @@ struct GeometryCasts { const auto wkb_type = GeoTypes::WKB_BLOB(); const auto geom_type = LogicalType::GEOMETRY(); - // VARCHAR -> Geometry is explicitly castable - loader.RegisterCastFunction(geom_type, LogicalType::VARCHAR, BoundCastInfo(ToVarcharCast), 1); - - // Geometry -> VARCHAR is implicitly castable - loader.RegisterCastFunction(LogicalType::VARCHAR, geom_type, - BoundCastInfo(FromVarcharCast, nullptr, LocalState::InitCast)); - // Geometry -> WKB is explicitly castable - loader.RegisterCastFunction(geom_type, wkb_type, BoundCastInfo(ToWKBCast)); + // loader.RegisterCastFunction(geom_type, wkb_type, BoundCastInfo(ToWKBCast)); // Geometry -> 
BLOB is explicitly castable loader.RegisterCastFunction(geom_type, LogicalType::BLOB, DefaultCasts::ReinterpretCast); @@ -1188,208 +1128,6 @@ void CoreVectorOperations::Box2DToVarchar(Vector &source, Vector &result, idx_t }); } -//------------------------------------------------------------------------------ -// GEOMETRY -> VARCHAR -//------------------------------------------------------------------------------ -namespace { -class GeometryTextProcessor final : GeometryProcessor { -private: - string text; - -public: - void OnVertexData(const VertexData &data) { - auto &dims = data.data; - auto &strides = data.stride; - auto count = data.count; - - if (HasZ() && HasM()) { - for (uint32_t i = 0; i < count; i++) { - auto x = Load(dims[0] + i * strides[0]); - auto y = Load(dims[1] + i * strides[1]); - auto z = Load(dims[2] + i * strides[2]); - auto m = Load(dims[3] + i * strides[3]); - text += MathUtil::format_coord(x, y, z, m); - if (i < count - 1) { - text += ", "; - } - } - } else if (HasZ()) { - for (uint32_t i = 0; i < count; i++) { - auto x = Load(dims[0] + i * strides[0]); - auto y = Load(dims[1] + i * strides[1]); - auto zm = Load(dims[2] + i * strides[2]); - text += MathUtil::format_coord(x, y, zm); - if (i < count - 1) { - text += ", "; - } - } - } else if (HasM()) { - for (uint32_t i = 0; i < count; i++) { - auto x = Load(dims[0] + i * strides[0]); - auto y = Load(dims[1] + i * strides[1]); - auto m = Load(dims[3] + i * strides[3]); - text += MathUtil::format_coord(x, y, m); - if (i < count - 1) { - text += ", "; - } - } - } else { - for (uint32_t i = 0; i < count; i++) { - auto x = Load(dims[0] + i * strides[0]); - auto y = Load(dims[1] + i * strides[1]); - text += MathUtil::format_coord(x, y); - - if (i < count - 1) { - text += ", "; - } - } - } - } - - void ProcessPoint(const VertexData &data, bool in_typed_collection) override { - if (!in_typed_collection) { - text += "POINT"; - if (HasZ() && HasM()) { - text += " ZM"; - } else if (HasZ()) { - text += " 
Z"; - } else if (HasM()) { - text += " M"; - } - text += " "; - } - - if (data.count == 0) { - text += "EMPTY"; - } else if (in_typed_collection) { - OnVertexData(data); - } else { - text += "("; - OnVertexData(data); - text += ")"; - } - } - - void ProcessLineString(const VertexData &data, bool in_typed_collection) override { - if (!in_typed_collection) { - text += "LINESTRING"; - if (HasZ() && HasM()) { - text += " ZM"; - } else if (HasZ()) { - text += " Z"; - } else if (HasM()) { - text += " M"; - } - text += " "; - } - - if (data.count == 0) { - text += "EMPTY"; - } else { - text += "("; - OnVertexData(data); - text += ")"; - } - } - - void ProcessPolygon(PolygonState &state, bool in_typed_collection) override { - if (!in_typed_collection) { - text += "POLYGON"; - if (HasZ() && HasM()) { - text += " ZM"; - } else if (HasZ()) { - text += " Z"; - } else if (HasM()) { - text += " M"; - } - text += " "; - } - - if (state.RingCount() == 0) { - text += "EMPTY"; - } else { - text += "("; - bool first = true; - while (!state.IsDone()) { - if (!first) { - text += ", "; - } - first = false; - text += "("; - auto vertices = state.Next(); - OnVertexData(vertices); - text += ")"; - } - text += ")"; - } - } - - void ProcessCollection(CollectionState &state, bool) override { - bool collection_is_typed = false; - switch (CurrentType()) { - case LegacyGeometryType::MULTIPOINT: - text += "MULTIPOINT"; - collection_is_typed = true; - break; - case LegacyGeometryType::MULTILINESTRING: - text += "MULTILINESTRING"; - collection_is_typed = true; - break; - case LegacyGeometryType::MULTIPOLYGON: - text += "MULTIPOLYGON"; - collection_is_typed = true; - break; - case LegacyGeometryType::GEOMETRYCOLLECTION: - text += "GEOMETRYCOLLECTION"; - collection_is_typed = false; - break; - default: - throw InvalidInputException("Invalid geometry type"); - } - - if (HasZ() && HasM()) { - text += " ZM"; - } else if (HasZ()) { - text += " Z"; - } else if (HasM()) { - text += " M"; - } - - if 
(state.ItemCount() == 0) { - text += " EMPTY"; - } else { - text += " ("; - bool first = true; - while (!state.IsDone()) { - if (!first) { - text += ", "; - } - first = false; - state.Next(collection_is_typed); - } - text += ")"; - } - } - - virtual ~GeometryTextProcessor() = default; - - const string &Execute(const geometry_t &geom) { - text.clear(); - Process(geom, false); - return text; - } -}; - -} // namespace - -void CoreVectorOperations::GeometryToVarchar(Vector &source, Vector &result, idx_t count) { - GeometryTextProcessor processor; - UnaryExecutor::Execute(source, result, count, [&](const geometry_t &input) { - const auto text = processor.Execute(input); - return StringVector::AddString(result, text); - }); -} - //###################################################################################################################### // Register //###################################################################################################################### diff --git a/src/spatial/modules/main/spatial_functions_scalar.cpp b/src/spatial/modules/main/spatial_functions_scalar.cpp index f36fc614..c95d23f4 100644 --- a/src/spatial/modules/main/spatial_functions_scalar.cpp +++ b/src/spatial/modules/main/spatial_functions_scalar.cpp @@ -1,8 +1,8 @@ // Spatial #include "spatial/modules/main/spatial_functions.hpp" #include "spatial/geometry/geometry_serialization.hpp" +#include "spatial/geometry/vertex.hpp" #include "spatial/geometry/sgl.hpp" -#include "spatial/geometry/wkb_writer.hpp" #include "spatial/spatial_types.hpp" #include "spatial/util/binary_reader.hpp" #include "spatial/util/function_builder.hpp" @@ -937,17 +937,6 @@ struct ST_AsText { CoreVectorOperations::Box2DToVarchar(input, result, count); } - //------------------------------------------------------------------------------------------------------------------ - // GEOMETRY - //------------------------------------------------------------------------------------------------------------------ - 
// TODO: Move this to SGL once we have proper double formatting - static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { - D_ASSERT(args.data.size() == 1); - auto count = args.size(); - auto &input = args.data[0]; - CoreVectorOperations::GeometryToVarchar(input, result, count); - } - //------------------------------------------------------------------------------------------------------------------ // Documentation //------------------------------------------------------------------------------------------------------------------ @@ -966,13 +955,6 @@ struct ST_AsText { //------------------------------------------------------------------------------------------------------------------ static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_AsText", [](ScalarFunctionBuilder &func) { - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", LogicalType::GEOMETRY()); - variant.SetReturnType(LogicalType::VARCHAR); - - variant.SetFunction(ExecuteGeometry); - }); - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { variant.AddParameter("point", GeoTypes::POINT_2D()); variant.SetReturnType(LogicalType::VARCHAR); @@ -1020,9 +1002,7 @@ struct ST_AsWKB { // GEOMETRY //------------------------------------------------------------------------------------------------------------------ static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - - UnaryExecutor::Execute( - args.data[0], result, args.size(), [&](const string_t &input) { return WKBWriter::Write(input, result); }); + return Geometry::ToBinary(args.data[0], result, args.size()); } //------------------------------------------------------------------------------------------------------------------ @@ -1066,18 +1046,17 @@ struct ST_AsHEXWKB { // GEOMETRY //------------------------------------------------------------------------------------------------------------------ static void 
Execute(DataChunk &args, ExpressionState &state, Vector &result) { - vector buffer; UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &blob) { - buffer.clear(); + auto size = blob.GetSize(); + auto data = blob.GetData(); - WKBWriter::Write(blob, buffer); - - auto blob_size = buffer.size() * 2; // every byte is rendered as two characters + auto blob_size = size * 2; // every byte is rendered as two characters auto blob_str = StringVector::EmptyString(result, blob_size); auto blob_ptr = blob_str.GetDataWriteable(); idx_t str_idx = 0; - for (auto byte : buffer) { + for (idx_t i = 0; i < size; i++) { + auto byte = data[i]; auto byte_a = byte >> 4; auto byte_b = byte & 0x0F; blob_ptr[str_idx++] = Blob::HEX_TABLE[byte_a]; @@ -3256,7 +3235,7 @@ struct ST_Extent_Approx { UnifiedVectorFormat input_vdata; input.ToUnifiedFormat(count, input_vdata); - const auto input_data = UnifiedVectorFormat::GetData(input_vdata); + const auto input_data = UnifiedVectorFormat::GetData(input_vdata); for (idx_t i = 0; i < count; i++) { const auto row_idx = input_vdata.sel->get_index(i); @@ -3265,7 +3244,7 @@ struct ST_Extent_Approx { // Try to get the cached bounding box from the blob Box2D bbox; - if (blob.TryGetCachedBounds(bbox)) { + if (Serde::TryGetBounds(blob, bbox)) { min_x_data[i] = bbox.min.x; min_y_data[i] = bbox.min.y; max_x_data[i] = bbox.max.x; @@ -6128,12 +6107,12 @@ struct ST_Hilbert { // GEOMETRY //------------------------------------------------------------------------------------------------------------------ static void ExecuteGeometry(DataChunk &args, ExpressionState &state, Vector &result) { - UnaryExecutor::ExecuteWithNulls( + UnaryExecutor::ExecuteWithNulls( args.data[0], result, args.size(), - [&](const geometry_t &geom, ValidityMask &mask, idx_t out_idx) -> uint32_t { + [&](const string_t &geom, ValidityMask &mask, idx_t out_idx) -> uint32_t { // TODO: This is shit, dont rely on cached bounds Box2D bounds; - if 
(!geom.TryGetCachedBounds(bounds)) { + if (!Serde::TryGetBounds(geom, bounds)) { mask.SetInvalid(out_idx); return 0; } @@ -9342,10 +9321,10 @@ struct ST_MMin : VertexAggFunctionBase { static constexpr auto ORDINATE = VertexOrdinate::M; }; -constexpr const char * ST_M::NAME; -constexpr const char * ST_X::NAME; -constexpr const char * ST_Y::NAME; -constexpr const char * ST_Z::NAME; +constexpr const char *ST_M::NAME; +constexpr const char *ST_X::NAME; +constexpr const char *ST_Y::NAME; +constexpr const char *ST_Z::NAME; } // namespace diff --git a/src/spatial/operators/spatial_join_physical.cpp b/src/spatial/operators/spatial_join_physical.cpp index b18c6f48..dd36854b 100644 --- a/src/spatial/operators/spatial_join_physical.cpp +++ b/src/spatial/operators/spatial_join_physical.cpp @@ -1,5 +1,4 @@ #include "spatial/operators/spatial_join_physical.hpp" -#include "spatial/geometry/geometry_type.hpp" #include "spatial/geometry/sgl.hpp" #include "spatial/spatial_types.hpp" #include "spatial_join_logical.hpp" @@ -13,6 +12,8 @@ #include "duckdb/planner/expression/bound_reference_expression.hpp" #include "duckdb/planner/expression/bound_conjunction_expression.hpp" #include "duckdb/storage/buffer_manager.hpp" +#include "spatial/geometry/geometry_serialization.hpp" +#include "spatial/util/math.hpp" namespace duckdb { @@ -627,7 +628,7 @@ SinkFinalizeType PhysicalSpatialJoin::Finalize(Pipeline &pipeline, Event &event, gstate.collection->Gather(row_pointer_vector, sel, row_count, build_side_key_col, geom_vec, sel, nullptr); // Get a pointer to what we just gathered - const auto geom_ptr = FlatVector::GetData(geom_vec); + const auto geom_ptr = FlatVector::GetData(geom_vec); // Push the bounding boxes into the R-Tree for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { if (!validity.RowIsValid(row_idx)) { @@ -637,7 +638,7 @@ SinkFinalizeType PhysicalSpatialJoin::Finalize(Pipeline &pipeline, Event &event, const auto &geom = geom_ptr[row_idx]; Box2D bbox; - if 
(!geom.TryGetCachedBounds(bbox)) { + if (!Serde::TryGetBounds(geom, bbox)) { // Skip empty geometries continue; } @@ -819,11 +820,11 @@ OperatorResultType PhysicalSpatialJoin::ExecuteInternal(ExecutionContext &contex continue; } - const auto geom_ptr = UnifiedVectorFormat::GetData(lstate.probe_side_key_vformat); + const auto geom_ptr = UnifiedVectorFormat::GetData(lstate.probe_side_key_vformat); const auto &geom = geom_ptr[geom_idx]; Box2D bbox; - if (!geom.TryGetCachedBounds(bbox)) { + if (!Serde::TryGetBounds(geom, bbox)) { lstate.input_index++; continue; } diff --git a/src/spatial/spatial_geoarrow.cpp b/src/spatial/spatial_geoarrow.cpp deleted file mode 100644 index 7c1cb82a..00000000 --- a/src/spatial/spatial_geoarrow.cpp +++ /dev/null @@ -1,161 +0,0 @@ -#include "spatial/spatial_geoarrow.hpp" - -#include "duckdb/common/arrow/arrow_converter.hpp" -#include "duckdb/common/arrow/schema_metadata.hpp" -#include "duckdb/function/table/arrow/arrow_duck_schema.hpp" -#include "duckdb/function/table_function.hpp" -#include "duckdb/main/database.hpp" -#include "geometry/geometry_serialization.hpp" -#include "spatial/geometry/geometry_type.hpp" -#include "spatial/geometry/sgl.hpp" -#include "spatial/geometry/wkb_writer.hpp" -#include "spatial/spatial_types.hpp" -#include "yyjson.h" - -namespace duckdb { - -namespace { - -struct GeoArrowWKB { - static unique_ptr GetType(const ArrowSchema &schema, const ArrowSchemaMetadata &schema_metadata) { - // Validate extension metadata. This metadata also contains a CRS, which we drop - // because the GEOMETRY type does not implement a CRS at the type level. 
- string extension_metadata = schema_metadata.GetOption(ArrowSchemaMetadata::ARROW_METADATA_KEY); - if (!extension_metadata.empty()) { - using namespace duckdb_yyjson_spatial; - - unique_ptr doc( - yyjson_read(extension_metadata.data(), extension_metadata.size(), YYJSON_READ_NOFLAG), yyjson_doc_free); - if (!doc) { - throw SerializationException("Invalid JSON in GeoArrow metadata"); - } - - yyjson_val *val = yyjson_doc_get_root(doc.get()); - if (!yyjson_is_obj(val)) { - throw SerializationException("Invalid GeoArrow metadata: not a JSON object"); - } - - yyjson_val *edges = yyjson_obj_get(val, "edges"); - if (edges && yyjson_is_str(edges) && std::strcmp(yyjson_get_str(edges), "planar") != 0) { - throw NotImplementedException("Can't import non-planar edges"); - } - } - - const auto format = string(schema.format); - if (format == "z") { - return make_uniq(, - make_uniq(ArrowVariableSizeType::NORMAL)); - } else if (format == "Z") { - return make_uniq(LogicalType::GEOMETRY(), - make_uniq(ArrowVariableSizeType::SUPER_SIZE)); - } else if (format == "vz") { - return make_uniq(LogicalType::GEOMETRY(), make_uniq(ArrowVariableSizeType::VIEW)); - } - throw InvalidInputException("Arrow extension type \"%s\" not supported for geoarrow.wkb", format.c_str()); - } - - static void PopulateSchema(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &schema, const LogicalType &type, - ClientContext &context, const ArrowTypeExtension &extension) { - ArrowSchemaMetadata schema_metadata; - schema_metadata.AddOption(ArrowSchemaMetadata::ARROW_EXTENSION_NAME, "geoarrow.wkb"); - schema_metadata.AddOption(ArrowSchemaMetadata::ARROW_METADATA_KEY, "{}"); - root_holder.metadata_info.emplace_back(schema_metadata.SerializeMetadata()); - schema.metadata = root_holder.metadata_info.back().get(); - - const auto options = context.GetClientProperties(); - if (options.arrow_offset_size == ArrowOffsetSize::LARGE) { - schema.format = "Z"; - } else { - schema.format = "z"; - } - } - - static void 
ArrowToDuck(ClientContext &context, Vector &source, Vector &result, idx_t count) { - // Just use the default allocator, invoking the buffer manager on each call is a bit much. - ArenaAllocator arena(Allocator::Get(context)); - GeometryAllocator alloc(arena); - - sgl::wkb_reader reader(alloc); - reader.set_allow_mixed_zm(true); - reader.set_nan_as_empty(true); - - UnaryExecutor::ExecuteWithNulls( - source, result, count, [&](const string_t &wkb, ValidityMask &mask, idx_t idx) { - const auto wkb_ptr = wkb.GetDataUnsafe(); - const auto wkb_len = wkb.GetSize(); - - sgl::geometry geom; - - if (!reader.try_parse(geom, wkb_ptr, wkb_len)) { - const auto error = reader.get_error_message(); - throw InvalidInputException("Could not parse WKB input: %s", error); - } - - // We're a bit lenient and allow mixed ZM, but correct it here. - if (reader.parsed_mixed_zm()) { - sgl::ops::force_zm(alloc, geom, reader.parsed_any_z(), reader.parsed_any_m(), 0, 0); - } - - // Serialize the geometry to the result blob - const auto size = Serde::GetRequiredSize(geom); - auto blob = StringVector::EmptyString(result, size); - Serde::Serialize(geom, blob.GetDataWriteable(), size); - blob.Finalize(); - return blob; - }); - } - - static void DuckToArrow(ClientContext &context, Vector &source, Vector &result, idx_t count) { - WKBWriter writer; - UnaryExecutor::Execute( - source, result, count, [&](const geometry_t &input) { return writer.Write(input, result); }); - } -}; - -void RegisterArrowExtensions(DBConfig &config) { - config.RegisterArrowExtension( - {"geoarrow.wkb", GeoArrowWKB::PopulateSchema, GeoArrowWKB::GetType, - make_shared_ptr(LogicalType::GEOMETRY(), LogicalType::BLOB, GeoArrowWKB::ArrowToDuck, - GeoArrowWKB::DuckToArrow)}); -} - -class GeoArrowRegisterFunctionData final : public TableFunctionData { -public: - GeoArrowRegisterFunctionData() : finished(false) { - } - bool finished {false}; -}; - -unique_ptr GeoArrowRegisterBind(ClientContext &context, TableFunctionBindInput &input, - 
vector &return_types, vector &names) { - names.push_back("registered"); - return_types.push_back(LogicalType::BOOLEAN); - return make_uniq(); -} - -void GeoArrowRegisterScan(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { - auto &data = data_p.bind_data->CastNoConst(); - if (data.finished) { - return; - } - - DBConfig &config = DatabaseInstance::GetDatabase(context).config; - if (config.HasArrowExtension(LogicalType::GEOMETRY())) { - output.SetValue(0, 0, false); - } else { - RegisterArrowExtensions(config); - output.SetValue(0, 0, true); - } - - output.SetCardinality(1); - data.finished = true; -} - -} // namespace - -void GeoArrow::Register(ExtensionLoader &loader) { - TableFunction register_func("register_geoarrow_extensions", {}, GeoArrowRegisterScan, GeoArrowRegisterBind); - loader.RegisterFunction(register_func); -} - -} // namespace duckdb diff --git a/src/spatial/spatial_geoarrow.hpp b/src/spatial/spatial_geoarrow.hpp deleted file mode 100644 index df3f78ed..00000000 --- a/src/spatial/spatial_geoarrow.hpp +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once - -namespace duckdb { - -class ExtensionLoader; - -struct GeoArrow { - static void Register(ExtensionLoader &db); -}; - -} // namespace duckdb diff --git a/src/spatial/util/binary_reader.hpp b/src/spatial/util/binary_reader.hpp index 49ef6f9e..3cbcea16 100644 --- a/src/spatial/util/binary_reader.hpp +++ b/src/spatial/util/binary_reader.hpp @@ -10,11 +10,16 @@ namespace duckdb { class BinaryReader { public: + BinaryReader() = default; BinaryReader(const char *ptr, const char *end) : beg(ptr), end(end), ptr(ptr) { } BinaryReader(const char *buffer, const size_t size) : BinaryReader(buffer, buffer + size) { } + bool IsAtEnd() const { + return ptr >= end; + } + template T Read() { static_assert(std::is_trivially_copyable::value, "Type must be trivially copyable"); @@ -76,4 +81,4 @@ class BinaryReader { const char *ptr; }; -} // namespace duckdb \ No newline at end of file +} // namespace 
duckdb From f7d91cac7955033e48042f61aa4ac86ac25e173a Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Thu, 13 Nov 2025 22:13:27 +0100 Subject: [PATCH 18/41] remove more code --- duckdb | 2 +- src/spatial/modules/gdal/gdal_module.cpp | 21 +- .../modules/main/spatial_functions_cast.cpp | 7 - .../modules/main/spatial_functions_scalar.cpp | 231 +----------------- src/spatial/spatial_types.cpp | 9 - src/spatial/spatial_types.hpp | 1 - test/sql/geos/st_minimumrotatedrectangle.test | 2 +- 7 files changed, 13 insertions(+), 260 deletions(-) diff --git a/duckdb b/duckdb index 6c5e16c2..20696d80 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 6c5e16c2fb342c3218e5a4a59b6292dae3326e83 +Subproject commit 20696d805ee421264950657119078ca621c8839b diff --git a/src/spatial/modules/gdal/gdal_module.cpp b/src/spatial/modules/gdal/gdal_module.cpp index 1b1761aa..e24bdc34 100644 --- a/src/spatial/modules/gdal/gdal_module.cpp +++ b/src/spatial/modules/gdal/gdal_module.cpp @@ -630,7 +630,7 @@ struct ST_Read : ArrowTableFunction { result->spatial_filter = make_uniq(minx, miny, maxx, maxy); } - if (loption == "spatial_filter" && kv.second.type() == GeoTypes::WKB_BLOB()) { + if (loption == "spatial_filter" && kv.second.type() == LogicalType::GEOMETRY()) { if (result->spatial_filter) { throw BinderException("Only one spatial filter can be specified"); } @@ -725,7 +725,7 @@ struct ST_Read : ArrowTableFunction { result->arrow_table.AddColumn(col_idx, std::move(arrow_type), column_name); if (result->keep_wkb) { - return_types.emplace_back(GeoTypes::WKB_BLOB()); + return_types.emplace_back(LogicalType::BLOB); } else { return_types.emplace_back(LogicalType::GEOMETRY()); if (column_name == "wkb_geometry") { @@ -1090,7 +1090,7 @@ struct ST_Read : ArrowTableFunction { func.named_parameters["allowed_drivers"] = LogicalType::LIST(LogicalType::VARCHAR); func.named_parameters["sibling_files"] = LogicalType::LIST(LogicalType::VARCHAR); func.named_parameters["spatial_filter_box"] = 
GeoTypes::BOX_2D(); - func.named_parameters["spatial_filter"] = GeoTypes::WKB_BLOB(); + func.named_parameters["spatial_filter"] = LogicalType::GEOMETRY(); func.named_parameters["layer"] = LogicalType::VARCHAR; func.named_parameters["sequential_layer_scan"] = LogicalType::BOOLEAN; func.named_parameters["max_batch_size"] = LogicalType::INTEGER; @@ -1615,7 +1615,7 @@ struct ST_Write { }; static bool IsGeometryType(const LogicalType &type) { - return type == GeoTypes::WKB_BLOB() || type == GeoTypes::POINT_2D() || type == LogicalType::GEOMETRY(); + return type == GeoTypes::POINT_2D() || type == LogicalType::GEOMETRY(); } static unique_ptr OGRFieldTypeFromLogicalType(const string &name, const LogicalType &type) { @@ -1784,19 +1784,6 @@ struct ST_Write { return nullptr; } - if (type == GeoTypes::WKB_BLOB()) { - const auto str = value.GetValueUnsafe(); - OGRGeometry *ptr; - size_t consumed; - const auto ok = OGRGeometryFactory::createFromWkb(str.GetDataUnsafe(), nullptr, &ptr, str.GetSize(), - wkbVariantIso, consumed); - - if (ok != OGRERR_NONE) { - throw IOException("Could not parse WKB"); - } - return OGRGeometryUniquePtr(ptr); - } - if (type == LogicalType::GEOMETRY()) { const auto blob = value.GetValueUnsafe(); uint32_t size = blob.GetSize(); diff --git a/src/spatial/modules/main/spatial_functions_cast.cpp b/src/spatial/modules/main/spatial_functions_cast.cpp index 36b988d9..5ee0096e 100644 --- a/src/spatial/modules/main/spatial_functions_cast.cpp +++ b/src/spatial/modules/main/spatial_functions_cast.cpp @@ -117,7 +117,6 @@ struct GeometryCasts { // Register //------------------------------------------------------------------------------------------------------------------ static void Register(ExtensionLoader &loader) { - const auto wkb_type = GeoTypes::WKB_BLOB(); const auto geom_type = LogicalType::GEOMETRY(); // Geometry -> WKB is explicitly castable @@ -125,12 +124,6 @@ struct GeometryCasts { // Geometry -> BLOB is explicitly castable 
loader.RegisterCastFunction(geom_type, LogicalType::BLOB, DefaultCasts::ReinterpretCast); - - // WKB -> Geometry is explicitly castable - loader.RegisterCastFunction(wkb_type, geom_type, BoundCastInfo(FromWKBCast, nullptr, LocalState::InitCast)); - - // WKB -> BLOB is implicitly castable - loader.RegisterCastFunction(wkb_type, LogicalType::BLOB, DefaultCasts::ReinterpretCast, 1); } }; diff --git a/src/spatial/modules/main/spatial_functions_scalar.cpp b/src/spatial/modules/main/spatial_functions_scalar.cpp index c95d23f4..393fa972 100644 --- a/src/spatial/modules/main/spatial_functions_scalar.cpp +++ b/src/spatial/modules/main/spatial_functions_scalar.cpp @@ -1020,13 +1020,6 @@ struct ST_AsWKB { //------------------------------------------------------------------------------------------------------------------ static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_AsWKB", [](ScalarFunctionBuilder &func) { - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("geom", LogicalType::GEOMETRY()); - variant.SetReturnType(GeoTypes::WKB_BLOB()); - - variant.SetFunction(Execute); - }); - func.SetDescription(DESCRIPTION); func.SetExample(EXAMPLE); @@ -1047,18 +1040,18 @@ struct ST_AsHEXWKB { //------------------------------------------------------------------------------------------------------------------ static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &blob) { - auto size = blob.GetSize(); - auto data = blob.GetData(); + const auto size = blob.GetSize(); + const auto data = const_data_ptr_cast(blob.GetData()); auto blob_size = size * 2; // every byte is rendered as two characters auto blob_str = StringVector::EmptyString(result, blob_size); - auto blob_ptr = blob_str.GetDataWriteable(); + auto blob_ptr = blob_str.GetDataWriteable(); idx_t str_idx = 0; for (idx_t i = 0; i < size; i++) { - auto byte = 
data[i]; - auto byte_a = byte >> 4; - auto byte_b = byte & 0x0F; + const auto byte = data[i]; + const auto byte_a = byte >> 4; + const auto byte_b = byte & 0x0F; blob_ptr[str_idx++] = Blob::HEX_TABLE[byte_a]; blob_ptr[str_idx++] = Blob::HEX_TABLE[byte_b]; } @@ -3113,66 +3106,6 @@ struct ST_Extent { } } - //------------------------------------------------------------------------------------------------------------------ - // Execute (WKB) - //------------------------------------------------------------------------------------------------------------------ - static void ExecuteWKB(DataChunk &args, ExpressionState &state, Vector &result) { - const auto count = args.size(); - auto &input = args.data[0]; - - UnifiedVectorFormat input_vdata; - input.ToUnifiedFormat(count, input_vdata); - - const auto &struct_vec = StructVector::GetEntries(result); - const auto min_x_data = FlatVector::GetData(*struct_vec[0]); - const auto min_y_data = FlatVector::GetData(*struct_vec[1]); - const auto max_x_data = FlatVector::GetData(*struct_vec[2]); - const auto max_y_data = FlatVector::GetData(*struct_vec[3]); - - auto &lstate = LocalState::ResetAndGet(state); - - sgl::wkb_reader reader(lstate.GetAllocator()); - reader.set_allow_mixed_zm(true); - reader.set_nan_as_empty(true); - - for (idx_t out_idx = 0; out_idx < count; out_idx++) { - const auto row_idx = input_vdata.sel->get_index(out_idx); - - if (!input_vdata.validity.RowIsValid(row_idx)) { - FlatVector::SetNull(result, out_idx, true); - continue; - } - - const auto &blob = UnifiedVectorFormat::GetData(input_vdata)[row_idx]; - - const auto wkb_buf = blob.GetDataUnsafe(); - const auto wkb_len = blob.GetSize(); - - sgl::extent_xy bbox = sgl::extent_xy::smallest(); - size_t vertex_count = 0; - if (!reader.try_parse_stats(bbox, vertex_count, wkb_buf, wkb_len)) { - const auto error = reader.get_error_message(); - throw InvalidInputException("Failed to parse WKB: %s", error); - } - - if (vertex_count == 0) { - // no vertices -> no extent 
-> return null - FlatVector::SetNull(result, out_idx, true); - continue; - } - - // Else, write the bounding box - min_x_data[out_idx] = bbox.min.x; - min_y_data[out_idx] = bbox.min.y; - max_x_data[out_idx] = bbox.max.x; - max_y_data[out_idx] = bbox.max.y; - } - - if (args.AllConstant()) { - result.SetVectorType(VectorType::CONSTANT_VECTOR); - } - } - //------------------------------------------------------------------------------------------------------------------ // Documentation //------------------------------------------------------------------------------------------------------------------ @@ -3196,14 +3129,6 @@ struct ST_Extent { variant.SetFunction(Execute); }); - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("wkb", GeoTypes::WKB_BLOB()); - variant.SetReturnType(GeoTypes::BOX_2D()); - - variant.SetFunction(ExecuteWKB); - variant.SetInit(LocalState::Init); - }); - func.SetDescription(DESCRIPTION); func.SetExample(EXAMPLE); @@ -4033,28 +3958,6 @@ struct ST_GeometryType { *ConstantVector::GetData(result) = LEGACY_POLYGON_TYPE; } - //------------------------------------------------------------------------------------------------------------------ - // WKB - //------------------------------------------------------------------------------------------------------------------ - static void ExecuteWKB(DataChunk &args, ExpressionState &state, Vector &result) { - - UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &blob) { - BinaryReader cursor(blob.GetData(), blob.GetSize()); - - const auto le = cursor.Read(); - const auto type = le ? 
cursor.Read() : cursor.ReadBE(); - const auto normalized_type = (type & 0xffff) % 1000; - - if (normalized_type == 0 || normalized_type > 7) { - return LEGACY_UNKNOWN_TYPE; - } - - // Return the geometry type - // Subtract 1 since the WKB type is 1-indexed - return static_cast(normalized_type - 1); - }); - } - //------------------------------------------------------------------------------------------------------------------ // Documentation //------------------------------------------------------------------------------------------------------------------ @@ -4106,14 +4009,6 @@ struct ST_GeometryType { variant.SetFunction(ExecutePolygon); }); - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("wkb", GeoTypes::WKB_BLOB()); - variant.SetReturnType(LogicalTypeId::ANY); - - variant.SetBind(Bind); - variant.SetFunction(ExecuteWKB); - }); - func.SetDescription(DESCRIPTION); func.SetExample(EXAMPLE); @@ -5040,14 +4935,6 @@ struct ST_GeomFromWKB { //------------------------------------------------------------------------------------------------------------------ static void Register(ExtensionLoader &loader) { FunctionBuilder::RegisterScalar(loader, "ST_Point2DFromWKB", [](ScalarFunctionBuilder &builder) { - builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("wkb", GeoTypes::WKB_BLOB()); - variant.SetReturnType(GeoTypes::POINT_2D()); - - variant.SetInit(LocalState::Init); - variant.SetFunction(ExecutePoint); - }); - builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { variant.AddParameter("blob", LogicalType::BLOB); variant.SetReturnType(GeoTypes::POINT_2D()); @@ -5063,14 +4950,6 @@ struct ST_GeomFromWKB { }); FunctionBuilder::RegisterScalar(loader, "ST_LineString2DFromWKB", [](ScalarFunctionBuilder &builder) { - builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("wkb", GeoTypes::WKB_BLOB()); - variant.SetReturnType(GeoTypes::LINESTRING_2D()); - - 
variant.SetInit(LocalState::Init); - variant.SetFunction(ExecuteLineString); - }); - builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { variant.AddParameter("blob", LogicalType::BLOB); variant.SetReturnType(GeoTypes::LINESTRING_2D()); @@ -5086,13 +4965,6 @@ struct ST_GeomFromWKB { }); FunctionBuilder::RegisterScalar(loader, "ST_Polygon2DFromWKB", [](ScalarFunctionBuilder &builder) { - builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("wkb", GeoTypes::WKB_BLOB()); - variant.SetReturnType(GeoTypes::POLYGON_2D()); - - variant.SetInit(LocalState::Init); - variant.SetFunction(ExecutePolygon); - }); builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { variant.AddParameter("blob", LogicalType::BLOB); variant.SetReturnType(GeoTypes::POLYGON_2D()); @@ -5108,14 +4980,6 @@ struct ST_GeomFromWKB { }); FunctionBuilder::RegisterScalar(loader, "ST_GeomFromWKB", [](ScalarFunctionBuilder &builder) { - builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("wkb", GeoTypes::WKB_BLOB()); - variant.SetReturnType(LogicalType::GEOMETRY()); - - variant.SetInit(LocalState::Init); - variant.SetFunction(ExecuteGeometry); - }); - builder.AddVariant([](ScalarFunctionVariantBuilder &variant) { variant.AddParameter("blob", LogicalType::BLOB); variant.SetReturnType(LogicalType::GEOMETRY()); @@ -5153,22 +5017,6 @@ struct ST_HasZ { }); } - //------------------------------------------------------------------------------------------------------------------ - // WKB - //------------------------------------------------------------------------------------------------------------------ - static void ExecuteWKB(DataChunk &args, ExpressionState &state, Vector &result) { - UnaryExecutor::Execute(args.data[0], result, args.size(), [](const string_t &wkb) { - BinaryReader cursor(wkb.GetData(), wkb.GetSize()); - - const auto le = cursor.Read(); - const auto type = le ? 
cursor.Read() : cursor.ReadBE(); - - // Check for ISO WKB and EWKB Z flag; - const auto flags = (type & 0xffff) / 1000; - return flags == 1 || flags == 3 || ((type & 0x80000000) != 0); - }); - } - //------------------------------------------------------------------------------------------------------------------ // Documentation //------------------------------------------------------------------------------------------------------------------ @@ -5209,13 +5057,6 @@ struct ST_HasZ { variant.SetFunction(ExecuteGeometry); }); - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("wkb", GeoTypes::WKB_BLOB()); - variant.SetReturnType(LogicalType::BOOLEAN); - - variant.SetFunction(ExecuteWKB); - }); - func.SetDescription(DESCRIPTION); func.SetExample(EXAMPLE); @@ -5246,22 +5087,6 @@ struct ST_HasM { }); } - //------------------------------------------------------------------------------------------------------------------ - // WKB_BLOB - //------------------------------------------------------------------------------------------------------------------ - static void ExecuteWKB(DataChunk &args, ExpressionState &state, Vector &result) { - UnaryExecutor::Execute(args.data[0], result, args.size(), [](const string_t &wkb) { - BinaryReader cursor(wkb.GetData(), wkb.GetSize()); - - const auto le = cursor.Read(); - const auto type = le ? 
cursor.Read() : cursor.ReadBE(); - - // Check for ISO WKB and EWKB M flag; - const auto flags = (type & 0xffff) / 1000; - return flags == 2 || flags == 3 || ((type & 0x40000000) != 0); - }); - } - //------------------------------------------------------------------------------------------------------------------ // Documentation //------------------------------------------------------------------------------------------------------------------ @@ -5302,13 +5127,6 @@ struct ST_HasM { variant.SetFunction(ExecuteGeometry); }); - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("wkb", GeoTypes::WKB_BLOB()); - variant.SetReturnType(LogicalType::BOOLEAN); - - variant.SetFunction(ExecuteWKB); - }); - func.SetDescription(DESCRIPTION); func.SetExample(EXAMPLE); @@ -5856,34 +5674,6 @@ struct ST_ZMFlag { }); } - //------------------------------------------------------------------------------------------------------------------ - // WKB - //------------------------------------------------------------------------------------------------------------------ - static void ExecuteWKB(DataChunk &args, ExpressionState &state, Vector &result) { - UnaryExecutor::Execute(args.data[0], result, args.size(), [](const string_t &wkb) { - BinaryReader cursor(wkb.GetData(), wkb.GetSize()); - - const auto le = cursor.Read(); - const auto type = le ? 
cursor.Read() : cursor.ReadBE(); - - // Check for ISO WKB and EWKB Z and M flags - const uint32_t iso_wkb_props = (type & 0xffff) / 1000; - const auto has_z = (iso_wkb_props == 1) || (iso_wkb_props == 3) || ((type & 0x80000000) != 0); - const auto has_m = (iso_wkb_props == 2) || (iso_wkb_props == 3) || ((type & 0x40000000) != 0); - - if (has_z && has_m) { - return 3; - } - if (has_z) { - return 2; - } - if (has_m) { - return 1; - } - return 0; - }); - } - //------------------------------------------------------------------------------------------------------------------ // Documentation //------------------------------------------------------------------------------------------------------------------ @@ -5930,13 +5720,6 @@ struct ST_ZMFlag { variant.SetFunction(ExecuteGeometry); }); - func.AddVariant([](ScalarFunctionVariantBuilder &variant) { - variant.AddParameter("wkb", GeoTypes::WKB_BLOB()); - variant.SetReturnType(LogicalType::UTINYINT); - - variant.SetFunction(ExecuteWKB); - }); - func.SetDescription(DESCRIPTION); func.SetExample(EXAMPLE); @@ -9349,7 +9132,7 @@ void RegisterSpatialScalarFunctions(ExtensionLoader &loader) { ST_Area::Register(loader); ST_AsGeoJSON::Register(loader); ST_AsText::Register(loader); - ST_AsWKB::Register(loader); + // ST_AsWKB::Register(loader); ST_AsHEXWKB::Register(loader); ST_AsSVG::Register(loader); ST_Azimuth::Register(loader); diff --git a/src/spatial/spatial_types.cpp b/src/spatial/spatial_types.cpp index 9d54d251..7037be0e 100644 --- a/src/spatial/spatial_types.cpp +++ b/src/spatial/spatial_types.cpp @@ -76,12 +76,6 @@ LogicalType GeoTypes::LEGACY_GEOMETRY() { return blob_type; } -LogicalType GeoTypes::WKB_BLOB() { - auto blob_type = LogicalType(LogicalTypeId::BLOB); - blob_type.SetAlias("WKB_BLOB"); - return blob_type; -} - LogicalType GeoTypes::CreateEnumType(const string &name, const vector &members) { auto varchar_vector = Vector(LogicalType::VARCHAR, members.size()); auto varchar_data = 
FlatVector::GetData(varchar_vector); @@ -125,9 +119,6 @@ void GeoTypes::Register(ExtensionLoader &loader) { // GEOMETRY loader.RegisterType("GEOMETRY", GeoTypes::LEGACY_GEOMETRY()); - - // WKB_BLOB - loader.RegisterType("WKB_BLOB", GeoTypes::WKB_BLOB()); } } // namespace duckdb diff --git a/src/spatial/spatial_types.hpp b/src/spatial/spatial_types.hpp index d968777f..5eba3783 100644 --- a/src/spatial/spatial_types.hpp +++ b/src/spatial/spatial_types.hpp @@ -18,7 +18,6 @@ struct GeoTypes { static LogicalType POLYGON_3D(); static LogicalType BOX_2D(); static LogicalType BOX_2DF(); - static LogicalType WKB_BLOB(); // Old geometry type (pre v1.5) static LogicalType LEGACY_GEOMETRY(); diff --git a/test/sql/geos/st_minimumrotatedrectangle.test b/test/sql/geos/st_minimumrotatedrectangle.test index 166082e8..81791af5 100644 --- a/test/sql/geos/st_minimumrotatedrectangle.test +++ b/test/sql/geos/st_minimumrotatedrectangle.test @@ -9,4 +9,4 @@ SELECT ST_AsText( ) ); ---- -POLYGON ((-0.091466 51.467727, -0.095303577854704 51.466559041522515, -0.095749325259532 51.46802364013844, -0.091911747404837 51.46919159861591, -0.091466 51.467727)) \ No newline at end of file +POLYGON ((-0.091466 51.467727, -0.09530357785470375 51.466559041522515, -0.09574932525953221 51.46802364013844, -0.09191174740483712 51.46919159861591, -0.091466 51.467727)) From c3ab04c2634f7e6b60e37695e88953aa06706655 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Thu, 13 Nov 2025 23:58:42 +0100 Subject: [PATCH 19/41] begin sketching out new gdal --- src/spatial/modules/gdal/gdal_functions.cpp | 218 +++++++++++--------- src/spatial/modules/gdal/gdal_module.cpp | 2 + 2 files changed, 119 insertions(+), 101 deletions(-) diff --git a/src/spatial/modules/gdal/gdal_functions.cpp b/src/spatial/modules/gdal/gdal_functions.cpp index 79ca9f54..348d41a8 100644 --- a/src/spatial/modules/gdal/gdal_functions.cpp +++ b/src/spatial/modules/gdal/gdal_functions.cpp @@ -16,6 +16,8 @@ #include 
"duckdb/function/table/arrow.hpp" #include "duckdb/main/database.hpp" +#include + namespace duckdb { namespace { @@ -24,32 +26,13 @@ namespace { //====================================================================================================================== namespace gdal_read { -class StringList { -public: - void Add(const string &item) { - const auto cstr = new char[item.size() + 1]; - strcpy(cstr, item.c_str()); - items.insert(items.end() - 1, cstr); - } - - char** Get() { return items.data(); } - - ~StringList() { - for (const auto &item : items) { - delete[] item; - } - } -private: - vector items = { nullptr }; -}; - //---------------------------------------------------------------------------------------------------------------------- // BIND //---------------------------------------------------------------------------------------------------------------------- class BindData final : public TableFunctionData { public: string file_path; - StringList layer_options; + CPLStringList layer_options; }; auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector &col_types, vector &col_names) @@ -60,8 +43,8 @@ auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector result->file_path = input.inputs[0].GetValue(); // Set GDAL Arrow layer options - result->layer_options.Add(StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", STANDARD_VECTOR_SIZE)); - result->layer_options.Add("GEOMETRY_METADATA_ENCODING=GEOARROW"); + result->layer_options.AddString(StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", STANDARD_VECTOR_SIZE).c_str()); + result->layer_options.AddString("GEOMETRY_METADATA_ENCODING=GEOARROW"); const auto dataset = GDALOpenEx(result->file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, nullptr, nullptr, nullptr); if (!dataset) { @@ -81,7 +64,7 @@ auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector } ArrowArrayStream stream; - if (!OGR_L_GetArrowStream(layer, &stream, result->layer_options.Get())) { + if 
(!OGR_L_GetArrowStream(layer, &stream, result->layer_options.List())) { GDALClose(dataset); throw IOException("Could not get GDAL Arrow stream at: %s", result->file_path); } @@ -109,6 +92,10 @@ auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector return std::move(result); } +//---------------------------------------------------------------------------------------------------------------------- +// GLOBAL STATE +//---------------------------------------------------------------------------------------------------------------------- + class GlobalState final : public GlobalTableFunctionState { public: @@ -124,6 +111,7 @@ class GlobalState final : public GlobalTableFunctionState { } GDALDatasetH dataset; + CPLStringList layer_options; OGRLayerH layer; ArrowArrayStream stream; vector> col_types; @@ -139,16 +127,17 @@ auto InitGlobal(ClientContext &context, TableFunctionInitInput &input) -> unique auto result = make_uniq(); result->dataset = dataset; + result->layer_options = bdata.layer_options; // Get the first layer result->layer = GDALDatasetGetLayer(dataset, 0); - StringList layer_options; - layer_options.Add(StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", STANDARD_VECTOR_SIZE)); - layer_options.Add("GEOMETRY_METADATA_ENCODING=GEOARROW"); + CPLStringList layer_options; + layer_options.AddString(StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", STANDARD_VECTOR_SIZE).data()); + layer_options.AddString("GEOMETRY_METADATA_ENCODING=GEOARROW"); // Open the Arrow stream - if (!OGR_L_GetArrowStream(result->layer, &result->stream, layer_options.Get())) { + if (!OGR_L_GetArrowStream(result->layer, &result->stream, result->layer_options.List())) { GDALClose(dataset); throw IOException("Could not get GDAL Arrow stream at: foo"); } @@ -169,6 +158,10 @@ auto InitGlobal(ClientContext &context, TableFunctionInitInput &input) -> unique return std::move(result); } 
+//---------------------------------------------------------------------------------------------------------------------- +// SCAN +//---------------------------------------------------------------------------------------------------------------------- + void Scan(ClientContext &context, TableFunctionInput &input, DataChunk &output) { auto &bdata = input.bind_data->Cast(); auto &state = input.global_state->Cast(); @@ -187,8 +180,9 @@ void Scan(ClientContext &context, TableFunctionInput &input, DataChunk &output) auto &arrow_type = *state.col_types[i]; auto array_state = ArrowArrayScanState(context); + // We need to make sure that our chunk will hold the ownership - array_state.owned_data = duckdb::make_shared_ptr(); + array_state.owned_data = make_shared_ptr(); array_state.owned_data->arrow_array = arrow_array; // We set it to nullptr to effectively transfer the ownership @@ -237,8 +231,10 @@ class BindData final : public TableFunctionData { string file_path; string driver_name; string layer_name; - vector driver_options; - vector layer_options; + + CPLStringList driver_options; + CPLStringList layer_options; + string target_srs; OGRwkbGeometryType geometry_type; @@ -327,14 +323,14 @@ auto Bind(ClientContext &context, CopyFunctionBindInput &input, const vectorlayer_options.push_back(val.GetValue()); + result->layer_options.AddString(val.GetValue().c_str()); } continue; } if (MatchOption("DATASET_CREATION_OPTIONS", option, true)) { for (auto &val : option.second) { - result->driver_options.push_back(val.GetValue()); + result->driver_options.AddString(val.GetValue().c_str()); } continue; } @@ -394,89 +390,86 @@ class GlobalState final : public GlobalFunctionData { GDALClose(dataset); dataset = nullptr; } - - if (array.release) { - array.release(&array); - array.release = nullptr; + if (srs) { + OSRDestroySpatialReference(srs); + srs = nullptr; } } - void Open(const BindData &data) { + mutex lock; + GDALDatasetH dataset = nullptr; + OGRLayerH layer = nullptr; + 
OGRSpatialReferenceH srs = nullptr; +}; - const auto driver = GDALGetDriverByName(data.driver_name.c_str()); - if (!driver) { - throw InvalidInputException("Could not find GDAL driver: " + data.driver_name); - } +auto InitGlobal(ClientContext &context, FunctionData &bdata_p, const string &path) -> unique_ptr { + auto &bdata = bdata_p.Cast(); + auto result = make_uniq(); - // Make CPL list for driver options - vector cpl_driver_options; - for (auto &option : data.driver_options) { - cpl_driver_options.push_back(option.c_str()); - } - cpl_driver_options.push_back(nullptr); + const auto driver = GDALGetDriverByName(bdata.driver_name.c_str()); + if (!driver) { + throw InvalidInputException("Could not find GDAL driver: " + bdata.driver_name); + } - // Create Dataset - dataset = GDALCreate(driver, data.file_path.c_str(), 0, 0, 0, GDT_Unknown, nullptr); - if (!dataset) { - throw IOException("Could not create GDAL dataset at: " + data.file_path); - } + // Create Dataset + result->dataset = GDALCreate(driver, bdata.file_path.c_str(), 0, 0, 0, GDT_Unknown, bdata.driver_options); + if (!result->dataset) { + throw IOException("Could not create GDAL dataset at: " + bdata.file_path); + } - // Make CPL list for layer options - vector cpl_layer_options; - for (auto &option : data.layer_options) { - cpl_layer_options.push_back(option.c_str()); - } - cpl_layer_options.push_back(nullptr); + if (!bdata.target_srs.empty()) { + // Make a new spatial reference object, and set it from the user input + result->srs = OSRNewSpatialReference(nullptr); + OSRSetFromUserInput(result->srs, bdata.target_srs.c_str()); + } - // Create Layer - layer = GDALDatasetCreateLayer(dataset, data.driver_name.c_str(), nullptr, wkbUnknown, nullptr); - if (!layer) { - throw IOException("Could not create GDAL layer in dataset at: " + data.file_path); - } + // Create Layer + result->layer = GDALDatasetCreateLayer( + result->dataset, + bdata.driver_name.c_str(), + result->srs, + bdata.geometry_type, + 
bdata.layer_options); - // Create fields for all children - auto geometry_field_count = 0; - for (auto i = 0; i < data.schema.n_children; i++) { - const auto child_schema = data.schema.children[i]; - - // Check if this is a geometry field - if (child_schema->metadata != nullptr) { - // TODO: Look for arrow metadata! - geometry_field_count++; - if (geometry_field_count > 1) { - throw NotImplementedException("Multiple geometry fields not supported yet"); - } - } else { - // Register normal attribute - if (!OGR_L_CreateFieldFromArrowSchema(layer, child_schema, nullptr)) { - throw IOException("Could not create field in GDAL layer for column: " + string(child_schema->name)); - } + if (!result->layer) { + throw IOException("Could not create GDAL layer in dataset at: " + bdata.file_path); + } + + // Create fields for all children + auto geometry_field_count = 0; + for (auto i = 0; i < bdata.schema.n_children; i++) { + const auto child_schema = bdata.schema.children[i]; + + // Check if this is a geometry field + if (child_schema->metadata != nullptr) { + // TODO: Look for arrow metadata! 
+ geometry_field_count++; + if (geometry_field_count > 1) { + throw NotImplementedException("Multiple geometry fields not supported yet"); + } + } else { + // Register normal attribute + if (!OGR_L_CreateFieldFromArrowSchema(result->layer, child_schema, nullptr)) { + throw IOException("Could not create field in GDAL layer for column: " + string(child_schema->name)); } } } -public: - mutex lock; - GDALDatasetH dataset; - OGRLayerH layer; - ArrowArray array; -}; - -auto InitGlobal(ClientContext &context, FunctionData &bdata, const string &path) -> unique_ptr { - auto &bind_data = bdata.Cast(); - auto result = make_uniq(); - - result->Open(bind_data); return std::move(result); } - //---------------------------------------------------------------------------------------------------------------------- // Local State //---------------------------------------------------------------------------------------------------------------------- class LocalState final : public LocalFunctionData { public: - // No-op, we don't need any local state for now + ~LocalState() override { + if (array.release) { + array.release(&array); + array.release = nullptr; + } + } + ArrowArray array; }; auto InitLocal(ExecutionContext &context, FunctionData &bind_data) -> unique_ptr { @@ -492,16 +485,24 @@ void Sink(ExecutionContext &context, FunctionData &bdata_p, GlobalFunctionData & const auto &bdata = bdata_p.Cast(); auto &gstate = gstate_p.Cast(); + auto &lstate = lstate_p.Cast(); - // Lock - lock_guard guard(gstate.lock); - - auto &arrow_array = gstate.array; + auto &arrow_array = lstate.array; auto &arrow_schema = bdata.schema; + // Convert to Arrow array ArrowConverter::ToArrowArray(input, &arrow_array, bdata.props, bdata.extension_type_cast); - OGR_L_WriteArrowBatch(gstate.layer, &arrow_schema, &arrow_array, nullptr); + // Sink the Arrow array into GDAL + { + // Lock + lock_guard guard(gstate.lock); + + // Sink into GDAL + OGR_L_WriteArrowBatch(gstate.layer, &arrow_schema, &arrow_array, 
nullptr); + } + + // Release the array if (arrow_array.release) { arrow_array.release(&arrow_array); arrow_array.release = nullptr; @@ -513,14 +514,28 @@ void Sink(ExecutionContext &context, FunctionData &bdata_p, GlobalFunctionData & //---------------------------------------------------------------------------------------------------------------------- void Combine(ExecutionContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, LocalFunctionData &lstate) { - + // Nothing to do, we don't have any local state that needs to be merged } //---------------------------------------------------------------------------------------------------------------------- // Finalize //---------------------------------------------------------------------------------------------------------------------- -void Finalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate) { +void Finalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate_p) { + auto &gstate = gstate_p.Cast(); + + // Flush and close the dataset + GDALFlushCache(gstate.dataset); + GDALClose(gstate.dataset); + gstate.dataset = nullptr; +} +CopyFunctionExecutionMode Mode(bool preserve_insertion_order, bool use_batch_index) { + // Parallel writes have limited utility since we still lock on each write to GDAL layer + // But in theory we still benefit from the parallel conversion to Arrow arrays, and this also allows + // the rest of the pipeline to be parallelized if we don't care about insertion order. + return preserve_insertion_order + ? 
CopyFunctionExecutionMode::REGULAR_COPY_TO_FILE + : CopyFunctionExecutionMode::PARALLEL_COPY_TO_FILE; } //---------------------------------------------------------------------------------------------------------------------- @@ -535,6 +550,7 @@ void Register(ExtensionLoader &loader) { info.copy_to_sink = Sink; info.copy_to_combine = Combine; info.copy_to_finalize = Finalize; + info.execution_mode = Mode; info.extension = "gdal"; loader.RegisterFunction(info); diff --git a/src/spatial/modules/gdal/gdal_module.cpp b/src/spatial/modules/gdal/gdal_module.cpp index b207d5ff..20aae834 100644 --- a/src/spatial/modules/gdal/gdal_module.cpp +++ b/src/spatial/modules/gdal/gdal_module.cpp @@ -665,6 +665,8 @@ struct ST_Read : ArrowTableFunction { result->layer_creation_options.AddString(str.c_str()); } + result->layer_creation_options.AddString("GEOMETRY_METADATA_ENCODING=GEOARROW"); + // Get the schema for the selected layer auto layer = dataset->GetLayer(result->layer_idx); From 5cef4546bc9f29150c6fb55983801ac2857a331d Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Fri, 14 Nov 2025 10:58:17 +0100 Subject: [PATCH 20/41] pass on stats and cardinality --- src/spatial/modules/gdal/gdal_functions.cpp | 201 ++++++++++++++++---- 1 file changed, 166 insertions(+), 35 deletions(-) diff --git a/src/spatial/modules/gdal/gdal_functions.cpp b/src/spatial/modules/gdal/gdal_functions.cpp index 348d41a8..4b932644 100644 --- a/src/spatial/modules/gdal/gdal_functions.cpp +++ b/src/spatial/modules/gdal/gdal_functions.cpp @@ -32,7 +32,22 @@ namespace gdal_read { class BindData final : public TableFunctionData { public: string file_path; + + int layer_idx = 0; + bool sequential_layer_scan = false; + bool keep_wkb = false; + CPLStringList layer_options; + CPLStringList dataset_options; + CPLStringList dataset_sibling; + CPLStringList dataset_drivers; + + int64_t estimated_cardinality = 0; + unordered_set geometry_columns = {}; + + OGREnvelope layer_extent; + bool has_extent = false; + 
OGRwkbGeometryType layer_type = wkbUnknown; }; auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector &col_types, vector &col_names) @@ -40,54 +55,112 @@ auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector auto result = make_uniq(); + // Pass file path result->file_path = input.inputs[0].GetValue(); - // Set GDAL Arrow layer options - result->layer_options.AddString(StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", STANDARD_VECTOR_SIZE).c_str()); - result->layer_options.AddString("GEOMETRY_METADATA_ENCODING=GEOARROW"); + // Parse options + const auto dataset_options_param = input.named_parameters.find("open_options"); + if (dataset_options_param != input.named_parameters.end()) { + for (auto ¶m : ListValue::GetChildren(dataset_options_param->second)) { + result->dataset_options.AddString(StringValue::Get(param).c_str()); + } + } - const auto dataset = GDALOpenEx(result->file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, nullptr, nullptr, nullptr); - if (!dataset) { - GDALClose(dataset); - throw IOException("Could not open GDAL dataset at: %s", result->file_path); + const auto drivers_param = input.named_parameters.find("allowed_drivers"); + if (drivers_param != input.named_parameters.end()) { + for (auto ¶m : ListValue::GetChildren(drivers_param->second)) { + result->dataset_drivers.AddString(StringValue::Get(param).c_str()); + } } - if (GDALDatasetGetLayerCount(dataset) <= 0) { - GDALClose(dataset); - throw IOException("GDAL dataset contains no layers at: %s", result->file_path); + const auto siblings_params = input.named_parameters.find("sibling_files"); + if (siblings_params != input.named_parameters.end()) { + for (auto ¶m : ListValue::GetChildren(siblings_params->second)) { + result->dataset_sibling.AddString(StringValue::Get(param).c_str()); + } } - const auto layer = GDALDatasetGetLayer(dataset, 0); - if (!layer) { - GDALClose(dataset); - throw IOException("Could not get GDAL layer at: %s", result->file_path); + const auto 
sequential_layer_scan_param = input.named_parameters.find("sequential_layer_scan"); + if (sequential_layer_scan_param != input.named_parameters.end()) { + result->sequential_layer_scan = BooleanValue::Get(sequential_layer_scan_param->second); } - ArrowArrayStream stream; - if (!OGR_L_GetArrowStream(layer, &stream, result->layer_options.List())) { - GDALClose(dataset); - throw IOException("Could not get GDAL Arrow stream at: %s", result->file_path); + const auto keep_wkb_param = input.named_parameters.find("keep_wkb"); + if (keep_wkb_param != input.named_parameters.end()) { + result->keep_wkb = BooleanValue::Get(keep_wkb_param->second); } + // Set additional default GDAL Arrow layer options + result->layer_options.AddString(StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", STANDARD_VECTOR_SIZE).c_str()); + result->layer_options.AddString("GEOMETRY_METADATA_ENCODING=GEOARROW"); + + // Open the dataset and get the Arrow schema + const auto dataset = GDALOpenEx(result->file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, nullptr, nullptr, nullptr); ArrowSchema schema; - if (stream.get_schema(&stream, &schema) != 0) { - stream.release(&stream); - GDALClose(dataset); - throw IOException("Could not get GDAL Arrow schema at: %s", result->file_path); - } + ArrowArrayStream stream; - // Convert Arrow schema to DuckDB types - for (int64_t i = 0; i < schema.n_children; i++) { - auto &child_schema = *schema.children[i]; - const auto type = ArrowType::GetTypeFromSchema(ctx.db->config, child_schema); - col_names.push_back(child_schema.name); - col_types.push_back(type->GetDuckType()); - } + try { + + if (GDALDatasetGetLayerCount(dataset) <= 0) { + throw IOException("GDAL dataset contains no layers at: %s", result->file_path); + } + + // Get the layer by index + const auto layer = GDALDatasetGetLayer(dataset, 0); + if (!layer) { + throw IOException("Could not get GDAL layer at: %s", result->file_path); + } + + // Estimate cardinality + result->estimated_cardinality = 
OGR_L_GetFeatureCount(layer, 0); + + // Get extent (Only if spatial filter is not pushed down!) + if (OGR_L_GetExtent(layer, &result->layer_extent, 0) == OGRERR_NONE) { + result->has_extent = true; + } + + // Get the layer geometry type if available + result->layer_type = OGR_L_GetGeomType(layer); + + // Get the arrow stream + if (!OGR_L_GetArrowStream(layer, &stream, result->layer_options.List())) { + throw IOException("Could not get GDAL Arrow stream at: %s", result->file_path); + } + + // And the schema + if (stream.get_schema(&stream, &schema) != 0) { + throw IOException("Could not get GDAL Arrow schema at: %s", result->file_path); + } + + // Convert Arrow schema to DuckDB types + for (int64_t i = 0; i < schema.n_children; i++) { + auto &child_schema = *schema.children[i]; + const auto gdal_type = ArrowType::GetTypeFromSchema(ctx.db->config, child_schema); + auto duck_type = gdal_type->GetDuckType(); + + // Track geometry columns to compute stats later + if (duck_type.id() == LogicalTypeId::GEOMETRY) { + result->geometry_columns.insert(i); + } + + col_names.push_back(child_schema.name); + col_types.push_back(std::move(duck_type)); + } - // Release stream, schema and dataset - schema.release(&schema); - stream.release(&stream); - GDALClose(dataset); + } catch (...) 
{ + // Release stream, schema and dataset + if (dataset) { + GDALClose(dataset); + } + if (schema.release) { + schema.release(&schema); + } + if (stream.release) { + stream.release(&stream); + } + // Re-throw exception + throw; + } return std::move(result); } @@ -163,7 +236,6 @@ auto InitGlobal(ClientContext &context, TableFunctionInitInput &input) -> unique //---------------------------------------------------------------------------------------------------------------------- void Scan(ClientContext &context, TableFunctionInput &input, DataChunk &output) { - auto &bdata = input.bind_data->Cast(); auto &state = input.global_state->Cast(); ArrowArray arrow_array; @@ -211,8 +283,67 @@ void Scan(ClientContext &context, TableFunctionInput &input, DataChunk &output) output.SetCardinality(arrow_array.length); } +//------------------------------------------------------------------------------------------------------------------ +// Cardinality +//------------------------------------------------------------------------------------------------------------------ +auto Cardinality(ClientContext &context, const FunctionData *data) -> unique_ptr { + auto &bdata = data->Cast(); + auto result = make_uniq(); + + if (bdata.estimated_cardinality > -1) { + result->has_estimated_cardinality = true; + result->estimated_cardinality = bdata.estimated_cardinality; + } + + return result; +} + +auto Statistics(ClientContext &context, const FunctionData *bind_data, column_t column_index) + -> unique_ptr { + + auto &bdata = bind_data->Cast(); + + // If we have an extent, and the column is a geometry column, we can provide min/max stats + if (bdata.has_extent) { + + // Check if this is the only geometry column + const auto is_geom_col = bdata.geometry_columns.find(column_index) != bdata.geometry_columns.end(); + const auto is_only_one = bdata.geometry_columns.size() == 1; + const auto has_stats = bdata.has_extent || bdata.layer_type != wkbUnknown; + + if (is_geom_col && is_only_one && 
has_stats) { + auto stats = GeometryStats::CreateUnknown(LogicalType::GEOMETRY()); + + if (bdata.has_extent) { + auto &extent = GeometryStats::GetExtent(stats); + extent.x_min = bdata.layer_extent.MinX; + extent.x_max = bdata.layer_extent.MaxX; + extent.y_min = bdata.layer_extent.MinY; + extent.y_max = bdata.layer_extent.MaxY; + } + + const auto geom_type = bdata.layer_type % 1000; + const auto vert_type = bdata.layer_type / 1000; + + if ((geom_type >= 1) && (geom_type <= 7) && (vert_type >= 0) && (vert_type <= 3)) { + auto &types = GeometryStats::GetTypes(stats); + types.Clear(); + types.AddWKBType(static_cast(geom_type)); + } + + return stats.ToUnique(); + } + } + + return nullptr; + +} + void Register(ExtensionLoader &loader) { TableFunction read_func("gdal_read", {LogicalType::VARCHAR}, Scan, Bind, InitGlobal); + read_func.cardinality = Cardinality; + read_func.statistics = Statistics; + loader.RegisterFunction(read_func); } From a6b8cf05a5f30d3b419d7a46d73898de39113559 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Fri, 14 Nov 2025 15:19:10 +0100 Subject: [PATCH 21/41] filter pushdown --- src/spatial/modules/gdal/gdal_functions.cpp | 237 +++++++++++++++++--- 1 file changed, 200 insertions(+), 37 deletions(-) diff --git a/src/spatial/modules/gdal/gdal_functions.cpp b/src/spatial/modules/gdal/gdal_functions.cpp index 4b932644..c437927c 100644 --- a/src/spatial/modules/gdal/gdal_functions.cpp +++ b/src/spatial/modules/gdal/gdal_functions.cpp @@ -1,7 +1,6 @@ #include "duckdb/main/extension/extension_loader.hpp" #include "duckdb/function/copy_function.hpp" - #include "cpl_string.h" #include "cpl_vsi.h" #include "cpl_vsi_error.h" @@ -16,6 +15,9 @@ #include "duckdb/function/table/arrow.hpp" #include "duckdb/main/database.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckdb/planner/expression/bound_constant_expression.hpp" + #include namespace duckdb { @@ -45,13 +47,17 @@ class BindData final : public TableFunctionData { 
int64_t estimated_cardinality = 0; unordered_set geometry_columns = {}; - OGREnvelope layer_extent; + bool can_filter = false; bool has_extent = false; + bool has_filter = false; + OGREnvelope layer_extent; + OGREnvelope layer_filter; + OGRwkbGeometryType layer_type = wkbUnknown; }; auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector &col_types, vector &col_names) - -> unique_ptr { + -> unique_ptr { auto result = make_uniq(); @@ -92,10 +98,13 @@ auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector // Set additional default GDAL Arrow layer options result->layer_options.AddString(StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", STANDARD_VECTOR_SIZE).c_str()); - result->layer_options.AddString("GEOMETRY_METADATA_ENCODING=GEOARROW"); + if (!result->keep_wkb) { + result->layer_options.AddString("GEOMETRY_METADATA_ENCODING=GEOARROW"); + } // Open the dataset and get the Arrow schema - const auto dataset = GDALOpenEx(result->file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, nullptr, nullptr, nullptr); + const auto dataset = + GDALOpenEx(result->file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, nullptr, nullptr, nullptr); ArrowSchema schema; ArrowArrayStream stream; @@ -119,6 +128,11 @@ auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector result->has_extent = true; } + // Check if fast spatial filtering is available + if (OGR_L_TestCapability(layer, OLCFastSpatialFilter)) { + result->can_filter = true; + } + // Get the layer geometry type if available result->layer_type = OGR_L_GetGeomType(layer); @@ -149,29 +163,142 @@ auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector } catch (...) 
{ // Release stream, schema and dataset - if (dataset) { - GDALClose(dataset); - } if (schema.release) { schema.release(&schema); } if (stream.release) { stream.release(&stream); } + if (dataset) { + GDALClose(dataset); + } // Re-throw exception throw; } + if (schema.release) { + schema.release(&schema); + } + if (stream.release) { + stream.release(&stream); + } + if (dataset) { + GDALClose(dataset); + } + return std::move(result); } //---------------------------------------------------------------------------------------------------------------------- -// GLOBAL STATE +// FILTER (EXPRESSION) PUSHDOWN //---------------------------------------------------------------------------------------------------------------------- +auto Pushdown(ClientContext &context, LogicalGet &get, FunctionData *bind_data, vector> &filters) + -> void { + + auto &bdata = bind_data->Cast(); + if (!bdata.can_filter) { + return; + } + + if (bdata.geometry_columns.size() != 1) { + return; // Only optimize if there is a single geometry column + } + + optional_idx geom_filter_idx = optional_idx::Invalid(); + + for (idx_t expr_idx = 0; expr_idx < filters.size(); expr_idx++) { + const auto &expr = filters[expr_idx]; + + if (expr->GetExpressionType() != ExpressionType::BOUND_FUNCTION) { + continue; + } + if (expr->return_type != LogicalType::BOOLEAN) { + continue; + } + const auto &func = expr->Cast(); + if (func.children.size() != 2) { + continue; + } + + if (func.children[0]->return_type.id() != LogicalTypeId::GEOMETRY || + func.children[1]->return_type.id() != LogicalTypeId::GEOMETRY) { + continue; + } + + // The set of geometry predicates that can be optimized using the bounding box + static constexpr const char *geometry_predicates[2] = {"&&", "st_intersects_extent"}; + + auto found = false; + for (const auto &name : geometry_predicates) { + if (StringUtil::CIEquals(func.function.name.c_str(), name)) { + found = true; + break; + } + } + if (!found) { + // Not a geometry predicate we can 
optimize + continue; + } + + const auto lhs_kind = func.children[0]->GetExpressionType(); + const auto rhs_kind = func.children[1]->GetExpressionType(); + + const auto lhs_is_const = + lhs_kind == ExpressionType::VALUE_CONSTANT && rhs_kind == ExpressionType::BOUND_COLUMN_REF; + const auto rhs_is_const = + rhs_kind == ExpressionType::VALUE_CONSTANT && lhs_kind == ExpressionType::BOUND_COLUMN_REF; + + if (lhs_is_const == rhs_is_const) { + // Both sides are constant or both sides are column refs + continue; + } + + auto &constant_expr = func.children[lhs_is_const ? 0 : 1]->Cast(); + auto &geometry_expr = func.children[lhs_is_const ? 1 : 0]->Cast(); + + if (constant_expr.value.type().id() != LogicalTypeId::GEOMETRY) { + // Constant is not geometry + continue; + } + if (constant_expr.value.IsNull()) { + // Constant is NULL + continue; + } + if (geometry_expr.alias != "geom") { + // Not the geometry column + continue; + } + + auto geom_extent = GeometryExtent::Empty(); + auto geom_binary = string_t(StringValue::Get(constant_expr.value)); + + if (Geometry::GetExtent(geom_binary, geom_extent)) { + bdata.has_filter = true; + bdata.layer_filter.MinX = geom_extent.x_min; + bdata.layer_filter.MinY = geom_extent.y_min; + bdata.layer_filter.MaxX = geom_extent.x_max; + bdata.layer_filter.MaxY = geom_extent.y_max; + } + + // Set the index so we can remove it later + // We can __ONLY__ do this if the filter predicate is "&&" or "st_intersects_extent" + // as other predicates may require exact geometry evaluation, the filter cannot be fully removed + geom_filter_idx = expr_idx; + break; + } + + if (geom_filter_idx != optional_idx::Invalid()) { + // Remove the filter from the list + filters.erase_at(geom_filter_idx.GetIndex()); + } +} + +//---------------------------------------------------------------------------------------------------------------------- +// GLOBAL STATE 
+//---------------------------------------------------------------------------------------------------------------------- class GlobalState final : public GlobalTableFunctionState { public: - ~GlobalState() override { if (dataset) { GDALClose(dataset); @@ -188,12 +315,14 @@ class GlobalState final : public GlobalTableFunctionState { OGRLayerH layer; ArrowArrayStream stream; vector> col_types; + atomic features_read = {0}; }; auto InitGlobal(ClientContext &context, TableFunctionInitInput &input) -> unique_ptr { auto &bdata = input.bind_data->Cast(); - const auto dataset = GDALOpenEx(bdata.file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, nullptr, nullptr, nullptr); + const auto dataset = + GDALOpenEx(bdata.file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, nullptr, nullptr, nullptr); if (!dataset) { throw IOException("Could not open GDAL dataset at: foo"); } @@ -205,6 +334,12 @@ auto InitGlobal(ClientContext &context, TableFunctionInitInput &input) -> unique // Get the first layer result->layer = GDALDatasetGetLayer(dataset, 0); + // Set the filter, if we got one + if (bdata.has_filter) { + OGR_L_SetSpatialFilterRect(result->layer, bdata.layer_filter.MinX, bdata.layer_filter.MinY, + bdata.layer_filter.MaxX, bdata.layer_filter.MaxY); + } + CPLStringList layer_options; layer_options.AddString(StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", STANDARD_VECTOR_SIZE).data()); layer_options.AddString("GEOMETRY_METADATA_ENCODING=GEOARROW"); @@ -234,7 +369,6 @@ auto InitGlobal(ClientContext &context, TableFunctionInitInput &input) -> unique //---------------------------------------------------------------------------------------------------------------------- // SCAN //---------------------------------------------------------------------------------------------------------------------- - void Scan(ClientContext &context, TableFunctionInput &input, DataChunk &output) { auto &state = input.global_state->Cast(); @@ -262,29 +396,28 @@ void Scan(ClientContext &context, 
TableFunctionInput &input, DataChunk &output) switch (arrow_type.GetPhysicalType()) { case ArrowArrayPhysicalType::DICTIONARY_ENCODED: - ArrowToDuckDBConversion::ColumnArrowToDuckDBDictionary(vec, arr, 0, array_state, - arrow_array.length, arrow_type); + ArrowToDuckDBConversion::ColumnArrowToDuckDBDictionary(vec, arr, 0, array_state, arrow_array.length, + arrow_type); break; case ArrowArrayPhysicalType::RUN_END_ENCODED: - ArrowToDuckDBConversion::ColumnArrowToDuckDBRunEndEncoded(vec, arr, 0, array_state, - arrow_array.length, arrow_type); + ArrowToDuckDBConversion::ColumnArrowToDuckDBRunEndEncoded(vec, arr, 0, array_state, arrow_array.length, + arrow_type); break; case ArrowArrayPhysicalType::DEFAULT: - ArrowToDuckDBConversion::SetValidityMask(vec, arr, 0, - arrow_array.length, arrow_array.offset, -1); - ArrowToDuckDBConversion::ColumnArrowToDuckDB(vec, arr, 0, array_state, - arrow_array.length, arrow_type); + ArrowToDuckDBConversion::SetValidityMask(vec, arr, 0, arrow_array.length, arrow_array.offset, -1); + ArrowToDuckDBConversion::ColumnArrowToDuckDB(vec, arr, 0, array_state, arrow_array.length, arrow_type); break; default: throw NotImplementedException("ArrowArrayPhysicalType not recognized"); } } + state.features_read += arrow_array.length; output.SetCardinality(arrow_array.length); } //------------------------------------------------------------------------------------------------------------------ -// Cardinality +// CARDINALITY //------------------------------------------------------------------------------------------------------------------ auto Cardinality(ClientContext &context, const FunctionData *data) -> unique_ptr { auto &bdata = data->Cast(); @@ -293,13 +426,18 @@ auto Cardinality(ClientContext &context, const FunctionData *data) -> unique_ptr if (bdata.estimated_cardinality > -1) { result->has_estimated_cardinality = true; result->estimated_cardinality = bdata.estimated_cardinality; + result->has_max_cardinality = true; + result->max_cardinality = 
bdata.estimated_cardinality; } return result; } +//---------------------------------------------------------------------------------------------------------------------- +// STATISTICS +//---------------------------------------------------------------------------------------------------------------------- auto Statistics(ClientContext &context, const FunctionData *bind_data, column_t column_index) - -> unique_ptr { + -> unique_ptr { auto &bdata = bind_data->Cast(); @@ -336,13 +474,44 @@ auto Statistics(ClientContext &context, const FunctionData *bind_data, column_t } return nullptr; +} + +//---------------------------------------------------------------------------------------------------------------------- +// PROGRESS +//---------------------------------------------------------------------------------------------------------------------- +auto Progress(ClientContext &context, const FunctionData *b_data, const GlobalTableFunctionState *g_state) -> double { + auto &bdata = b_data->Cast(); + auto &gstate = g_state->Cast(); + + if (bdata.estimated_cardinality < 0) { + return 0.0; + } + + const auto count = static_cast(gstate.features_read.load()); + const auto total = static_cast(bdata.estimated_cardinality); + return MinValue(100.0 * (total / count), 100.0); } +//---------------------------------------------------------------------------------------------------------------------- +// REGISTER +//---------------------------------------------------------------------------------------------------------------------- void Register(ExtensionLoader &loader) { TableFunction read_func("gdal_read", {LogicalType::VARCHAR}, Scan, Bind, InitGlobal); read_func.cardinality = Cardinality; read_func.statistics = Statistics; + read_func.table_scan_progress = Progress; + read_func.pushdown_complex_filter = Pushdown; + + read_func.named_parameters["open_options"] = LogicalType::LIST(LogicalType::VARCHAR); + read_func.named_parameters["allowed_drivers"] = 
LogicalType::LIST(LogicalType::VARCHAR); + read_func.named_parameters["sibling_files"] = LogicalType::LIST(LogicalType::VARCHAR); + // read_func.named_parameters["spatial_filter_box"] = GeoTypes::BOX_2D(); + // read_func.named_parameters["spatial_filter"] = LogicalType::GEOMETRY(); + read_func.named_parameters["layer"] = LogicalType::VARCHAR; + read_func.named_parameters["sequential_layer_scan"] = LogicalType::BOOLEAN; + read_func.named_parameters["max_batch_size"] = LogicalType::INTEGER; + read_func.named_parameters["keep_wkb"] = LogicalType::BOOLEAN; loader.RegisterFunction(read_func); } @@ -381,7 +550,7 @@ class BindData final : public TableFunctionData { } }; -bool MatchOption(const char* name, const pair> &option, bool list = false) { +bool MatchOption(const char *name, const pair> &option, bool list = false) { if (StringUtil::CIEquals(name, option.first)) { if (option.second.empty()) { throw BinderException("GDAL COPY option '%s' requires a value", name); @@ -406,7 +575,7 @@ bool MatchOption(const char* name, const pair> &option, bo } auto Bind(ClientContext &context, CopyFunctionBindInput &input, const vector &names, - const vector &sql_types) -> unique_ptr { + const vector &sql_types) -> unique_ptr { auto result = make_uniq(); // Set file path @@ -515,7 +684,6 @@ auto Bind(ClientContext &context, CopyFunctionBindInput &input, const vectorlayer = GDALDatasetCreateLayer( - result->dataset, - bdata.driver_name.c_str(), - result->srs, - bdata.geometry_type, - bdata.layer_options); + result->layer = GDALDatasetCreateLayer(result->dataset, bdata.driver_name.c_str(), result->srs, bdata.geometry_type, + bdata.layer_options); if (!result->layer) { throw IOException("Could not create GDAL layer in dataset at: " + bdata.file_path); @@ -612,7 +776,7 @@ auto InitLocal(ExecutionContext &context, FunctionData &bind_data) -> unique_ptr // Sink //---------------------------------------------------------------------------------------------------------------------- void 
Sink(ExecutionContext &context, FunctionData &bdata_p, GlobalFunctionData &gstate_p, LocalFunctionData &lstate_p, - DataChunk &input) { + DataChunk &input) { const auto &bdata = bdata_p.Cast(); auto &gstate = gstate_p.Cast(); @@ -644,7 +808,7 @@ void Sink(ExecutionContext &context, FunctionData &bdata_p, GlobalFunctionData & // Combine //---------------------------------------------------------------------------------------------------------------------- void Combine(ExecutionContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, - LocalFunctionData &lstate) { + LocalFunctionData &lstate) { // Nothing to do, we don't have any local state that needs to be merged } @@ -664,9 +828,8 @@ CopyFunctionExecutionMode Mode(bool preserve_insertion_order, bool use_batch_ind // Parallel writes have limited utility since we still lock on each write to GDAL layer // But in theory we still benefit from the parallel conversion to Arrow arrays, and this also allows // the rest of the pipeline to be parallelized if we don't care about insertion order. - return preserve_insertion_order - ? CopyFunctionExecutionMode::REGULAR_COPY_TO_FILE - : CopyFunctionExecutionMode::PARALLEL_COPY_TO_FILE; + return preserve_insertion_order ? 
CopyFunctionExecutionMode::REGULAR_COPY_TO_FILE + : CopyFunctionExecutionMode::PARALLEL_COPY_TO_FILE; } //---------------------------------------------------------------------------------------------------------------------- @@ -694,4 +857,4 @@ void RegisterExtraFunction(ExtensionLoader &loader) { gdal_copy::Register(loader); gdal_read::Register(loader); } -} // namespace duckdb \ No newline at end of file +} // namespace duckdb From b7e375f4711ecb3651ecb579a9c6e78bdc873aa6 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Fri, 14 Nov 2025 20:12:04 +0100 Subject: [PATCH 22/41] fixup osm --- src/spatial/modules/gdal/CMakeLists.txt | 5 +- src/spatial/modules/gdal/gdal_functions.cpp | 153 ++++++++++++++---- .../modules/main/spatial_functions_scalar.cpp | 2 +- test/sql/geos/st_minimumrotatedrectangle.test | 3 + 4 files changed, 128 insertions(+), 35 deletions(-) diff --git a/src/spatial/modules/gdal/CMakeLists.txt b/src/spatial/modules/gdal/CMakeLists.txt index 25cb0464..01fcda1f 100644 --- a/src/spatial/modules/gdal/CMakeLists.txt +++ b/src/spatial/modules/gdal/CMakeLists.txt @@ -1,5 +1,4 @@ set(EXTENSION_SOURCES - ${EXTENSION_SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/gdal_module.cpp + ${EXTENSION_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/gdal_module.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gdal_functions.cpp - PARENT_SCOPE) \ No newline at end of file + PARENT_SCOPE) diff --git a/src/spatial/modules/gdal/gdal_functions.cpp b/src/spatial/modules/gdal/gdal_functions.cpp index c437927c..e9b52377 100644 --- a/src/spatial/modules/gdal/gdal_functions.cpp +++ b/src/spatial/modules/gdal/gdal_functions.cpp @@ -1,24 +1,24 @@ #include "duckdb/main/extension/extension_loader.hpp" #include "duckdb/function/copy_function.hpp" - -#include "cpl_string.h" -#include "cpl_vsi.h" -#include "cpl_vsi_error.h" -#include "cpl_vsi_virtual.h" - -#include "gdal.h" -#include "ogr_core.h" -#include "ogr_api.h" - +#include "duckdb/function/table/arrow.hpp" #include "duckdb/common/arrow/arrow_converter.hpp" 
#include "duckdb/common/arrow/arrow.hpp" -#include "duckdb/function/table/arrow.hpp" #include "duckdb/main/database.hpp" - #include "duckdb/planner/expression/bound_function_expression.hpp" #include "duckdb/planner/expression/bound_constant_expression.hpp" +#include "duckdb/parser/expression/constant_expression.hpp" +#include "duckdb/parser/expression/function_expression.hpp" + +#include "gdal.h" +#include "ogr_core.h" +#include "ogr_api.h" +#include "ogr_srs_api.h" -#include +#include "cpl_string.h" +#include "cpl_vsi.h" +#include "cpl_vsi_error.h" +#include "cpl_vsi_virtual.h" +#include "duckdb/parser/tableref/table_function_ref.hpp" namespace duckdb { namespace { @@ -26,6 +26,7 @@ namespace { //====================================================================================================================== // GDAL READ //====================================================================================================================== + namespace gdal_read { //---------------------------------------------------------------------------------------------------------------------- @@ -36,7 +37,6 @@ class BindData final : public TableFunctionData { string file_path; int layer_idx = 0; - bool sequential_layer_scan = false; bool keep_wkb = false; CPLStringList layer_options; @@ -86,36 +86,83 @@ auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector } } - const auto sequential_layer_scan_param = input.named_parameters.find("sequential_layer_scan"); - if (sequential_layer_scan_param != input.named_parameters.end()) { - result->sequential_layer_scan = BooleanValue::Get(sequential_layer_scan_param->second); - } - const auto keep_wkb_param = input.named_parameters.find("keep_wkb"); if (keep_wkb_param != input.named_parameters.end()) { result->keep_wkb = BooleanValue::Get(keep_wkb_param->second); } - // Set additional default GDAL Arrow layer options + // Set additional default GDAL default options + + // This for OSM, but we don't know if we are reading 
OSM until we open the dataset, so just always set it for now. + result->dataset_options.AddString("INTERLEAVED_READING=YES"); + + // This is so taht we dont have to deal with chunking ourselves, let GDAL do it for us result->layer_options.AddString(StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", STANDARD_VECTOR_SIZE).c_str()); + + // We always want GeoArrow geometry which DuckDB knows how to convert to GEOMETRY type, unless `keep_wkb` is set if (!result->keep_wkb) { result->layer_options.AddString("GEOMETRY_METADATA_ENCODING=GEOARROW"); } // Open the dataset and get the Arrow schema const auto dataset = - GDALOpenEx(result->file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, nullptr, nullptr, nullptr); + GDALOpenEx(result->file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, + result->dataset_drivers, + result->dataset_options, + result->dataset_sibling); + + if (!dataset) { + throw IOException("Could not open GDAL dataset at: %s", result->file_path); + } + ArrowSchema schema; ArrowArrayStream stream; try { - if (GDALDatasetGetLayerCount(dataset) <= 0) { + const auto layer_count = GDALDatasetGetLayerCount(dataset); + if (layer_count <= 0) { throw IOException("GDAL dataset contains no layers at: %s", result->file_path); } + // Find layer + const auto layer_param = input.named_parameters.find("layer"); + + if (layer_param != input.named_parameters.end()) { + if (layer_param->second.type() == LogicalType::INTEGER) { + // Find layer by index + const auto layer_idx = IntegerValue::Get(layer_param->second); + if (layer_idx < 0) { + throw BinderException("Layer index must be positive"); + } + if (layer_idx > layer_count) { + throw BinderException( + StringUtil::Format("Layer index out of range (%s > %s)", layer_idx, layer_count)); + } + result->layer_idx = layer_idx; + } else if (layer_param->second.type() == LogicalType::VARCHAR) { + // Find layer by name + const auto &layer_name = StringValue::Get(layer_param->second); + auto found = false; + for (int i = 0; i < 
layer_count; i++) { + const auto layer = GDALDatasetGetLayer(dataset, i); + if (!layer) { + continue; + } + if (OGR_L_GetName(layer) == layer_name) { + result->layer_idx = i; + found = true; + break; + } + } + if (!found) { + throw BinderException("Could not find layer with name: %s", layer_name); + } + } + } + // Get the layer by index - const auto layer = GDALDatasetGetLayer(dataset, 0); + const auto layer = GDALDatasetGetLayer(dataset, result->layer_idx); if (!layer) { throw IOException("Could not get GDAL layer at: %s", result->file_path); } @@ -322,7 +369,11 @@ auto InitGlobal(ClientContext &context, TableFunctionInitInput &input) -> unique auto &bdata = input.bind_data->Cast(); const auto dataset = - GDALOpenEx(bdata.file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, nullptr, nullptr, nullptr); + GDALOpenEx(bdata.file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, + bdata.dataset_drivers, + bdata.dataset_options, + bdata.dataset_sibling); + if (!dataset) { throw IOException("Could not open GDAL dataset at: foo"); } @@ -331,8 +382,27 @@ auto InitGlobal(ClientContext &context, TableFunctionInitInput &input) -> unique result->dataset = dataset; result->layer_options = bdata.layer_options; - // Get the first layer - result->layer = GDALDatasetGetLayer(dataset, 0); + const auto driver = GDALGetDatasetDriver(dataset); + if (strcmp(GDALGetDriverShortName(driver), "OSM") != 0) { + // Get the layer by index + result->layer = GDALDatasetGetLayer(dataset, bdata.layer_idx); + } else { + // Special case for OSM, which requires sequential reading of layers + const auto layer_count = GDALDatasetGetLayerCount(dataset); + for (int i = 0; i < layer_count; i++) { + result->layer = GDALDatasetGetLayer(dataset, i); + if (i == bdata.layer_idx) { + // desired layer found + break; + } + + // else scan through and empty the layer + OGRFeatureH feature; + while ((feature = OGR_L_GetNextFeature(result->layer)) != nullptr) { + OGR_F_Destroy(feature); + } + } + } // Set the filter, 
if we got one if (bdata.has_filter) { @@ -347,14 +417,14 @@ auto InitGlobal(ClientContext &context, TableFunctionInitInput &input) -> unique // Open the Arrow stream if (!OGR_L_GetArrowStream(result->layer, &result->stream, result->layer_options.List())) { GDALClose(dataset); - throw IOException("Could not get GDAL Arrow stream at: foo"); + throw IOException("Could not get GDAL Arrow stream"); } ArrowSchema schema; if (result->stream.get_schema(&result->stream, &schema) != 0) { result->stream.release(&result->stream); GDALClose(dataset); - throw IOException("Could not get GDAL Arrow schema at: foo"); + throw IOException("Could not get GDAL Arrow schema"); } // Store the column types @@ -493,6 +563,26 @@ auto Progress(ClientContext &context, const FunctionData *b_data, const GlobalTa return MinValue(100.0 * (total / count), 100.0); } +//------------------------------------------------------------------------------------------------------------------ +// REPLACEMENT SCAN +//------------------------------------------------------------------------------------------------------------------ +auto ReplacementScan(ClientContext &, ReplacementScanInput &input, optional_ptr) + -> unique_ptr { + auto &table_name = input.table_name; + auto lower_name = StringUtil::Lower(table_name); + // Check if the table name ends with some common geospatial file extensions + if (StringUtil::EndsWith(lower_name, ".gpkg") || StringUtil::EndsWith(lower_name, ".fgb")) { + + auto table_function = make_uniq(); + vector> children; + children.push_back(make_uniq(Value(table_name))); + table_function->function = make_uniq("ST_Read", std::move(children)); + return std::move(table_function); + } + // else not something we can replace + return nullptr; +} + //---------------------------------------------------------------------------------------------------------------------- // REGISTER 
//---------------------------------------------------------------------------------------------------------------------- @@ -506,17 +596,18 @@ void Register(ExtensionLoader &loader) { read_func.named_parameters["open_options"] = LogicalType::LIST(LogicalType::VARCHAR); read_func.named_parameters["allowed_drivers"] = LogicalType::LIST(LogicalType::VARCHAR); read_func.named_parameters["sibling_files"] = LogicalType::LIST(LogicalType::VARCHAR); - // read_func.named_parameters["spatial_filter_box"] = GeoTypes::BOX_2D(); - // read_func.named_parameters["spatial_filter"] = LogicalType::GEOMETRY(); read_func.named_parameters["layer"] = LogicalType::VARCHAR; - read_func.named_parameters["sequential_layer_scan"] = LogicalType::BOOLEAN; read_func.named_parameters["max_batch_size"] = LogicalType::INTEGER; read_func.named_parameters["keep_wkb"] = LogicalType::BOOLEAN; loader.RegisterFunction(read_func); + + auto &config = DBConfig::GetConfig(loader.GetDatabaseInstance()); + config.replacement_scans.emplace_back(ReplacementScan); } } // namespace gdal_read + //====================================================================================================================== // GDAL COPY //====================================================================================================================== diff --git a/src/spatial/modules/main/spatial_functions_scalar.cpp b/src/spatial/modules/main/spatial_functions_scalar.cpp index 393fa972..861d2cd8 100644 --- a/src/spatial/modules/main/spatial_functions_scalar.cpp +++ b/src/spatial/modules/main/spatial_functions_scalar.cpp @@ -1045,7 +1045,7 @@ struct ST_AsHEXWKB { auto blob_size = size * 2; // every byte is rendered as two characters auto blob_str = StringVector::EmptyString(result, blob_size); - auto blob_ptr = blob_str.GetDataWriteable(); + auto blob_ptr = blob_str.GetDataWriteable(); idx_t str_idx = 0; for (idx_t i = 0; i < size; i++) { diff --git a/test/sql/geos/st_minimumrotatedrectangle.test 
b/test/sql/geos/st_minimumrotatedrectangle.test index 81791af5..06c31acf 100644 --- a/test/sql/geos/st_minimumrotatedrectangle.test +++ b/test/sql/geos/st_minimumrotatedrectangle.test @@ -1,3 +1,6 @@ +# name: test/sql/geos/st_minimumrotatedrectangle.test +# group: [geos] + require spatial # As per st_isvalid, we trust that GEOS knows what it's doing, so we're not interested in testing this too much From 68e3651072e6cee6223505ab2985192ed9a6d4c1 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Fri, 14 Nov 2025 20:31:42 +0100 Subject: [PATCH 23/41] adjust gdal errors --- src/spatial/modules/gdal/gdal_functions.cpp | 2 +- src/spatial/modules/gdal/gdal_module.cpp | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/spatial/modules/gdal/gdal_functions.cpp b/src/spatial/modules/gdal/gdal_functions.cpp index e9b52377..c181ee40 100644 --- a/src/spatial/modules/gdal/gdal_functions.cpp +++ b/src/spatial/modules/gdal/gdal_functions.cpp @@ -94,7 +94,7 @@ auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector // Set additional default GDAL default options // This for OSM, but we don't know if we are reading OSM until we open the dataset, so just always set it for now. 
- result->dataset_options.AddString("INTERLEAVED_READING=YES"); + //result->dataset_options.AddString("INTERLEAVED_READING=YES"); // This is so taht we dont have to deal with chunking ourselves, let GDAL do it for us result->layer_options.AddString(StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", STANDARD_VECTOR_SIZE).c_str()); diff --git a/src/spatial/modules/gdal/gdal_module.cpp b/src/spatial/modules/gdal/gdal_module.cpp index 20aae834..2c2dea86 100644 --- a/src/spatial/modules/gdal/gdal_module.cpp +++ b/src/spatial/modules/gdal/gdal_module.cpp @@ -2038,26 +2038,32 @@ void RegisterGDALModule(ExtensionLoader &loader) { msg.erase(path_pos, 48); } + // GDAL Catches exceptions internally and passes them on to the handler again as CPLE_AppDefined + // So we don't add any extra information here or we end up with very long nested error messages. + // Using ErrorData we can parse the message part of DuckDB exceptions properly, and for other exceptions + // their error message will still be preserved as the "raw message". 
+ ErrorData error_msg(raw_msg); + switch (code) { case CPLE_NoWriteAccess: - throw PermissionException("GDAL Error (%d): %s", code, msg); + throw PermissionException(error_msg.RawMessage()); case CPLE_UserInterrupt: throw InterruptException(); case CPLE_OutOfMemory: - throw OutOfMemoryException("GDAL Error (%d): %s", code, msg); + throw OutOfMemoryException(error_msg.RawMessage()); case CPLE_NotSupported: - throw NotImplementedException("GDAL Error (%d): %s", code, msg); + throw NotImplementedException(error_msg.RawMessage()); case CPLE_AssertionFailed: case CPLE_ObjectNull: - throw InternalException("GDAL Error (%d): %s", code, msg); + throw InternalException(error_msg.RawMessage()); case CPLE_IllegalArg: - throw InvalidInputException("GDAL Error (%d): %s", code, msg); + throw InvalidInputException( error_msg.RawMessage()); case CPLE_AppDefined: case CPLE_HttpResponse: case CPLE_FileIO: case CPLE_OpenFailed: default: - throw IOException("GDAL Error (%d): %s", code, msg); + throw IOException(error_msg.RawMessage()); } }); }); From 737763edfaee0fd875ced9a2c1bd5e31ab96f905 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Fri, 14 Nov 2025 21:39:21 +0100 Subject: [PATCH 24/41] fix bounds check in osm reader --- src/spatial/modules/osm/osm_module.cpp | 49 ++++++++++++++++---------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/src/spatial/modules/osm/osm_module.cpp b/src/spatial/modules/osm/osm_module.cpp index e278fc12..82322133 100644 --- a/src/spatial/modules/osm/osm_module.cpp +++ b/src/spatial/modules/osm/osm_module.cpp @@ -21,7 +21,7 @@ namespace { namespace pz = protozero; -static int32_t ReadInt32BigEndian(data_ptr_t ptr) { +int32_t ReadInt32BigEndian(data_ptr_t ptr) { return (ptr[0] << 24) | (ptr[1] << 16) | (ptr[2] << 8) | ptr[3]; } @@ -36,7 +36,7 @@ struct BindData final : TableFunctionData { } }; -static unique_ptr Bind(ClientContext &context, TableFunctionBindInput &input, +unique_ptr Bind(ClientContext &context, TableFunctionBindInput 
&input, vector &return_types, vector &names) { // Create an enum type for all osm kinds @@ -113,7 +113,7 @@ struct FileBlock { } }; -static unique_ptr DecompressBlob(ClientContext &context, OsmBlob &blob) { +unique_ptr DecompressBlob(ClientContext &context, OsmBlob &blob) { auto &buffer_manager = BufferManager::GetBufferManager(context); pz::pbf_reader reader(reinterpret_cast(blob.data.get()), blob.size); @@ -189,16 +189,23 @@ class GlobalState final : public GlobalTableFunctionState { // serialized Blob message (size is given in the header) // Read the length of the BlobHeader - int32_t header_length_be = 0; - handle->Read((data_ptr_t)&header_length_be, sizeof(int32_t), offset); + char header_length_be[4]; + handle->Read(header_length_be, sizeof(int32_t), offset); offset += sizeof(int32_t); - int32_t header_length = ReadInt32BigEndian((data_ptr_t)&header_length_be); + + const auto header_length = ReadInt32BigEndian(data_ptr_cast(header_length_be)); + + // Sanity check + if (offset + header_length > file_size) { + throw ParserException("Unexpected end of file when reading BlobHeader"); + } // Read the BlobHeader auto header_buffer = buffer_manager.GetBufferAllocator().Allocate(header_length); handle->Read(header_buffer.get(), header_length, offset); + offset += header_length; - pz::pbf_reader reader((const char *)header_buffer.get(), header_length); + pz::pbf_reader reader(char_ptr_cast(header_buffer.get()), header_length); // 1 - type of the blob reader.next(1); @@ -215,21 +222,25 @@ class GlobalState final : public GlobalTableFunctionState { reader.next(3); auto blob_length = reader.get_int32(); // size of the next blob - offset += header_length; + + // Sanity check + if (offset + blob_length > file_size) { + throw ParserException("Unexpected end of file when reading Blob"); + } // Read the Blob auto blob_buffer = buffer_manager.GetBufferAllocator().Allocate(blob_length); handle->Read(blob_buffer.get(), blob_length, offset); - offset += blob_length; + bytes_read 
= offset; return make_uniq(type, std::move(blob_buffer), blob_length, blob_index++); } }; -static unique_ptr InitGlobal(ClientContext &context, TableFunctionInitInput &input) { - auto &bind_data = (BindData &)*input.bind_data; +unique_ptr InitGlobal(ClientContext &context, TableFunctionInitInput &input) { + auto &bind_data = input.bind_data->Cast(); auto &fs = FileSystem::GetFileSystem(context); auto file_name = bind_data.file_name; @@ -272,7 +283,7 @@ struct LocalState final : LocalTableFunctionState { lat_offset = 0; lon_offset = 0; - block_reader = pz::pbf_reader((const char *)block->data.get(), block->size); + block_reader = pz::pbf_reader(const_char_ptr_cast(block->data.get()), block->size); block_reader.next(1); // String table auto string_table_reader = block_reader.get_message(); while (string_table_reader.next(1)) { @@ -784,7 +795,7 @@ struct LocalState final : LocalTableFunctionState { } }; -static unique_ptr InitLocal(ExecutionContext &context, TableFunctionInitInput &input, +unique_ptr InitLocal(ExecutionContext &context, TableFunctionInitInput &input, GlobalTableFunctionState *global_state) { auto &global = global_state->Cast(); const auto blob = global.GetNextBlob(context.client); @@ -797,7 +808,7 @@ static unique_ptr InitLocal(ExecutionContext &context, return std::move(result); } -static void Execute(ClientContext &context, TableFunctionInput &input, DataChunk &output) { +void Execute(ClientContext &context, TableFunctionInput &input, DataChunk &output) { if (input.local_state == nullptr) { return; } @@ -823,13 +834,13 @@ static void Execute(ClientContext &context, TableFunctionInput &input, DataChunk output.SetCardinality(row_id); } -static double Progress(ClientContext &context, const FunctionData *bind_data, +double Progress(ClientContext &context, const FunctionData *bind_data, const GlobalTableFunctionState *global_state) { const auto &state = global_state->Cast(); return state.GetProgress(); } -static OperatorPartitionData 
GetPartitionData(ClientContext &context, TableFunctionGetPartitionInput &input) { +OperatorPartitionData GetPartitionData(ClientContext &context, TableFunctionGetPartitionInput &input) { if (input.partition_info.RequiresPartitionColumns()) { throw InternalException("ST_ReadOSM::GetPartitionData: partition columns not supported"); } @@ -837,7 +848,7 @@ static OperatorPartitionData GetPartitionData(ClientContext &context, TableFunct return OperatorPartitionData(state.block->block_idx); } -static unique_ptr ReadOsmPBFReplacementScan(ClientContext &context, ReplacementScanInput &input, +unique_ptr ReadOsmPBFReplacementScan(ClientContext &context, ReplacementScanInput &input, optional_ptr data) { auto &table_name = input.table_name; // Check if the table name ends with .osm.pbf @@ -858,7 +869,7 @@ static unique_ptr ReadOsmPBFReplacementScan(ClientContext &context, Re // static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}}; -static constexpr const char *DOC_DESCRIPTION = R"( +constexpr const char *DOC_DESCRIPTION = R"( The `ST_ReadOsm()` table function enables reading compressed OpenStreetMap data directly from a `.osm.pbf` file. This function uses multithreading and zero-copy protobuf parsing which makes it a lot faster than using the `ST_Read()` OSM driver, however it only outputs the raw OSM data (Nodes, Ways, Relations), without constructing any geometries. For simple node entities (like PoI's) you can trivially construct POINT geometries, but it is also possible to construct LINESTRING and POLYGON geometries by manually joining refs and nodes together in SQL, although with available memory usually being a limiting factor. 
@@ -869,7 +880,7 @@ static constexpr const char *DOC_DESCRIPTION = R"( ``` )"; -static constexpr const char *DOC_EXAMPLE = R"( +constexpr const char *DOC_EXAMPLE = R"( SELECT * FROM ST_ReadOSM('tmp/data/germany.osm.pbf') WHERE tags['highway'] != [] From 9fdcf9f6d4e671465a70be3e2305810d2d71038d Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Tue, 18 Nov 2025 15:04:07 +0100 Subject: [PATCH 25/41] move filesystem --- src/spatial/modules/gdal/gdal_functions.cpp | 549 ++++++++++++++++++-- src/spatial/modules/gdal/gdal_module.cpp | 112 ++-- src/spatial/modules/osm/osm_module.cpp | 12 +- test/sql/gdal/st_read_gdb.test | 5 +- 4 files changed, 580 insertions(+), 98 deletions(-) diff --git a/src/spatial/modules/gdal/gdal_functions.cpp b/src/spatial/modules/gdal/gdal_functions.cpp index c181ee40..1a344e5f 100644 --- a/src/spatial/modules/gdal/gdal_functions.cpp +++ b/src/spatial/modules/gdal/gdal_functions.cpp @@ -18,15 +18,426 @@ #include "cpl_vsi.h" #include "cpl_vsi_error.h" #include "cpl_vsi_virtual.h" +#include "duckdb/common/types/uuid.hpp" #include "duckdb/parser/tableref/table_function_ref.hpp" +#include + namespace duckdb { namespace { //====================================================================================================================== -// GDAL READ +// GDAL FILE //====================================================================================================================== +class DuckDBFileHandle final : public VSIVirtualHandle { +public: + explicit DuckDBFileHandle(unique_ptr file_handle_p) + : file_handle(std::move(file_handle_p)), is_eof(false) { + } + + vsi_l_offset Tell() override { + return static_cast(file_handle->SeekPosition()); + } + + int Seek(vsi_l_offset nOffset, int nWhence) override { + // Reset EOF flag on seek + is_eof = false; + + // Use the reset function instead to allow compressed file handles to rewind + // even if they don't support seeking + if (nWhence == SEEK_SET && nOffset == 0) { + file_handle->Reset(); + 
return 0; + } + + switch (nWhence) { + case SEEK_SET: + file_handle->Seek(nOffset); + return 0; + case SEEK_CUR: + file_handle->Seek(file_handle->SeekPosition() + nOffset); + return 0; + case SEEK_END: + file_handle->Seek(file_handle->GetFileSize() + nOffset); + return 0; + default: + return -1; + } + } + + size_t Read(void *buffer, size_t size, size_t count) override { + auto bytes_data = static_cast(buffer); + auto bytes_left = size * count; + + try { + while (bytes_left > 0) { + const auto bytes_read = file_handle->Read(bytes_data, bytes_left); + if (bytes_read == 0) { + break; + } + bytes_left -= bytes_read; + bytes_data += bytes_read; + } + } catch (...) { + if (bytes_left != 0) { + if (file_handle->SeekPosition() == file_handle->GetFileSize()) { + // Is at EOF! + is_eof = true; + } + } else { + // else, error! + // unfortunately, this version of GDAL cant distinguish between errors and reading less bytes + // its avaiable in 3.9.2, but we're stuck on 3.8.5 for now. + throw; + } + } + + return count - (bytes_left / size); + } + + int Eof() override { + return is_eof ? TRUE : FALSE; + } + + size_t Write(const void *buffer, size_t size, size_t count) override { + size_t written_bytes = 0; + try { + written_bytes = file_handle->Write(const_cast(buffer), size * count); + } catch (...) 
{ + // ignore + } + return written_bytes / size; + } + + int Flush() override { + file_handle->Sync(); + return 0; + } + int Truncate(vsi_l_offset nNewSize) override { + file_handle->Truncate(static_cast(nNewSize)); + return 0; + } + int Close() override { + file_handle->Close(); + return 0; + } + +private: + unique_ptr file_handle = nullptr; + bool is_eof = false; +}; + +class DuckDBFileSystemHandler final : public VSIFilesystemHandler { +public: + DuckDBFileSystemHandler(string client_prefix, ClientContext &context) + : client_prefix(std::move(client_prefix)), context(context) {}; + + const char *StripPrefix(const char *pszFilename) const { + return pszFilename + client_prefix.size(); + } + string AddPrefix(const string &value) const { + return client_prefix + value; + } + + VSIVirtualHandle *Open(const char *gdal_file_path, const char *access, bool set_error, + CSLConstList /*papszoptions */) override { + + // Strip the prefix to get the real file path + const auto real_file_path = StripPrefix(gdal_file_path); + + // Get the DuckDB file system + auto &fs = FileSystem::GetFileSystem(context); + + // Determine the file open flags + FileOpenFlags flags; + const auto len = strlen(access); + if (access[0] == 'r') { + flags = FileFlags::FILE_FLAGS_READ; + if (len > 1 && access[1] == '+') { + flags |= FileFlags::FILE_FLAGS_WRITE; + } + if (len > 2 && access[2] == '+') { + // might be "rb+" + flags |= FileFlags::FILE_FLAGS_WRITE; + } + } else if (access[0] == 'w') { + flags = FileFlags::FILE_FLAGS_WRITE; + if (!fs.IsPipe(real_file_path)) { + flags |= FileFlags::FILE_FLAGS_FILE_CREATE_NEW; + } + if (len > 1 && access[1] == '+') { + flags |= FileFlags::FILE_FLAGS_READ; + } + if (len > 2 && access[2] == '+') { + // might be "wb+" + flags |= FileFlags::FILE_FLAGS_READ; + } + } else if (access[0] == 'a') { + flags = FileFlags::FILE_FLAGS_APPEND; + if (len > 1 && access[1] == '+') { + flags |= FileFlags::FILE_FLAGS_READ; + } + if (len > 2 && access[2] == '+') { + // might be 
"ab+" + flags |= FileFlags::FILE_FLAGS_READ; + } + } else { + throw InternalException("Unknown file access type"); + } + + try { + // If the file is remote and NOT in write mode, we can cache it. + if (fs.IsRemoteFile(real_file_path) && !flags.OpenForWriting() && !flags.OpenForAppending()) { + // Pass the direct IO flag to the file system since we use GDAL's caching instead + flags |= FileFlags::FILE_FLAGS_DIRECT_IO; + auto file = fs.OpenFile(real_file_path, flags | FileCompressionType::AUTO_DETECT); + return VSICreateCachedFile(new DuckDBFileHandle(std::move(file))); + } + + // Else, just open normally + auto file = fs.OpenFile(real_file_path, flags | FileCompressionType::AUTO_DETECT); + return new DuckDBFileHandle(std::move(file)); + + } catch (std::exception &ex) { + + // Extract error message from DuckDB + const ErrorData error_data(ex); + + // Failed to open file via DuckDB File System. If this doesnt have a VSI prefix we can return an error here. + if (strncmp(real_file_path, "/vsi", 4) != 0) { + if (set_error) { + VSIError(VSIE_FileError, "%s", error_data.RawMessage().c_str()); + } + return nullptr; + } + + // Fall back to GDAL instead (if external access is enabled) + if (!context.db->config.options.enable_external_access) { + if (set_error) { + VSIError(VSIE_FileError, "%s", error_data.RawMessage().c_str()); + } + return nullptr; + } + + const auto handler = VSIFileManager::GetHandler(real_file_path); + if (!handler) { + if (set_error) { + VSIError(VSIE_FileError, "%s", error_data.RawMessage().c_str()); + } + return nullptr; + } + + return handler->Open(real_file_path, access); + } + } + + int Stat(const char *gdal_file_name, VSIStatBufL *result, int n_flags) override { + auto real_file_path = StripPrefix(gdal_file_name); + auto &fs = FileSystem::GetFileSystem(context); + + memset(result, 0, sizeof(VSIStatBufL)); + + if (fs.IsPipe(real_file_path)) { + result->st_mode = S_IFCHR; + return 0; + } + + if (!(fs.FileExists(real_file_path) || + 
(!FileSystem::IsRemoteFile(real_file_path) && fs.DirectoryExists(real_file_path)))) { + return -1; + } + +#ifdef _WIN32 + if (!FileSystem::IsRemoteFile(real_file_path) && fs.DirectoryExists(real_file_path)) { + pstatbuf->st_mode = S_IFDIR; + return 0; + } +#endif + + FileOpenFlags flags; + flags |= FileFlags::FILE_FLAGS_READ; + flags |= FileFlags::FILE_FLAGS_NULL_IF_NOT_EXISTS; + flags |= FileCompressionType::AUTO_DETECT; + + const auto file = fs.OpenFile(real_file_path, flags); + if (!file) { + return -1; + } + + try { + result->st_size = static_cast(fs.GetFileSize(*file)); + } catch (...) { + } + try { + result->st_mtime = Timestamp::ToTimeT(fs.GetLastModifiedTime(*file)); + } catch (...) { + } + try { + const auto type = file->GetType(); + switch (type) { + case FileType::FILE_TYPE_REGULAR: + result->st_mode = S_IFREG; + break; + case FileType::FILE_TYPE_DIR: + result->st_mode = S_IFDIR; + break; + case FileType::FILE_TYPE_CHARDEV: + result->st_mode = S_IFCHR; + break; + default: + // HTTPFS returns invalid type for everything basically. + if (FileSystem::IsRemoteFile(real_file_path)) { + result->st_mode = S_IFREG; + } else { + return -1; + } + } + } catch (...) 
{ + } + return 0; + } + + bool IsLocal(const char *gdal_file_path) override { + const auto real_file_path = StripPrefix(gdal_file_path); + return !FileSystem::IsRemoteFile(real_file_path); + } + + int Mkdir(const char *pszDirname, long nMode) override { + auto &fs = FileSystem::GetFileSystem(context); + const auto dir_name = StripPrefix(pszDirname); + + fs.CreateDirectory(dir_name); + return 0; + } + + int Rmdir(const char *pszDirname) override { + auto &fs = FileSystem::GetFileSystem(context); + const auto dir_name = StripPrefix(pszDirname); + + fs.RemoveDirectory(dir_name); + return 0; + } + + int RmdirRecursive(const char *pszDirname) override { + auto &fs = FileSystem::GetFileSystem(context); + const auto dir_name = StripPrefix(pszDirname); + + fs.RemoveDirectory(dir_name); + return 0; + } + + char **ReadDirEx(const char *gdal_dir_name, int max_files) override { + auto &fs = FileSystem::GetFileSystem(context); + const auto dir_name = StripPrefix(gdal_dir_name); + + CPLStringList files; + auto files_count = 0; + fs.ListFiles(dir_name, [&](const string &file_name, bool is_dir) { + if (files_count >= max_files) { + return; + } + const auto tmp = AddPrefix(file_name); + files.AddString(tmp.c_str()); + files_count++; + }); + return files.StealList(); + } + + char **SiblingFiles(const char *gdal_file_path) override { + auto &fs = FileSystem::GetFileSystem(context); + + const auto real_file_path = StripPrefix(gdal_file_path); + + const auto real_file_stem = StringUtil::GetFileStem(real_file_path); + const auto base_file_path = fs.JoinPath(StringUtil::GetFilePath(real_file_path), real_file_stem); + const auto glob_file_path = base_file_path + ".*"; + + CPLStringList files; + for (auto &file : fs.Glob(glob_file_path)) { + files.AddString(AddPrefix(file.path).c_str()); + } + return files.StealList(); + } + + int HasOptimizedReadMultiRange(const char *pszPath) override { + return 0; + } + + int Unlink(const char *prefixed_file_name) override { + auto &fs = 
FileSystem::GetFileSystem(context); + const auto real_file_path = StripPrefix(prefixed_file_name); + try { + fs.RemoveFile(real_file_path); + return 0; + } catch (...) { + return -1; + } + } + + int Rename(const char *oldpath, const char *newpath) override { + auto &fs = FileSystem::GetFileSystem(context); + const auto real_old_path = StripPrefix(oldpath); + const auto real_new_path = StripPrefix(newpath); + + try { + fs.MoveFile(real_old_path, real_new_path); + return 0; + } catch (...) { + return -1; + } + } + + string GetCanonicalFilename(const std::string &osFilename) const override { + return StripPrefix(osFilename.c_str()); + } + +private: + string client_prefix; + ClientContext &context; +}; + +class DuckDBFileSystemPrefix final : public ClientContextState { +public: + explicit DuckDBFileSystemPrefix(ClientContext &context) : context(context) { + // Create a new random prefix for this client + client_prefix = StringUtil::Format("/vsiduckdb-%s/", UUID::ToString(UUID::GenerateRandomUUID())); + + // Create a new file handler responding to this prefix + fs_handler = make_uniq(client_prefix, context); + + // Register the file handler + VSIFileManager::InstallHandler(client_prefix, fs_handler.get()); + } + + ~DuckDBFileSystemPrefix() override { + // Uninstall the file handler for this prefix + VSIFileManager::RemoveHandler(client_prefix); + } + + string AddPrefix(const string &value) const { + // If the user explicitly asked for a VSI prefix, we don't add our own + if (StringUtil::StartsWith(value, "/vsi")) { + if (!context.db->config.options.enable_external_access) { + throw PermissionException("Cannot open file '%s' with VSI prefix: External access is disabled", value); + } + return value; + } + return client_prefix + value; + } + + static DuckDBFileSystemPrefix &GetOrCreate(ClientContext &context) { + return *context.registered_state->GetOrCreate("gdal", context); + } + +private: + ClientContext &context; + string client_prefix; + unique_ptr fs_handler; +}; 
+//====================================================================================================================== +// GDAL READ +//====================================================================================================================== namespace gdal_read { //---------------------------------------------------------------------------------------------------------------------- @@ -34,7 +445,8 @@ namespace gdal_read { //---------------------------------------------------------------------------------------------------------------------- class BindData final : public TableFunctionData { public: - string file_path; + string real_file_path; + string gdal_file_path; int layer_idx = 0; bool keep_wkb = false; @@ -61,8 +473,12 @@ auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector auto result = make_uniq(); + // Get the file prefix associated with this connection + const auto &file_prefix = DuckDBFileSystemPrefix::GetOrCreate(ctx); + // Pass file path - result->file_path = input.inputs[0].GetValue(); + result->real_file_path = input.inputs[0].GetValue(); + result->gdal_file_path = file_prefix.AddPrefix(result->real_file_path); // Parse options const auto dataset_options_param = input.named_parameters.find("open_options"); @@ -82,7 +498,7 @@ auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector const auto siblings_params = input.named_parameters.find("sibling_files"); if (siblings_params != input.named_parameters.end()) { for (auto &param : ListValue::GetChildren(siblings_params->second)) { - result->dataset_sibling.AddString(StringValue::Get(param).c_str()); + result->dataset_sibling.AddString(file_prefix.AddPrefix(StringValue::Get(param)).c_str()); } } @@ -105,14 +521,11 @@ auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector } // Open the dataset and get the Arrow schema - const auto dataset = - GDALOpenEx(result->file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, - result->dataset_drivers, -
result->dataset_options, - result->dataset_sibling); + const auto dataset = GDALOpenEx(result->gdal_file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, + result->dataset_drivers, result->dataset_options, result->dataset_sibling); if (!dataset) { - throw IOException("Could not open GDAL dataset at: %s", result->file_path); + throw IOException("Could not open GDAL dataset at: %s", result->real_file_path); } ArrowSchema schema; @@ -122,7 +535,7 @@ auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector const auto layer_count = GDALDatasetGetLayerCount(dataset); if (layer_count <= 0) { - throw IOException("GDAL dataset contains no layers at: %s", result->file_path); + throw IOException("GDAL dataset contains no layers at: %s", result->real_file_path); } // Find layer @@ -164,7 +577,7 @@ auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector // Get the layer by index const auto layer = GDALDatasetGetLayer(dataset, result->layer_idx); if (!layer) { - throw IOException("Could not get GDAL layer at: %s", result->file_path); + throw IOException("Could not get GDAL layer at: %s", result->real_file_path); } // Estimate cardinality @@ -183,14 +596,22 @@ auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector // Get the layer geometry type if available result->layer_type = OGR_L_GetGeomType(layer); + // Check FID column + const auto fid_col = OGR_L_GetFIDColumn(layer); + if (fid_col && strcmp(fid_col, "") != 0) { + // Do not include the explicit FID if we already have it as a column + result->layer_options.AddString("INCLUDE_FID=NO"); + } + const auto geom_col_name = OGR_L_GetGeometryColumn(layer); + // Get the arrow stream if (!OGR_L_GetArrowStream(layer, &stream, result->layer_options.List())) { - throw IOException("Could not get GDAL Arrow stream at: %s", result->file_path); + throw IOException("Could not get GDAL Arrow stream at: %s", result->real_file_path); } // And the schema if (stream.get_schema(&stream, &schema) != 0) { - throw 
IOException("Could not get GDAL Arrow schema at: %s", result->file_path); + throw IOException("Could not get GDAL Arrow schema at: %s", result->real_file_path); } // Convert Arrow schema to DuckDB types @@ -204,7 +625,14 @@ auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector result->geometry_columns.insert(i); } - col_names.push_back(child_schema.name); + if (geom_col_name && (strcmp(geom_col_name, "") == 0) && (strcmp(child_schema.name, "wkb_geometry") == 0) && + !result->keep_wkb) { + // Rename the geometry column to "geom" unless keep_wkb is set + col_names.push_back("geom"); + } else { + col_names.push_back(child_schema.name); + } + col_types.push_back(std::move(duck_type)); } @@ -368,14 +796,11 @@ class GlobalState final : public GlobalTableFunctionState { auto InitGlobal(ClientContext &context, TableFunctionInitInput &input) -> unique_ptr { auto &bdata = input.bind_data->Cast(); - const auto dataset = - GDALOpenEx(bdata.file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, - bdata.dataset_drivers, - bdata.dataset_options, - bdata.dataset_sibling); + const auto dataset = GDALOpenEx(bdata.gdal_file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, + bdata.dataset_drivers, bdata.dataset_options, bdata.dataset_sibling); if (!dataset) { - throw IOException("Could not open GDAL dataset at: foo"); + throw IOException("Could not open GDAL dataset at: %s", bdata.real_file_path); } auto result = make_uniq(); @@ -587,7 +1012,7 @@ auto ReplacementScan(ClientContext &, ReplacementScanInput &input, optional_ptr< // REGISTER //---------------------------------------------------------------------------------------------------------------------- void Register(ExtensionLoader &loader) { - TableFunction read_func("gdal_read", {LogicalType::VARCHAR}, Scan, Bind, InitGlobal); + TableFunction read_func("st_read", {LogicalType::VARCHAR}, Scan, Bind, InitGlobal); read_func.cardinality = Cardinality; read_func.statistics = Statistics; read_func.table_scan_progress = 
Progress; @@ -607,11 +1032,9 @@ void Register(ExtensionLoader &loader) { } } // namespace gdal_read - //====================================================================================================================== // GDAL COPY //====================================================================================================================== - namespace gdal_copy { //---------------------------------------------------------------------------------------------------------------------- @@ -619,7 +1042,8 @@ namespace gdal_copy { //---------------------------------------------------------------------------------------------------------------------- class BindData final : public TableFunctionData { public: - string file_path; + //string gdal_file_path; + //string real_file_path; string driver_name; string layer_name; @@ -669,8 +1093,8 @@ auto Bind(ClientContext &context, CopyFunctionBindInput &input, const vector &sql_types) -> unique_ptr { auto result = make_uniq(); - // Set file path - result->file_path = input.info.file_path; + // Set file pat + const auto &file_path = input.info.file_path; // Parse options for (auto &option : input.info.options) { @@ -736,7 +1160,7 @@ auto Bind(ClientContext &context, CopyFunctionBindInput &input, const vectorlayer_name.empty()) { auto &fs = FileSystem::GetFileSystem(context); - result->layer_name = fs.ExtractBaseName(result->file_path); + result->layer_name = fs.ExtractBaseName(file_path); } // Check the driver @@ -792,7 +1216,8 @@ class GlobalState final : public GlobalFunctionData { OGRSpatialReferenceH srs = nullptr; }; -auto InitGlobal(ClientContext &context, FunctionData &bdata_p, const string &path) -> unique_ptr { +auto InitGlobal(ClientContext &context, FunctionData &bdata_p, const string &real_file_path) + -> unique_ptr { auto &bdata = bdata_p.Cast(); auto result = make_uniq(); @@ -801,10 +1226,13 @@ auto InitGlobal(ClientContext &context, FunctionData &bdata_p, const string &pat throw 
InvalidInputException("Could not find GDAL driver: " + bdata.driver_name); } + const auto &file_prefix = DuckDBFileSystemPrefix::GetOrCreate(context); + const auto gdal_file_path = file_prefix.AddPrefix(real_file_path); + // Create Dataset - result->dataset = GDALCreate(driver, bdata.file_path.c_str(), 0, 0, 0, GDT_Unknown, bdata.driver_options); + result->dataset = GDALCreate(driver, gdal_file_path.c_str(), 0, 0, 0, GDT_Unknown, bdata.driver_options); if (!result->dataset) { - throw IOException("Could not create GDAL dataset at: " + bdata.file_path); + throw IOException("Could not create GDAL dataset at: " + real_file_path); } if (!bdata.target_srs.empty()) { @@ -814,11 +1242,11 @@ auto InitGlobal(ClientContext &context, FunctionData &bdata_p, const string &pat } // Create Layer - result->layer = GDALDatasetCreateLayer(result->dataset, bdata.driver_name.c_str(), result->srs, bdata.geometry_type, + result->layer = GDALDatasetCreateLayer(result->dataset, bdata.layer_name.c_str(), result->srs, bdata.geometry_type, bdata.layer_options); if (!result->layer) { - throw IOException("Could not create GDAL layer in dataset at: " + bdata.file_path); + throw IOException("Could not create GDAL layer in dataset at: " + real_file_path); } // Create fields for all children @@ -945,7 +1373,60 @@ void Register(ExtensionLoader &loader) { } // namespace void RegisterExtraFunction(ExtensionLoader &loader) { - gdal_copy::Register(loader); + + // Load GDAL (once) + static std::once_flag loaded; + std::call_once(loaded, [&]() { + // Register all embedded drivers (dont go looking for plugins) + OGRRegisterAllInternal(); + + // Set GDAL error handler + CPLSetErrorHandler([](CPLErr e, int code, const char *raw_msg) { + // DuckDB doesnt do warnings, so we only throw on errors + if (e != CE_Failure && e != CE_Fatal) { + return; + } + + // GDAL Catches exceptions internally and passes them on to the handler again as CPLE_AppDefined + // So we don't add any extra information here or we end up 
with very long nested error messages. + // Using ErrorData we can parse the message part of DuckDB exceptions properly, and for other exceptions + // their error message will still be preserved as the "raw message". + ErrorData error_data(raw_msg); + auto msg = error_data.RawMessage(); + + // If the error contains a /vsiduckdb-/ prefix, + // try to strip it off to make the errors more readable + auto path_pos = msg.find("/vsiduckdb-"); + if (path_pos != string::npos) { + // We found a path, strip it off + msg.erase(path_pos, 48); + } + + switch (code) { + case CPLE_NoWriteAccess: + throw PermissionException(msg); + case CPLE_UserInterrupt: + throw InterruptException(); + case CPLE_OutOfMemory: + throw OutOfMemoryException(msg); + case CPLE_NotSupported: + throw NotImplementedException(msg); + case CPLE_AssertionFailed: + case CPLE_ObjectNull: + throw InternalException(msg); + case CPLE_IllegalArg: + throw InvalidInputException(msg); + case CPLE_AppDefined: + case CPLE_HttpResponse: + case CPLE_FileIO: + case CPLE_OpenFailed: + default: + throw IOException(msg); + } + }); + }); + gdal_read::Register(loader); + gdal_copy::Register(loader); } } // namespace duckdb diff --git a/src/spatial/modules/gdal/gdal_module.cpp b/src/spatial/modules/gdal/gdal_module.cpp index 2c2dea86..c6056ff2 100644 --- a/src/spatial/modules/gdal/gdal_module.cpp +++ b/src/spatial/modules/gdal/gdal_module.cpp @@ -2015,65 +2015,65 @@ struct ST_Write { // Register Module //###################################################################################################################### void RegisterGDALModule(ExtensionLoader &loader) { - - // Load GDAL (once) - static std::once_flag loaded; - std::call_once(loaded, [&]() { - // Register all embedded drivers (dont go looking for plugins) - OGRRegisterAllInternal(); - - // Set GDAL error handler - CPLSetErrorHandler([](CPLErr e, int code, const char *raw_msg) { - // DuckDB doesnt do warnings, so we only throw on errors - if (e != CE_Failure && 
e != CE_Fatal) { - return; - } - - // If the error contains a /vsiduckdb-/ prefix, - // try to strip it off to make the errors more readable - auto msg = string(raw_msg); - auto path_pos = msg.find("/vsiduckdb-"); - if (path_pos != string::npos) { - // We found a path, strip it off - msg.erase(path_pos, 48); - } - - // GDAL Catches exceptions internally and passes them on to the handler again as CPLE_AppDefined - // So we don't add any extra information here or we end up with very long nested error messages. - // Using ErrorData we can parse the message part of DuckDB exceptions properly, and for other exceptions - // their error message will still be preserved as the "raw message". - ErrorData error_msg(raw_msg); - - switch (code) { - case CPLE_NoWriteAccess: - throw PermissionException(error_msg.RawMessage()); - case CPLE_UserInterrupt: - throw InterruptException(); - case CPLE_OutOfMemory: - throw OutOfMemoryException(error_msg.RawMessage()); - case CPLE_NotSupported: - throw NotImplementedException(error_msg.RawMessage()); - case CPLE_AssertionFailed: - case CPLE_ObjectNull: - throw InternalException(error_msg.RawMessage()); - case CPLE_IllegalArg: - throw InvalidInputException( error_msg.RawMessage()); - case CPLE_AppDefined: - case CPLE_HttpResponse: - case CPLE_FileIO: - case CPLE_OpenFailed: - default: - throw IOException(error_msg.RawMessage()); - } - }); - }); + // + // // Load GDAL (once) + // static std::once_flag loaded; + // std::call_once(loaded, [&]() { + // // Register all embedded drivers (dont go looking for plugins) + // OGRRegisterAllInternal(); + // + // // Set GDAL error handler + // CPLSetErrorHandler([](CPLErr e, int code, const char *raw_msg) { + // // DuckDB doesnt do warnings, so we only throw on errors + // if (e != CE_Failure && e != CE_Fatal) { + // return; + // } + // + // // If the error contains a /vsiduckdb-/ prefix, + // // try to strip it off to make the errors more readable + // auto msg = string(raw_msg); + // auto path_pos = 
msg.find("/vsiduckdb-"); + // if (path_pos != string::npos) { + // // We found a path, strip it off + // msg.erase(path_pos, 48); + // } + // + // // GDAL Catches exceptions internally and passes them on to the handler again as CPLE_AppDefined + // // So we don't add any extra information here or we end up with very long nested error messages. + // // Using ErrorData we can parse the message part of DuckDB exceptions properly, and for other exceptions + // // their error message will still be preserved as the "raw message". + // ErrorData error_msg(raw_msg); + // + // switch (code) { + // case CPLE_NoWriteAccess: + // throw PermissionException(error_msg.RawMessage()); + // case CPLE_UserInterrupt: + // throw InterruptException(); + // case CPLE_OutOfMemory: + // throw OutOfMemoryException(error_msg.RawMessage()); + // case CPLE_NotSupported: + // throw NotImplementedException(error_msg.RawMessage()); + // case CPLE_AssertionFailed: + // case CPLE_ObjectNull: + // throw InternalException(error_msg.RawMessage()); + // case CPLE_IllegalArg: + // throw InvalidInputException( error_msg.RawMessage()); + // case CPLE_AppDefined: + // case CPLE_HttpResponse: + // case CPLE_FileIO: + // case CPLE_OpenFailed: + // default: + // throw IOException(error_msg.RawMessage()); + // } + // }); + // }); RegisterExtraFunction(loader); - ST_Read::Register(loader); - ST_Read_Meta::Register(loader); - ST_Drivers::Register(loader); - ST_Write::Register(loader); + //ST_Read::Register(loader); + //ST_Read_Meta::Register(loader); + //ST_Drivers::Register(loader); + // ST_Write::Register(loader); } } // namespace duckdb diff --git a/src/spatial/modules/osm/osm_module.cpp b/src/spatial/modules/osm/osm_module.cpp index 82322133..26e4bc33 100644 --- a/src/spatial/modules/osm/osm_module.cpp +++ b/src/spatial/modules/osm/osm_module.cpp @@ -36,8 +36,8 @@ struct BindData final : TableFunctionData { } }; -unique_ptr Bind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, 
vector &names) { +unique_ptr Bind(ClientContext &context, TableFunctionBindInput &input, vector &return_types, + vector &names) { // Create an enum type for all osm kinds vector enum_values = {"node", "way", "relation", "changeset"}; @@ -222,7 +222,6 @@ class GlobalState final : public GlobalTableFunctionState { reader.next(3); auto blob_length = reader.get_int32(); // size of the next blob - // Sanity check if (offset + blob_length > file_size) { throw ParserException("Unexpected end of file when reading Blob"); @@ -796,7 +795,7 @@ struct LocalState final : LocalTableFunctionState { }; unique_ptr InitLocal(ExecutionContext &context, TableFunctionInitInput &input, - GlobalTableFunctionState *global_state) { + GlobalTableFunctionState *global_state) { auto &global = global_state->Cast(); const auto blob = global.GetNextBlob(context.client); if (blob == nullptr) { @@ -834,8 +833,7 @@ void Execute(ClientContext &context, TableFunctionInput &input, DataChunk &outpu output.SetCardinality(row_id); } -double Progress(ClientContext &context, const FunctionData *bind_data, - const GlobalTableFunctionState *global_state) { +double Progress(ClientContext &context, const FunctionData *bind_data, const GlobalTableFunctionState *global_state) { const auto &state = global_state->Cast(); return state.GetProgress(); } @@ -849,7 +847,7 @@ OperatorPartitionData GetPartitionData(ClientContext &context, TableFunctionGetP } unique_ptr ReadOsmPBFReplacementScan(ClientContext &context, ReplacementScanInput &input, - optional_ptr data) { + optional_ptr data) { auto &table_name = input.table_name; // Check if the table name ends with .osm.pbf if (!StringUtil::EndsWith(StringUtil::Lower(table_name), ".osm.pbf")) { diff --git a/test/sql/gdal/st_read_gdb.test b/test/sql/gdal/st_read_gdb.test index 9a2e3a69..58b040ec 100644 --- a/test/sql/gdal/st_read_gdb.test +++ b/test/sql/gdal/st_read_gdb.test @@ -1,3 +1,6 @@ +# name: test/sql/gdal/st_read_gdb.test +# group: [gdal] + require spatial 
statement error @@ -8,7 +11,7 @@ OpenFileGDB requires 'GEOMETRY_TYPE' parameter to be set when writing statement error COPY (SELECT ST_Point(1,2) as geom, 10 as i) TO '__TEST_DIR__/test_fail.gdb' WITH (FORMAT 'GDAL', DRIVER 'OpenFileGDB', GEOMETRY_TYPE 'LINESTRING'); ---- -Expected all geometries to be of type 'LINESTRING', but got one of type 'POINT' +Not implemented Error: Can only insert a LineString/MultiLineString/CircularString/CompoundCurve/MultiCurve in a esriGeometryLine layer statement ok COPY (SELECT ST_Point(1,2) as geom, 10 as i) TO '__TEST_DIR__/test.gdb' WITH (FORMAT 'GDAL', DRIVER 'OpenFileGDB', GEOMETRY_TYPE 'POINT'); From c1f6acd6948ef574b328cf755e3302ac565a31eb Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Tue, 18 Nov 2025 15:17:53 +0100 Subject: [PATCH 26/41] dont remote cache --- src/spatial/modules/gdal/gdal_functions.cpp | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/src/spatial/modules/gdal/gdal_functions.cpp b/src/spatial/modules/gdal/gdal_functions.cpp index 1a344e5f..8025c1cb 100644 --- a/src/spatial/modules/gdal/gdal_functions.cpp +++ b/src/spatial/modules/gdal/gdal_functions.cpp @@ -32,7 +32,7 @@ namespace { class DuckDBFileHandle final : public VSIVirtualHandle { public: explicit DuckDBFileHandle(unique_ptr file_handle_p) - : file_handle(std::move(file_handle_p)), is_eof(false) { + : file_handle(std::move(file_handle_p)), is_eof(false), can_seek(file_handle->CanSeek()) { } vsi_l_offset Tell() override { @@ -125,6 +125,7 @@ class DuckDBFileHandle final : public VSIVirtualHandle { private: unique_ptr file_handle = nullptr; bool is_eof = false; + bool can_seek = false; }; class DuckDBFileSystemHandler final : public VSIFilesystemHandler { @@ -186,15 +187,6 @@ class DuckDBFileSystemHandler final : public VSIFilesystemHandler { } try { - // If the file is remote and NOT in write mode, we can cache it. 
- if (fs.IsRemoteFile(real_file_path) && !flags.OpenForWriting() && !flags.OpenForAppending()) { - // Pass the direct IO flag to the file system since we use GDAL's caching instead - flags |= FileFlags::FILE_FLAGS_DIRECT_IO; - auto file = fs.OpenFile(real_file_path, flags | FileCompressionType::AUTO_DETECT); - return VSICreateCachedFile(new DuckDBFileHandle(std::move(file))); - } - - // Else, just open normally auto file = fs.OpenFile(real_file_path, flags | FileCompressionType::AUTO_DETECT); return new DuckDBFileHandle(std::move(file)); @@ -387,10 +379,6 @@ class DuckDBFileSystemHandler final : public VSIFilesystemHandler { } } - string GetCanonicalFilename(const std::string &osFilename) const override { - return StripPrefix(osFilename.c_str()); - } - private: string client_prefix; ClientContext &context; From 0dd525df8e716b37665b6f13b8fa938cb04cae4f Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Tue, 18 Nov 2025 15:53:36 +0100 Subject: [PATCH 27/41] move back into gdal_module --- src/spatial/modules/gdal/CMakeLists.txt | 1 - src/spatial/modules/gdal/gdal_functions.cpp | 1420 --------- src/spatial/modules/gdal/gdal_module.cpp | 2892 +++++++++---------- 3 files changed, 1318 insertions(+), 2995 deletions(-) delete mode 100644 src/spatial/modules/gdal/gdal_functions.cpp diff --git a/src/spatial/modules/gdal/CMakeLists.txt b/src/spatial/modules/gdal/CMakeLists.txt index 01fcda1f..9942e0f2 100644 --- a/src/spatial/modules/gdal/CMakeLists.txt +++ b/src/spatial/modules/gdal/CMakeLists.txt @@ -1,4 +1,3 @@ set(EXTENSION_SOURCES ${EXTENSION_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/gdal_module.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/gdal_functions.cpp PARENT_SCOPE) diff --git a/src/spatial/modules/gdal/gdal_functions.cpp b/src/spatial/modules/gdal/gdal_functions.cpp deleted file mode 100644 index 8025c1cb..00000000 --- a/src/spatial/modules/gdal/gdal_functions.cpp +++ /dev/null @@ -1,1420 +0,0 @@ -#include "duckdb/main/extension/extension_loader.hpp" -#include 
"duckdb/function/copy_function.hpp" -#include "duckdb/function/table/arrow.hpp" -#include "duckdb/common/arrow/arrow_converter.hpp" -#include "duckdb/common/arrow/arrow.hpp" -#include "duckdb/main/database.hpp" -#include "duckdb/planner/expression/bound_function_expression.hpp" -#include "duckdb/planner/expression/bound_constant_expression.hpp" -#include "duckdb/parser/expression/constant_expression.hpp" -#include "duckdb/parser/expression/function_expression.hpp" - -#include "gdal.h" -#include "ogr_core.h" -#include "ogr_api.h" -#include "ogr_srs_api.h" - -#include "cpl_string.h" -#include "cpl_vsi.h" -#include "cpl_vsi_error.h" -#include "cpl_vsi_virtual.h" -#include "duckdb/common/types/uuid.hpp" -#include "duckdb/parser/tableref/table_function_ref.hpp" - -#include - -namespace duckdb { -namespace { - -//====================================================================================================================== -// GDAL FILE -//====================================================================================================================== -class DuckDBFileHandle final : public VSIVirtualHandle { -public: - explicit DuckDBFileHandle(unique_ptr file_handle_p) - : file_handle(std::move(file_handle_p)), is_eof(false), can_seek(file_handle->CanSeek()) { - } - - vsi_l_offset Tell() override { - return static_cast(file_handle->SeekPosition()); - } - - int Seek(vsi_l_offset nOffset, int nWhence) override { - // Reset EOF flag on seek - is_eof = false; - - // Use the reset function instead to allow compressed file handles to rewind - // even if they don't support seeking - if (nWhence == SEEK_SET && nOffset == 0) { - file_handle->Reset(); - return 0; - } - - switch (nWhence) { - case SEEK_SET: - file_handle->Seek(nOffset); - return 0; - case SEEK_CUR: - file_handle->Seek(file_handle->SeekPosition() + nOffset); - return 0; - case SEEK_END: - file_handle->Seek(file_handle->GetFileSize() + nOffset); - return 0; - default: - return -1; - } - } - - size_t 
Read(void *buffer, size_t size, size_t count) override { - auto bytes_data = static_cast(buffer); - auto bytes_left = size * count; - - try { - while (bytes_left > 0) { - const auto bytes_read = file_handle->Read(bytes_data, bytes_left); - if (bytes_read == 0) { - break; - } - bytes_left -= bytes_read; - bytes_data += bytes_read; - } - } catch (...) { - if (bytes_left != 0) { - if (file_handle->SeekPosition() == file_handle->GetFileSize()) { - // Is at EOF! - is_eof = true; - } - } else { - // else, error! - // unfortunately, this version of GDAL cant distinguish between errors and reading less bytes - // its avaiable in 3.9.2, but we're stuck on 3.8.5 for now. - throw; - } - } - - return count - (bytes_left / size); - } - - int Eof() override { - return is_eof ? TRUE : FALSE; - } - - size_t Write(const void *buffer, size_t size, size_t count) override { - size_t written_bytes = 0; - try { - written_bytes = file_handle->Write(const_cast(buffer), size * count); - } catch (...) { - // ignore - } - return written_bytes / size; - } - - int Flush() override { - file_handle->Sync(); - return 0; - } - int Truncate(vsi_l_offset nNewSize) override { - file_handle->Truncate(static_cast(nNewSize)); - return 0; - } - int Close() override { - file_handle->Close(); - return 0; - } - -private: - unique_ptr file_handle = nullptr; - bool is_eof = false; - bool can_seek = false; -}; - -class DuckDBFileSystemHandler final : public VSIFilesystemHandler { -public: - DuckDBFileSystemHandler(string client_prefix, ClientContext &context) - : client_prefix(std::move(client_prefix)), context(context) {}; - - const char *StripPrefix(const char *pszFilename) const { - return pszFilename + client_prefix.size(); - } - string AddPrefix(const string &value) const { - return client_prefix + value; - } - - VSIVirtualHandle *Open(const char *gdal_file_path, const char *access, bool set_error, - CSLConstList /*papszoptions */) override { - - // Strip the prefix to get the real file path - const auto 
real_file_path = StripPrefix(gdal_file_path); - - // Get the DuckDB file system - auto &fs = FileSystem::GetFileSystem(context); - - // Determine the file open flags - FileOpenFlags flags; - const auto len = strlen(access); - if (access[0] == 'r') { - flags = FileFlags::FILE_FLAGS_READ; - if (len > 1 && access[1] == '+') { - flags |= FileFlags::FILE_FLAGS_WRITE; - } - if (len > 2 && access[2] == '+') { - // might be "rb+" - flags |= FileFlags::FILE_FLAGS_WRITE; - } - } else if (access[0] == 'w') { - flags = FileFlags::FILE_FLAGS_WRITE; - if (!fs.IsPipe(real_file_path)) { - flags |= FileFlags::FILE_FLAGS_FILE_CREATE_NEW; - } - if (len > 1 && access[1] == '+') { - flags |= FileFlags::FILE_FLAGS_READ; - } - if (len > 2 && access[2] == '+') { - // might be "wb+" - flags |= FileFlags::FILE_FLAGS_READ; - } - } else if (access[0] == 'a') { - flags = FileFlags::FILE_FLAGS_APPEND; - if (len > 1 && access[1] == '+') { - flags |= FileFlags::FILE_FLAGS_READ; - } - if (len > 2 && access[2] == '+') { - // might be "ab+" - flags |= FileFlags::FILE_FLAGS_READ; - } - } else { - throw InternalException("Unknown file access type"); - } - - try { - auto file = fs.OpenFile(real_file_path, flags | FileCompressionType::AUTO_DETECT); - return new DuckDBFileHandle(std::move(file)); - - } catch (std::exception &ex) { - - // Extract error message from DuckDB - const ErrorData error_data(ex); - - // Failed to open file via DuckDB File System. If this doesnt have a VSI prefix we can return an error here. 
- if (strncmp(real_file_path, "/vsi", 4) != 0) { - if (set_error) { - VSIError(VSIE_FileError, "%s", error_data.RawMessage().c_str()); - } - return nullptr; - } - - // Fall back to GDAL instead (if external access is enabled) - if (!context.db->config.options.enable_external_access) { - if (set_error) { - VSIError(VSIE_FileError, "%s", error_data.RawMessage().c_str()); - } - return nullptr; - } - - const auto handler = VSIFileManager::GetHandler(real_file_path); - if (!handler) { - if (set_error) { - VSIError(VSIE_FileError, "%s", error_data.RawMessage().c_str()); - } - return nullptr; - } - - return handler->Open(real_file_path, access); - } - } - - int Stat(const char *gdal_file_name, VSIStatBufL *result, int n_flags) override { - auto real_file_path = StripPrefix(gdal_file_name); - auto &fs = FileSystem::GetFileSystem(context); - - memset(result, 0, sizeof(VSIStatBufL)); - - if (fs.IsPipe(real_file_path)) { - result->st_mode = S_IFCHR; - return 0; - } - - if (!(fs.FileExists(real_file_path) || - (!FileSystem::IsRemoteFile(real_file_path) && fs.DirectoryExists(real_file_path)))) { - return -1; - } - -#ifdef _WIN32 - if (!FileSystem::IsRemoteFile(real_file_path) && fs.DirectoryExists(real_file_path)) { - pstatbuf->st_mode = S_IFDIR; - return 0; - } -#endif - - FileOpenFlags flags; - flags |= FileFlags::FILE_FLAGS_READ; - flags |= FileFlags::FILE_FLAGS_NULL_IF_NOT_EXISTS; - flags |= FileCompressionType::AUTO_DETECT; - - const auto file = fs.OpenFile(real_file_path, flags); - if (!file) { - return -1; - } - - try { - result->st_size = static_cast(fs.GetFileSize(*file)); - } catch (...) { - } - try { - result->st_mtime = Timestamp::ToTimeT(fs.GetLastModifiedTime(*file)); - } catch (...) 
{ - } - try { - const auto type = file->GetType(); - switch (type) { - case FileType::FILE_TYPE_REGULAR: - result->st_mode = S_IFREG; - break; - case FileType::FILE_TYPE_DIR: - result->st_mode = S_IFDIR; - break; - case FileType::FILE_TYPE_CHARDEV: - result->st_mode = S_IFCHR; - break; - default: - // HTTPFS returns invalid type for everything basically. - if (FileSystem::IsRemoteFile(real_file_path)) { - result->st_mode = S_IFREG; - } else { - return -1; - } - } - } catch (...) { - } - return 0; - } - - bool IsLocal(const char *gdal_file_path) override { - const auto real_file_path = StripPrefix(gdal_file_path); - return !FileSystem::IsRemoteFile(real_file_path); - } - - int Mkdir(const char *pszDirname, long nMode) override { - auto &fs = FileSystem::GetFileSystem(context); - const auto dir_name = StripPrefix(pszDirname); - - fs.CreateDirectory(dir_name); - return 0; - } - - int Rmdir(const char *pszDirname) override { - auto &fs = FileSystem::GetFileSystem(context); - const auto dir_name = StripPrefix(pszDirname); - - fs.RemoveDirectory(dir_name); - return 0; - } - - int RmdirRecursive(const char *pszDirname) override { - auto &fs = FileSystem::GetFileSystem(context); - const auto dir_name = StripPrefix(pszDirname); - - fs.RemoveDirectory(dir_name); - return 0; - } - - char **ReadDirEx(const char *gdal_dir_name, int max_files) override { - auto &fs = FileSystem::GetFileSystem(context); - const auto dir_name = StripPrefix(gdal_dir_name); - - CPLStringList files; - auto files_count = 0; - fs.ListFiles(dir_name, [&](const string &file_name, bool is_dir) { - if (files_count >= max_files) { - return; - } - const auto tmp = AddPrefix(file_name); - files.AddString(tmp.c_str()); - files_count++; - }); - return files.StealList(); - } - - char **SiblingFiles(const char *gdal_file_path) override { - auto &fs = FileSystem::GetFileSystem(context); - - const auto real_file_path = StripPrefix(gdal_file_path); - - const auto real_file_stem = 
StringUtil::GetFileStem(real_file_path); - const auto base_file_path = fs.JoinPath(StringUtil::GetFilePath(real_file_path), real_file_stem); - const auto glob_file_path = base_file_path + ".*"; - - CPLStringList files; - for (auto &file : fs.Glob(glob_file_path)) { - files.AddString(AddPrefix(file.path).c_str()); - } - return files.StealList(); - } - - int HasOptimizedReadMultiRange(const char *pszPath) override { - return 0; - } - - int Unlink(const char *prefixed_file_name) override { - auto &fs = FileSystem::GetFileSystem(context); - const auto real_file_path = StripPrefix(prefixed_file_name); - try { - fs.RemoveFile(real_file_path); - return 0; - } catch (...) { - return -1; - } - } - - int Rename(const char *oldpath, const char *newpath) override { - auto &fs = FileSystem::GetFileSystem(context); - const auto real_old_path = StripPrefix(oldpath); - const auto real_new_path = StripPrefix(newpath); - - try { - fs.MoveFile(real_old_path, real_new_path); - return 0; - } catch (...) { - return -1; - } - } - -private: - string client_prefix; - ClientContext &context; -}; - -class DuckDBFileSystemPrefix final : public ClientContextState { -public: - explicit DuckDBFileSystemPrefix(ClientContext &context) : context(context) { - // Create a new random prefix for this client - client_prefix = StringUtil::Format("/vsiduckdb-%s/", UUID::ToString(UUID::GenerateRandomUUID())); - - // Create a new file handler responding to this prefix - fs_handler = make_uniq(client_prefix, context); - - // Register the file handler - VSIFileManager::InstallHandler(client_prefix, fs_handler.get()); - } - - ~DuckDBFileSystemPrefix() override { - // Uninstall the file handler for this prefix - VSIFileManager::RemoveHandler(client_prefix); - } - - string AddPrefix(const string &value) const { - // If the user explicitly asked for a VSI prefix, we don't add our own - if (StringUtil::StartsWith(value, "/vsi")) { - if (!context.db->config.options.enable_external_access) { - throw 
PermissionException("Cannot open file '%s' with VSI prefix: External access is disabled", value); - } - return value; - } - return client_prefix + value; - } - - static DuckDBFileSystemPrefix &GetOrCreate(ClientContext &context) { - return *context.registered_state->GetOrCreate("gdal", context); - } - -private: - ClientContext &context; - string client_prefix; - unique_ptr fs_handler; -}; - -//====================================================================================================================== -// GDAL READ -//====================================================================================================================== -namespace gdal_read { - -//---------------------------------------------------------------------------------------------------------------------- -// BIND -//---------------------------------------------------------------------------------------------------------------------- -class BindData final : public TableFunctionData { -public: - string real_file_path; - string gdal_file_path; - - int layer_idx = 0; - bool keep_wkb = false; - - CPLStringList layer_options; - CPLStringList dataset_options; - CPLStringList dataset_sibling; - CPLStringList dataset_drivers; - - int64_t estimated_cardinality = 0; - unordered_set geometry_columns = {}; - - bool can_filter = false; - bool has_extent = false; - bool has_filter = false; - OGREnvelope layer_extent; - OGREnvelope layer_filter; - - OGRwkbGeometryType layer_type = wkbUnknown; -}; - -auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector &col_types, vector &col_names) - -> unique_ptr { - - auto result = make_uniq(); - - // Get the file prefix associated with this connection - const auto &file_prefix = DuckDBFileSystemPrefix::GetOrCreate(ctx); - - // Pass file path - result->real_file_path = input.inputs[0].GetValue(); - result->gdal_file_path = file_prefix.AddPrefix(result->real_file_path); - - // Parse options - const auto dataset_options_param = 
input.named_parameters.find("open_options"); - if (dataset_options_param != input.named_parameters.end()) { - for (auto ¶m : ListValue::GetChildren(dataset_options_param->second)) { - result->dataset_options.AddString(StringValue::Get(param).c_str()); - } - } - - const auto drivers_param = input.named_parameters.find("allowed_drivers"); - if (drivers_param != input.named_parameters.end()) { - for (auto ¶m : ListValue::GetChildren(drivers_param->second)) { - result->dataset_drivers.AddString(StringValue::Get(param).c_str()); - } - } - - const auto siblings_params = input.named_parameters.find("sibling_files"); - if (siblings_params != input.named_parameters.end()) { - for (auto ¶m : ListValue::GetChildren(siblings_params->second)) { - result->dataset_sibling.AddString(file_prefix.AddPrefix(StringValue::Get(param)).c_str()); - } - } - - const auto keep_wkb_param = input.named_parameters.find("keep_wkb"); - if (keep_wkb_param != input.named_parameters.end()) { - result->keep_wkb = BooleanValue::Get(keep_wkb_param->second); - } - - // Set additional default GDAL default options - - // This for OSM, but we don't know if we are reading OSM until we open the dataset, so just always set it for now. 
- //result->dataset_options.AddString("INTERLEAVED_READING=YES"); - - // This is so taht we dont have to deal with chunking ourselves, let GDAL do it for us - result->layer_options.AddString(StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", STANDARD_VECTOR_SIZE).c_str()); - - // We always want GeoArrow geometry which DuckDB knows how to convert to GEOMETRY type, unless `keep_wkb` is set - if (!result->keep_wkb) { - result->layer_options.AddString("GEOMETRY_METADATA_ENCODING=GEOARROW"); - } - - // Open the dataset and get the Arrow schema - const auto dataset = GDALOpenEx(result->gdal_file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, - result->dataset_drivers, result->dataset_options, result->dataset_sibling); - - if (!dataset) { - throw IOException("Could not open GDAL dataset at: %s", result->real_file_path); - } - - ArrowSchema schema; - ArrowArrayStream stream; - - try { - - const auto layer_count = GDALDatasetGetLayerCount(dataset); - if (layer_count <= 0) { - throw IOException("GDAL dataset contains no layers at: %s", result->real_file_path); - } - - // Find layer - const auto layer_param = input.named_parameters.find("layer"); - - if (layer_param != input.named_parameters.end()) { - if (layer_param->second.type() == LogicalType::INTEGER) { - // Find layer by index - const auto layer_idx = IntegerValue::Get(layer_param->second); - if (layer_idx < 0) { - throw BinderException("Layer index must be positive"); - } - if (layer_idx > layer_count) { - throw BinderException( - StringUtil::Format("Layer index out of range (%s > %s)", layer_idx, layer_count)); - } - result->layer_idx = layer_idx; - } else if (layer_param->second.type() == LogicalType::VARCHAR) { - // Find layer by name - const auto &layer_name = StringValue::Get(layer_param->second); - auto found = false; - for (int i = 0; i < layer_count; i++) { - const auto layer = GDALDatasetGetLayer(dataset, i); - if (!layer) { - continue; - } - if (OGR_L_GetName(layer) == layer_name) { - result->layer_idx = i; - 
found = true; - break; - } - } - if (!found) { - throw BinderException("Could not find layer with name: %s", layer_name); - } - } - } - - // Get the layer by index - const auto layer = GDALDatasetGetLayer(dataset, result->layer_idx); - if (!layer) { - throw IOException("Could not get GDAL layer at: %s", result->real_file_path); - } - - // Estimate cardinality - result->estimated_cardinality = OGR_L_GetFeatureCount(layer, 0); - - // Get extent (Only if spatial filter is not pushed down!) - if (OGR_L_GetExtent(layer, &result->layer_extent, 0) == OGRERR_NONE) { - result->has_extent = true; - } - - // Check if fast spatial filtering is available - if (OGR_L_TestCapability(layer, OLCFastSpatialFilter)) { - result->can_filter = true; - } - - // Get the layer geometry type if available - result->layer_type = OGR_L_GetGeomType(layer); - - // Check FID column - const auto fid_col = OGR_L_GetFIDColumn(layer); - if (fid_col && strcmp(fid_col, "") != 0) { - // Do not include the explicit FID if we already have it as a column - result->layer_options.AddString("INCLUDE_FID=NO"); - } - const auto geom_col_name = OGR_L_GetGeometryColumn(layer); - - // Get the arrow stream - if (!OGR_L_GetArrowStream(layer, &stream, result->layer_options.List())) { - throw IOException("Could not get GDAL Arrow stream at: %s", result->real_file_path); - } - - // And the schema - if (stream.get_schema(&stream, &schema) != 0) { - throw IOException("Could not get GDAL Arrow schema at: %s", result->real_file_path); - } - - // Convert Arrow schema to DuckDB types - for (int64_t i = 0; i < schema.n_children; i++) { - auto &child_schema = *schema.children[i]; - const auto gdal_type = ArrowType::GetTypeFromSchema(ctx.db->config, child_schema); - auto duck_type = gdal_type->GetDuckType(); - - // Track geometry columns to compute stats later - if (duck_type.id() == LogicalTypeId::GEOMETRY) { - result->geometry_columns.insert(i); - } - - if (geom_col_name && (strcmp(geom_col_name, "") == 0) && 
(strcmp(child_schema.name, "wkb_geometry") == 0) && - !result->keep_wkb) { - // Rename the geometry column to "geom" unless keep_wkb is set - col_names.push_back("geom"); - } else { - col_names.push_back(child_schema.name); - } - - col_types.push_back(std::move(duck_type)); - } - - } catch (...) { - // Release stream, schema and dataset - if (schema.release) { - schema.release(&schema); - } - if (stream.release) { - stream.release(&stream); - } - if (dataset) { - GDALClose(dataset); - } - // Re-throw exception - throw; - } - - if (schema.release) { - schema.release(&schema); - } - if (stream.release) { - stream.release(&stream); - } - if (dataset) { - GDALClose(dataset); - } - - return std::move(result); -} - -//---------------------------------------------------------------------------------------------------------------------- -// FILTER (EXPRESSION) PUSHDOWN -//---------------------------------------------------------------------------------------------------------------------- -auto Pushdown(ClientContext &context, LogicalGet &get, FunctionData *bind_data, vector> &filters) - -> void { - - auto &bdata = bind_data->Cast(); - - if (!bdata.can_filter) { - return; - } - - if (bdata.geometry_columns.size() != 1) { - return; // Only optimize if there is a single geometry column - } - - optional_idx geom_filter_idx = optional_idx::Invalid(); - - for (idx_t expr_idx = 0; expr_idx < filters.size(); expr_idx++) { - const auto &expr = filters[expr_idx]; - - if (expr->GetExpressionType() != ExpressionType::BOUND_FUNCTION) { - continue; - } - if (expr->return_type != LogicalType::BOOLEAN) { - continue; - } - const auto &func = expr->Cast(); - if (func.children.size() != 2) { - continue; - } - - if (func.children[0]->return_type.id() != LogicalTypeId::GEOMETRY || - func.children[1]->return_type.id() != LogicalTypeId::GEOMETRY) { - continue; - } - - // The set of geometry predicates that can be optimized using the bounding box - static constexpr const char 
*geometry_predicates[2] = {"&&", "st_intersects_extent"}; - - auto found = false; - for (const auto &name : geometry_predicates) { - if (StringUtil::CIEquals(func.function.name.c_str(), name)) { - found = true; - break; - } - } - if (!found) { - // Not a geometry predicate we can optimize - continue; - } - - const auto lhs_kind = func.children[0]->GetExpressionType(); - const auto rhs_kind = func.children[1]->GetExpressionType(); - - const auto lhs_is_const = - lhs_kind == ExpressionType::VALUE_CONSTANT && rhs_kind == ExpressionType::BOUND_COLUMN_REF; - const auto rhs_is_const = - rhs_kind == ExpressionType::VALUE_CONSTANT && lhs_kind == ExpressionType::BOUND_COLUMN_REF; - - if (lhs_is_const == rhs_is_const) { - // Both sides are constant or both sides are column refs - continue; - } - - auto &constant_expr = func.children[lhs_is_const ? 0 : 1]->Cast(); - auto &geometry_expr = func.children[lhs_is_const ? 1 : 0]->Cast(); - - if (constant_expr.value.type().id() != LogicalTypeId::GEOMETRY) { - // Constant is not geometry - continue; - } - if (constant_expr.value.IsNull()) { - // Constant is NULL - continue; - } - if (geometry_expr.alias != "geom") { - // Not the geometry column - continue; - } - - auto geom_extent = GeometryExtent::Empty(); - auto geom_binary = string_t(StringValue::Get(constant_expr.value)); - - if (Geometry::GetExtent(geom_binary, geom_extent)) { - bdata.has_filter = true; - bdata.layer_filter.MinX = geom_extent.x_min; - bdata.layer_filter.MinY = geom_extent.y_min; - bdata.layer_filter.MaxX = geom_extent.x_max; - bdata.layer_filter.MaxY = geom_extent.y_max; - } - - // Set the index so we can remove it later - // We can __ONLY__ do this if the filter predicate is "&&" or "st_intersects_extent" - // as other predicates may require exact geometry evaluation, the filter cannot be fully removed - geom_filter_idx = expr_idx; - break; - } - - if (geom_filter_idx != optional_idx::Invalid()) { - // Remove the filter from the list - 
filters.erase_at(geom_filter_idx.GetIndex()); - } -} - -//---------------------------------------------------------------------------------------------------------------------- -// GLOBAL STATE -//---------------------------------------------------------------------------------------------------------------------- -class GlobalState final : public GlobalTableFunctionState { -public: - ~GlobalState() override { - if (dataset) { - GDALClose(dataset); - dataset = nullptr; - } - - if (stream.release) { - stream.release(&stream); - } - } - - GDALDatasetH dataset; - CPLStringList layer_options; - OGRLayerH layer; - ArrowArrayStream stream; - vector> col_types; - atomic features_read = {0}; -}; - -auto InitGlobal(ClientContext &context, TableFunctionInitInput &input) -> unique_ptr { - auto &bdata = input.bind_data->Cast(); - - const auto dataset = GDALOpenEx(bdata.gdal_file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, - bdata.dataset_drivers, bdata.dataset_options, bdata.dataset_sibling); - - if (!dataset) { - throw IOException("Could not open GDAL dataset at: %s", bdata.real_file_path); - } - - auto result = make_uniq(); - result->dataset = dataset; - result->layer_options = bdata.layer_options; - - const auto driver = GDALGetDatasetDriver(dataset); - if (strcmp(GDALGetDriverShortName(driver), "OSM") != 0) { - // Get the layer by index - result->layer = GDALDatasetGetLayer(dataset, bdata.layer_idx); - } else { - // Special case for OSM, which requires sequential reading of layers - const auto layer_count = GDALDatasetGetLayerCount(dataset); - for (int i = 0; i < layer_count; i++) { - result->layer = GDALDatasetGetLayer(dataset, i); - if (i == bdata.layer_idx) { - // desired layer found - break; - } - - // else scan through and empty the layer - OGRFeatureH feature; - while ((feature = OGR_L_GetNextFeature(result->layer)) != nullptr) { - OGR_F_Destroy(feature); - } - } - } - - // Set the filter, if we got one - if (bdata.has_filter) { - 
OGR_L_SetSpatialFilterRect(result->layer, bdata.layer_filter.MinX, bdata.layer_filter.MinY, - bdata.layer_filter.MaxX, bdata.layer_filter.MaxY); - } - - CPLStringList layer_options; - layer_options.AddString(StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", STANDARD_VECTOR_SIZE).data()); - layer_options.AddString("GEOMETRY_METADATA_ENCODING=GEOARROW"); - - // Open the Arrow stream - if (!OGR_L_GetArrowStream(result->layer, &result->stream, result->layer_options.List())) { - GDALClose(dataset); - throw IOException("Could not get GDAL Arrow stream"); - } - - ArrowSchema schema; - if (result->stream.get_schema(&result->stream, &schema) != 0) { - result->stream.release(&result->stream); - GDALClose(dataset); - throw IOException("Could not get GDAL Arrow schema"); - } - - // Store the column types - for (int64_t i = 0; i < schema.n_children; i++) { - auto &child_schema = *schema.children[i]; - result->col_types.push_back(ArrowType::GetTypeFromSchema(context.db->config, child_schema)); - } - - return std::move(result); -} - -//---------------------------------------------------------------------------------------------------------------------- -// SCAN -//---------------------------------------------------------------------------------------------------------------------- -void Scan(ClientContext &context, TableFunctionInput &input, DataChunk &output) { - auto &state = input.global_state->Cast(); - - ArrowArray arrow_array; - if (state.stream.get_next(&state.stream, &arrow_array) != 0 || arrow_array.release == nullptr) { - // Finished reading - output.SetCardinality(0); - return; - } - - // Now convert the Arrow array to DuckDB - for (idx_t i = 0; i < arrow_array.n_children; i++) { - auto &arr = *arrow_array.children[i]; - auto &vec = output.data[i]; - - auto &arrow_type = *state.col_types[i]; - auto array_state = ArrowArrayScanState(context); - - // We need to make sure that our chunk will hold the ownership - array_state.owned_data = make_shared_ptr(); - 
array_state.owned_data->arrow_array = arrow_array; - - // We set it to nullptr to effectively transfer the ownership - arrow_array.release = nullptr; - - switch (arrow_type.GetPhysicalType()) { - case ArrowArrayPhysicalType::DICTIONARY_ENCODED: - ArrowToDuckDBConversion::ColumnArrowToDuckDBDictionary(vec, arr, 0, array_state, arrow_array.length, - arrow_type); - break; - case ArrowArrayPhysicalType::RUN_END_ENCODED: - ArrowToDuckDBConversion::ColumnArrowToDuckDBRunEndEncoded(vec, arr, 0, array_state, arrow_array.length, - arrow_type); - break; - case ArrowArrayPhysicalType::DEFAULT: - ArrowToDuckDBConversion::SetValidityMask(vec, arr, 0, arrow_array.length, arrow_array.offset, -1); - ArrowToDuckDBConversion::ColumnArrowToDuckDB(vec, arr, 0, array_state, arrow_array.length, arrow_type); - break; - default: - throw NotImplementedException("ArrowArrayPhysicalType not recognized"); - } - } - - state.features_read += arrow_array.length; - output.SetCardinality(arrow_array.length); -} - -//------------------------------------------------------------------------------------------------------------------ -// CARDINALITY -//------------------------------------------------------------------------------------------------------------------ -auto Cardinality(ClientContext &context, const FunctionData *data) -> unique_ptr { - auto &bdata = data->Cast(); - auto result = make_uniq(); - - if (bdata.estimated_cardinality > -1) { - result->has_estimated_cardinality = true; - result->estimated_cardinality = bdata.estimated_cardinality; - result->has_max_cardinality = true; - result->max_cardinality = bdata.estimated_cardinality; - } - - return result; -} - -//---------------------------------------------------------------------------------------------------------------------- -// STATISTICS -//---------------------------------------------------------------------------------------------------------------------- -auto Statistics(ClientContext &context, const FunctionData *bind_data, 
column_t column_index) - -> unique_ptr { - - auto &bdata = bind_data->Cast(); - - // If we have an extent, and the column is a geometry column, we can provide min/max stats - if (bdata.has_extent) { - - // Check if this is the only geometry column - const auto is_geom_col = bdata.geometry_columns.find(column_index) != bdata.geometry_columns.end(); - const auto is_only_one = bdata.geometry_columns.size() == 1; - const auto has_stats = bdata.has_extent || bdata.layer_type != wkbUnknown; - - if (is_geom_col && is_only_one && has_stats) { - auto stats = GeometryStats::CreateUnknown(LogicalType::GEOMETRY()); - - if (bdata.has_extent) { - auto &extent = GeometryStats::GetExtent(stats); - extent.x_min = bdata.layer_extent.MinX; - extent.x_max = bdata.layer_extent.MaxX; - extent.y_min = bdata.layer_extent.MinY; - extent.y_max = bdata.layer_extent.MaxY; - } - - const auto geom_type = bdata.layer_type % 1000; - const auto vert_type = bdata.layer_type / 1000; - - if ((geom_type >= 1) && (geom_type <= 7) && (vert_type >= 0) && (vert_type <= 3)) { - auto &types = GeometryStats::GetTypes(stats); - types.Clear(); - types.AddWKBType(static_cast(geom_type)); - } - - return stats.ToUnique(); - } - } - - return nullptr; -} - -//---------------------------------------------------------------------------------------------------------------------- -// PROGRESS -//---------------------------------------------------------------------------------------------------------------------- -auto Progress(ClientContext &context, const FunctionData *b_data, const GlobalTableFunctionState *g_state) -> double { - auto &bdata = b_data->Cast(); - auto &gstate = g_state->Cast(); - - if (bdata.estimated_cardinality < 0) { - return 0.0; - } - - const auto count = static_cast(gstate.features_read.load()); - const auto total = static_cast(bdata.estimated_cardinality); - - return MinValue(100.0 * (total / count), 100.0); -} - 
-//------------------------------------------------------------------------------------------------------------------ -// REPLACEMENT SCAN -//------------------------------------------------------------------------------------------------------------------ -auto ReplacementScan(ClientContext &, ReplacementScanInput &input, optional_ptr) - -> unique_ptr { - auto &table_name = input.table_name; - auto lower_name = StringUtil::Lower(table_name); - // Check if the table name ends with some common geospatial file extensions - if (StringUtil::EndsWith(lower_name, ".gpkg") || StringUtil::EndsWith(lower_name, ".fgb")) { - - auto table_function = make_uniq(); - vector> children; - children.push_back(make_uniq(Value(table_name))); - table_function->function = make_uniq("ST_Read", std::move(children)); - return std::move(table_function); - } - // else not something we can replace - return nullptr; -} - -//---------------------------------------------------------------------------------------------------------------------- -// REGISTER -//---------------------------------------------------------------------------------------------------------------------- -void Register(ExtensionLoader &loader) { - TableFunction read_func("st_read", {LogicalType::VARCHAR}, Scan, Bind, InitGlobal); - read_func.cardinality = Cardinality; - read_func.statistics = Statistics; - read_func.table_scan_progress = Progress; - read_func.pushdown_complex_filter = Pushdown; - - read_func.named_parameters["open_options"] = LogicalType::LIST(LogicalType::VARCHAR); - read_func.named_parameters["allowed_drivers"] = LogicalType::LIST(LogicalType::VARCHAR); - read_func.named_parameters["sibling_files"] = LogicalType::LIST(LogicalType::VARCHAR); - read_func.named_parameters["layer"] = LogicalType::VARCHAR; - read_func.named_parameters["max_batch_size"] = LogicalType::INTEGER; - read_func.named_parameters["keep_wkb"] = LogicalType::BOOLEAN; - - loader.RegisterFunction(read_func); - - auto &config = 
DBConfig::GetConfig(loader.GetDatabaseInstance()); - config.replacement_scans.emplace_back(ReplacementScan); -} - -} // namespace gdal_read -//====================================================================================================================== -// GDAL COPY -//====================================================================================================================== -namespace gdal_copy { - -//---------------------------------------------------------------------------------------------------------------------- -// Bind -//---------------------------------------------------------------------------------------------------------------------- -class BindData final : public TableFunctionData { -public: - //string gdal_file_path; - //string real_file_path; - string driver_name; - string layer_name; - - CPLStringList driver_options; - CPLStringList layer_options; - - string target_srs; - OGRwkbGeometryType geometry_type; - - // Arrow info - ClientProperties props; - ArrowSchema schema; - unordered_map> extension_type_cast; - - ~BindData() override { - if (schema.release) { - schema.release(&schema); - } - } -}; - -bool MatchOption(const char *name, const pair> &option, bool list = false) { - if (StringUtil::CIEquals(name, option.first)) { - if (option.second.empty()) { - throw BinderException("GDAL COPY option '%s' requires a value", name); - } - if (!list) { - if (option.second.size() != 1) { - throw BinderException("GDAL COPY option '%s' only accepts a single value", name); - } - if (option.second.back().type().id() != LogicalTypeId::VARCHAR) { - throw BinderException("GDAL COPY option '%s' must be a string", name); - } - } else { - for (auto &val : option.second) { - if (val.type().id() != LogicalTypeId::VARCHAR) { - throw BinderException("GDAL COPY option '%s' must be a list of strings", name); - } - } - } - return true; - } - return false; -} - -auto Bind(ClientContext &context, CopyFunctionBindInput &input, const vector &names, - const 
vector &sql_types) -> unique_ptr { - auto result = make_uniq(); - - // Set file pat - const auto &file_path = input.info.file_path; - - // Parse options - for (auto &option : input.info.options) { - - if (MatchOption("DRIVER", option)) { - result->driver_name = option.second.back().GetValue(); - continue; - } - - if (MatchOption("LAYER_NAME", option)) { - result->layer_name = option.second.back().GetValue(); - continue; - } - - if (MatchOption("SRS", option) || MatchOption("CRS", option)) { - result->target_srs = option.second.back().GetValue(); - continue; - } - - if (MatchOption("GEOMETRY_TYPE", option)) { - auto type = option.second.back().GetValue(); - if (StringUtil::CIEquals(type, "POINT")) { - result->geometry_type = wkbPoint; - } else if (StringUtil::CIEquals(type, "LINESTRING")) { - result->geometry_type = wkbLineString; - } else if (StringUtil::CIEquals(type, "POLYGON")) { - result->geometry_type = wkbPolygon; - } else if (StringUtil::CIEquals(type, "MULTIPOINT")) { - result->geometry_type = wkbMultiPoint; - } else if (StringUtil::CIEquals(type, "MULTILINESTRING")) { - result->geometry_type = wkbMultiLineString; - } else if (StringUtil::CIEquals(type, "MULTIPOLYGON")) { - result->geometry_type = wkbMultiPolygon; - } else if (StringUtil::CIEquals(type, "GEOMETRYCOLLECTION")) { - result->geometry_type = wkbGeometryCollection; - } else { - throw BinderException("Unsupported GEOMETRY_TYPE: '%s'", type); - } - continue; - } - - if (MatchOption("LAYER_CREATION_OPTIONS", option, true)) { - for (auto &val : option.second) { - result->layer_options.AddString(val.GetValue().c_str()); - } - continue; - } - - if (MatchOption("DATASET_CREATION_OPTIONS", option, true)) { - for (auto &val : option.second) { - result->driver_options.AddString(val.GetValue().c_str()); - } - continue; - } - - throw BinderException("Unknown GDAL COPY option: '%s'", option.first); - } - - // Check that options are valid - if (result->driver_name.empty()) { - throw BinderException("GDAL COPY 
option 'DRIVER' is required"); - } - - if (result->layer_name.empty()) { - auto &fs = FileSystem::GetFileSystem(context); - result->layer_name = fs.ExtractBaseName(file_path); - } - - // Check the driver - const auto driver = GDALGetDriverByName(result->driver_name.c_str()); - if (!driver) { - throw BinderException("Could not find GDAL driver: " + result->driver_name); - } - - // Try to get the file extension from the driver - const auto file_ext = GDALGetMetadataItem(driver, GDAL_DMD_EXTENSIONS, nullptr); - if (file_ext) { - input.file_extension = file_ext; - } else { - const auto file_exts = GDALGetMetadataItem(driver, GDAL_DMD_EXTENSIONS, nullptr); - const auto exts = StringUtil::Split(file_exts, ' '); - if (!exts.empty()) { - input.file_extension = exts[0]; - } - } - - // Driver-specific checks - if (result->driver_name == "OpenFileGDB" && result->geometry_type == wkbUnknown) { - throw BinderException("OpenFileGDB requires 'GEOMETRY_TYPE' parameter to be set when writing!"); - } - - // Setup arrow schema - result->props = context.GetClientProperties(); - result->extension_type_cast = duckdb::ArrowTypeExtensionData::GetExtensionTypes(context, sql_types); - ArrowConverter::ToArrowSchema(&result->schema, sql_types, names, result->props); - - return std::move(result); -} - -//---------------------------------------------------------------------------------------------------------------------- -// Global State -//---------------------------------------------------------------------------------------------------------------------- -class GlobalState final : public GlobalFunctionData { -public: - ~GlobalState() override { - if (dataset) { - GDALClose(dataset); - dataset = nullptr; - } - if (srs) { - OSRDestroySpatialReference(srs); - srs = nullptr; - } - } - - mutex lock; - GDALDatasetH dataset = nullptr; - OGRLayerH layer = nullptr; - OGRSpatialReferenceH srs = nullptr; -}; - -auto InitGlobal(ClientContext &context, FunctionData &bdata_p, const string 
&real_file_path) - -> unique_ptr { - auto &bdata = bdata_p.Cast(); - auto result = make_uniq(); - - const auto driver = GDALGetDriverByName(bdata.driver_name.c_str()); - if (!driver) { - throw InvalidInputException("Could not find GDAL driver: " + bdata.driver_name); - } - - const auto &file_prefix = DuckDBFileSystemPrefix::GetOrCreate(context); - const auto gdal_file_path = file_prefix.AddPrefix(real_file_path); - - // Create Dataset - result->dataset = GDALCreate(driver, gdal_file_path.c_str(), 0, 0, 0, GDT_Unknown, bdata.driver_options); - if (!result->dataset) { - throw IOException("Could not create GDAL dataset at: " + real_file_path); - } - - if (!bdata.target_srs.empty()) { - // Make a new spatial reference object, and set it from the user input - result->srs = OSRNewSpatialReference(nullptr); - OSRSetFromUserInput(result->srs, bdata.target_srs.c_str()); - } - - // Create Layer - result->layer = GDALDatasetCreateLayer(result->dataset, bdata.layer_name.c_str(), result->srs, bdata.geometry_type, - bdata.layer_options); - - if (!result->layer) { - throw IOException("Could not create GDAL layer in dataset at: " + real_file_path); - } - - // Create fields for all children - auto geometry_field_count = 0; - for (auto i = 0; i < bdata.schema.n_children; i++) { - const auto child_schema = bdata.schema.children[i]; - - // Check if this is a geometry field - if (child_schema->metadata != nullptr) { - // TODO: Look for arrow metadata! 
- geometry_field_count++; - if (geometry_field_count > 1) { - throw NotImplementedException("Multiple geometry fields not supported yet"); - } - } else { - // Register normal attribute - if (!OGR_L_CreateFieldFromArrowSchema(result->layer, child_schema, nullptr)) { - throw IOException("Could not create field in GDAL layer for column: " + string(child_schema->name)); - } - } - } - - return std::move(result); -} - -//---------------------------------------------------------------------------------------------------------------------- -// Local State -//---------------------------------------------------------------------------------------------------------------------- -class LocalState final : public LocalFunctionData { -public: - ~LocalState() override { - if (array.release) { - array.release(&array); - array.release = nullptr; - } - } - ArrowArray array; -}; - -auto InitLocal(ExecutionContext &context, FunctionData &bind_data) -> unique_ptr { - auto result = make_uniq(); - return std::move(result); -} - -//---------------------------------------------------------------------------------------------------------------------- -// Sink -//---------------------------------------------------------------------------------------------------------------------- -void Sink(ExecutionContext &context, FunctionData &bdata_p, GlobalFunctionData &gstate_p, LocalFunctionData &lstate_p, - DataChunk &input) { - - const auto &bdata = bdata_p.Cast(); - auto &gstate = gstate_p.Cast(); - auto &lstate = lstate_p.Cast(); - - auto &arrow_array = lstate.array; - auto &arrow_schema = bdata.schema; - - // Convert to Arrow array - ArrowConverter::ToArrowArray(input, &arrow_array, bdata.props, bdata.extension_type_cast); - - // Sink the Arrow array into GDAL - { - // Lock - lock_guard guard(gstate.lock); - - // Sink into GDAL - OGR_L_WriteArrowBatch(gstate.layer, &arrow_schema, &arrow_array, nullptr); - } - - // Release the array - if (arrow_array.release) { - arrow_array.release(&arrow_array); 
- arrow_array.release = nullptr; - } -} - -//---------------------------------------------------------------------------------------------------------------------- -// Combine -//---------------------------------------------------------------------------------------------------------------------- -void Combine(ExecutionContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, - LocalFunctionData &lstate) { - // Nothing to do, we don't have any local state that needs to be merged -} - -//---------------------------------------------------------------------------------------------------------------------- -// Finalize -//---------------------------------------------------------------------------------------------------------------------- -void Finalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate_p) { - auto &gstate = gstate_p.Cast(); - - // Flush and close the dataset - GDALFlushCache(gstate.dataset); - GDALClose(gstate.dataset); - gstate.dataset = nullptr; -} - -CopyFunctionExecutionMode Mode(bool preserve_insertion_order, bool use_batch_index) { - // Parallel writes have limited utility since we still lock on each write to GDAL layer - // But in theory we still benefit from the parallel conversion to Arrow arrays, and this also allows - // the rest of the pipeline to be parallelized if we don't care about insertion order. - return preserve_insertion_order ? 
CopyFunctionExecutionMode::REGULAR_COPY_TO_FILE - : CopyFunctionExecutionMode::PARALLEL_COPY_TO_FILE; -} - -//---------------------------------------------------------------------------------------------------------------------- -// Register -//---------------------------------------------------------------------------------------------------------------------- -void Register(ExtensionLoader &loader) { - CopyFunction info("GDAL"); - - info.copy_to_bind = Bind; - info.copy_to_initialize_local = InitLocal; - info.copy_to_initialize_global = InitGlobal; - info.copy_to_sink = Sink; - info.copy_to_combine = Combine; - info.copy_to_finalize = Finalize; - info.execution_mode = Mode; - info.extension = "gdal"; - - loader.RegisterFunction(info); -} - -} // namespace gdal_copy -} // namespace - -void RegisterExtraFunction(ExtensionLoader &loader) { - - // Load GDAL (once) - static std::once_flag loaded; - std::call_once(loaded, [&]() { - // Register all embedded drivers (dont go looking for plugins) - OGRRegisterAllInternal(); - - // Set GDAL error handler - CPLSetErrorHandler([](CPLErr e, int code, const char *raw_msg) { - // DuckDB doesnt do warnings, so we only throw on errors - if (e != CE_Failure && e != CE_Fatal) { - return; - } - - // GDAL Catches exceptions internally and passes them on to the handler again as CPLE_AppDefined - // So we don't add any extra information here or we end up with very long nested error messages. - // Using ErrorData we can parse the message part of DuckDB exceptions properly, and for other exceptions - // their error message will still be preserved as the "raw message". 
- ErrorData error_data(raw_msg); - auto msg = error_data.RawMessage(); - - // If the error contains a /vsiduckdb-/ prefix, - // try to strip it off to make the errors more readable - auto path_pos = msg.find("/vsiduckdb-"); - if (path_pos != string::npos) { - // We found a path, strip it off - msg.erase(path_pos, 48); - } - - switch (code) { - case CPLE_NoWriteAccess: - throw PermissionException(msg); - case CPLE_UserInterrupt: - throw InterruptException(); - case CPLE_OutOfMemory: - throw OutOfMemoryException(msg); - case CPLE_NotSupported: - throw NotImplementedException(msg); - case CPLE_AssertionFailed: - case CPLE_ObjectNull: - throw InternalException(msg); - case CPLE_IllegalArg: - throw InvalidInputException(msg); - case CPLE_AppDefined: - case CPLE_HttpResponse: - case CPLE_FileIO: - case CPLE_OpenFailed: - default: - throw IOException(msg); - } - }); - }); - - gdal_read::Register(loader); - gdal_copy::Register(loader); -} -} // namespace duckdb diff --git a/src/spatial/modules/gdal/gdal_module.cpp b/src/spatial/modules/gdal/gdal_module.cpp index c6056ff2..c58119e7 100644 --- a/src/spatial/modules/gdal/gdal_module.cpp +++ b/src/spatial/modules/gdal/gdal_module.cpp @@ -1,119 +1,117 @@ -#include "spatial/modules/gdal/gdal_module.hpp" - // Spatial -#include "spatial/spatial_types.hpp" -#include "spatial/geometry/sgl.hpp" -#include "spatial/geometry/geometry_serialization.hpp" +#include "spatial/modules/gdal/gdal_module.hpp" #include "spatial/util/function_builder.hpp" -// DuckDB +// DUCKDB +#include "duckdb/main/extension/extension_loader.hpp" +#include "duckdb/function/copy_function.hpp" +#include "duckdb/function/table/arrow.hpp" +#include "duckdb/common/arrow/arrow_converter.hpp" +#include "duckdb/common/arrow/arrow.hpp" #include "duckdb/main/database.hpp" -#include "duckdb/common/enums/file_glob_options.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckdb/planner/expression/bound_constant_expression.hpp" +#include 
"duckdb/parser/expression/constant_expression.hpp" +#include "duckdb/parser/expression/function_expression.hpp" #include "duckdb/common/multi_file/multi_file_reader.hpp" -#include "duckdb/function/table/arrow.hpp" -#include "duckdb/parser/parsed_data/create_table_function_info.hpp" #include "duckdb/common/types/uuid.hpp" -#include "duckdb/function/copy_function.hpp" #include "duckdb/parser/tableref/table_function_ref.hpp" -#include "duckdb/parser/parsed_expression.hpp" -#include "duckdb/parser/expression/function_expression.hpp" // GDAL +#include "gdal.h" +#include "ogr_core.h" +#include "ogr_api.h" +#include "ogr_srs_api.h" +#include "ogrsf_frmts.h" #include "cpl_string.h" #include "cpl_vsi.h" #include "cpl_vsi_error.h" #include "cpl_vsi_virtual.h" -#include "ogrsf_frmts.h" namespace duckdb { - namespace { -//###################################################################################################################### -// DuckDB GDAL VFS -//###################################################################################################################### -// This implements a GDAL "VFS" (Virtual File System) that allows GDAL to read and write files from DuckDB's file -// system -// TODO: Make another pass at this, we should be able to clean it up a bit more. 
- +//====================================================================================================================== +// GDAL FILE +//====================================================================================================================== class DuckDBFileHandle final : public VSIVirtualHandle { -private: - unique_ptr file_handle; - bool is_eof; - public: explicit DuckDBFileHandle(unique_ptr file_handle_p) - : file_handle(std::move(file_handle_p)), is_eof(false) { + : file_handle(std::move(file_handle_p)), is_eof(false), can_seek(file_handle->CanSeek()) { } vsi_l_offset Tell() override { return static_cast(file_handle->SeekPosition()); } + int Seek(vsi_l_offset nOffset, int nWhence) override { + // Reset EOF flag on seek is_eof = false; + // Use the reset function instead to allow compressed file handles to rewind + // even if they don't support seeking if (nWhence == SEEK_SET && nOffset == 0) { - // Use the reset function instead to allow compressed file handles to rewind - // even if they don't support seeking file_handle->Reset(); return 0; } + switch (nWhence) { case SEEK_SET: file_handle->Seek(nOffset); - break; + return 0; case SEEK_CUR: file_handle->Seek(file_handle->SeekPosition() + nOffset); - break; + return 0; case SEEK_END: file_handle->Seek(file_handle->GetFileSize() + nOffset); - break; + return 0; default: - throw InternalException("Unknown seek type"); + return -1; } - return 0; } - size_t Read(void *pBuffer, size_t nSize, size_t nCount) override { - auto remaining_bytes = nSize * nCount; + size_t Read(void *buffer, size_t size, size_t count) override { + auto bytes_data = static_cast(buffer); + auto bytes_left = size * count; + try { - while (remaining_bytes > 0) { - auto read_bytes = file_handle->Read(pBuffer, remaining_bytes); - if (read_bytes == 0) { + while (bytes_left > 0) { + const auto bytes_read = file_handle->Read(bytes_data, bytes_left); + if (bytes_read == 0) { break; } - remaining_bytes -= read_bytes; - // Note we 
performed a cast back to void* - pBuffer = static_cast(pBuffer) + read_bytes; + bytes_left -= bytes_read; + bytes_data += bytes_read; } } catch (...) { - } - - if (remaining_bytes != 0) { - if (file_handle->SeekPosition() == file_handle->GetFileSize()) { - // Is at EOF! - is_eof = true; + if (bytes_left != 0) { + if (file_handle->SeekPosition() == file_handle->GetFileSize()) { + // Is at EOF! + is_eof = true; + } + } else { + // else, error! + // unfortunately, this version of GDAL cant distinguish between errors and reading less bytes + // its avaiable in 3.9.2, but we're stuck on 3.8.5 for now. + throw; } - // else, error! - // unfortunately, this version of GDAL cant distinguish between errors and reading less bytes - // its avaiable in 3.9.2, but we're stuck on 3.8.5 for now. } - return nCount - (remaining_bytes / nSize); + return count - (bytes_left / size); } int Eof() override { return is_eof ? TRUE : FALSE; } - size_t Write(const void *pBuffer, size_t nSize, size_t nCount) override { + size_t Write(const void *buffer, size_t size, size_t count) override { size_t written_bytes = 0; try { - written_bytes = file_handle->Write(const_cast(pBuffer), nSize * nCount); + written_bytes = file_handle->Write(const_cast(buffer), size * count); } catch (...) 
{ + // ignore } - // Return the number of items written - return static_cast(written_bytes / nSize); + return written_bytes / size; } int Flush() override { @@ -129,45 +127,36 @@ class DuckDBFileHandle final : public VSIVirtualHandle { return 0; } - // int ReadMultiRange(int nRanges, void **ppData, const vsi_l_offset *panOffsets, const size_t *panSizes) override; - // void AdviseRead(int nRanges, const vsi_l_offset *panOffsets, const size_t *panSizes) override; - // VSIRangeStatus GetRangeStatus(vsi_l_offset nOffset, vsi_l_offset nLength) override; +private: + unique_ptr file_handle = nullptr; + bool is_eof = false; + bool can_seek = false; }; -//-------------------------------------------------------------------------- -// GDAL DuckDB File system wrapper -//-------------------------------------------------------------------------- -bool IsStdCharDev(const char *file_name) { - return !strcmp(file_name, "/dev/stdin") || !strcmp(file_name, "/dev/stdout") || !strcmp(file_name, "/dev/stderr") || - !strcmp(file_name, "/dev/null") || !strcmp(file_name, "/dev/zero"); -} - class DuckDBFileSystemHandler final : public VSIFilesystemHandler { -private: - string client_prefix; - ClientContext &context; - public: DuckDBFileSystemHandler(string client_prefix, ClientContext &context) : client_prefix(std::move(client_prefix)), context(context) {}; - const char *StripPrefix(const char *pszFilename) { + const char *StripPrefix(const char *pszFilename) const { return pszFilename + client_prefix.size(); } - - string AddPrefix(const string &value) { + string AddPrefix(const string &value) const { return client_prefix + value; } - VSIVirtualHandle *Open(const char *prefixed_file_name, const char *access, bool bSetError, - CSLConstList /* papszOptions */) override { - auto file_name = StripPrefix(prefixed_file_name); - auto file_name_str = string(file_name); + VSIVirtualHandle *Open(const char *gdal_file_path, const char *access, bool set_error, + CSLConstList /*papszoptions */) override 
{ + + // Strip the prefix to get the real file path + const auto real_file_path = StripPrefix(gdal_file_path); + + // Get the DuckDB file system auto &fs = FileSystem::GetFileSystem(context); - // TODO: Double check that this is correct + // Determine the file open flags FileOpenFlags flags; - auto len = strlen(access); + const auto len = strlen(access); if (access[0] == 'r') { flags = FileFlags::FILE_FLAGS_READ; if (len > 1 && access[1] == '+') { @@ -179,7 +168,7 @@ class DuckDBFileSystemHandler final : public VSIFilesystemHandler { } } else if (access[0] == 'w') { flags = FileFlags::FILE_FLAGS_WRITE; - if (!IsStdCharDev(file_name)) { + if (!fs.IsPipe(real_file_path)) { flags |= FileFlags::FILE_FLAGS_FILE_CREATE_NEW; } if (len > 1 && access[1] == '+') { @@ -203,151 +192,140 @@ class DuckDBFileSystemHandler final : public VSIFilesystemHandler { } try { - // Check if the file is a directory - -#ifdef _WIN32 - if (!FileSystem::IsRemoteFile(file_name) && fs.DirectoryExists(file_name_str) && (flags.OpenForReading())) { - // We can't open a directory for reading on windows without special flags - // so just open nul instead, gdal will reject it when it tries to read - auto file = fs.OpenFile("nul", flags); - return new DuckDBFileHandle(std::move(file)); - } -#endif + auto file = fs.OpenFile(real_file_path, flags | FileCompressionType::AUTO_DETECT); + return new DuckDBFileHandle(std::move(file)); - // If the file is remote and NOT in write mode, we can cache it. 
- if (FileSystem::IsRemoteFile(file_name_str) && !flags.OpenForWriting() && !flags.OpenForAppending()) { + } catch (std::exception &ex) { - // Pass the direct IO flag to the file system since we use GDAL's caching instead - flags |= FileFlags::FILE_FLAGS_DIRECT_IO; + // Extract error message from DuckDB + const ErrorData error_data(ex); - auto file = fs.OpenFile(file_name, flags | FileCompressionType::AUTO_DETECT); - return VSICreateCachedFile(new DuckDBFileHandle(std::move(file))); - } else { - auto file = fs.OpenFile(file_name, flags | FileCompressionType::AUTO_DETECT); - return new DuckDBFileHandle(std::move(file)); - } - } catch (std::exception &ex) { // Failed to open file via DuckDB File System. If this doesnt have a VSI prefix we can return an error here. - if (strncmp(file_name, "/vsi", 4) != 0 && !IsStdCharDev(file_name)) { - if (bSetError) { - VSIError(VSIE_FileError, "Failed to open file %s: %s", file_name, ex.what()); + if (strncmp(real_file_path, "/vsi", 4) != 0) { + if (set_error) { + VSIError(VSIE_FileError, "%s", error_data.RawMessage().c_str()); } return nullptr; } // Fall back to GDAL instead (if external access is enabled) if (!context.db->config.options.enable_external_access) { - if (bSetError) { - VSIError(VSIE_FileError, "Failed to open file %s with GDAL: External access is disabled", - file_name); + if (set_error) { + VSIError(VSIE_FileError, "%s", error_data.RawMessage().c_str()); } return nullptr; } - const auto handler = VSIFileManager::GetHandler(file_name); + const auto handler = VSIFileManager::GetHandler(real_file_path); if (!handler) { - if (bSetError) { - VSIError(VSIE_FileError, "Failed to open file %s: %s", file_name, ex.what()); + if (set_error) { + VSIError(VSIE_FileError, "%s", error_data.RawMessage().c_str()); } return nullptr; } - return handler->Open(file_name, access); + return handler->Open(real_file_path, access); } } - int Stat(const char *prefixed_file_name, VSIStatBufL *pstatbuf, int n_flags) override { - auto 
file_name = StripPrefix(prefixed_file_name); + int Stat(const char *gdal_file_name, VSIStatBufL *result, int n_flags) override { + auto real_file_path = StripPrefix(gdal_file_name); auto &fs = FileSystem::GetFileSystem(context); - memset(pstatbuf, 0, sizeof(VSIStatBufL)); + memset(result, 0, sizeof(VSIStatBufL)); - if (IsStdCharDev(file_name)) { - pstatbuf->st_mode = S_IFCHR; + if (fs.IsPipe(real_file_path)) { + result->st_mode = S_IFCHR; return 0; } - if (!(fs.FileExists(file_name) || (!FileSystem::IsRemoteFile(file_name) && fs.DirectoryExists(file_name)))) { + if (!(fs.FileExists(real_file_path) || + (!FileSystem::IsRemoteFile(real_file_path) && fs.DirectoryExists(real_file_path)))) { return -1; } #ifdef _WIN32 - if (!FileSystem::IsRemoteFile(file_name) && fs.DirectoryExists(file_name)) { + if (!FileSystem::IsRemoteFile(real_file_path) && fs.DirectoryExists(real_file_path)) { pstatbuf->st_mode = S_IFDIR; return 0; } #endif - unique_ptr file; - try { - file = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ | FileCompressionType::AUTO_DETECT | - FileFlags::FILE_FLAGS_NULL_IF_NOT_EXISTS); - } catch (std::exception &ex) { - return -1; - } + FileOpenFlags flags; + flags |= FileFlags::FILE_FLAGS_READ; + flags |= FileFlags::FILE_FLAGS_NULL_IF_NOT_EXISTS; + flags |= FileCompressionType::AUTO_DETECT; + + const auto file = fs.OpenFile(real_file_path, flags); if (!file) { return -1; } - pstatbuf->st_size = static_cast(fs.GetFileSize(*file)); - pstatbuf->st_mtime = Timestamp::ToTimeT(fs.GetLastModifiedTime(*file)); - - auto type = file->GetType(); - switch (type) { - // These are the only three types present on all platforms - case FileType::FILE_TYPE_REGULAR: - pstatbuf->st_mode = S_IFREG; - break; - case FileType::FILE_TYPE_DIR: - pstatbuf->st_mode = S_IFDIR; - break; - case FileType::FILE_TYPE_CHARDEV: - pstatbuf->st_mode = S_IFCHR; - break; - default: - // HTTPFS returns invalid type for everything basically. 
- if (FileSystem::IsRemoteFile(file_name)) { - pstatbuf->st_mode = S_IFREG; - } else { - return -1; + try { + result->st_size = static_cast(fs.GetFileSize(*file)); + } catch (...) { + } + try { + result->st_mtime = Timestamp::ToTimeT(fs.GetLastModifiedTime(*file)); + } catch (...) { + } + try { + const auto type = file->GetType(); + switch (type) { + case FileType::FILE_TYPE_REGULAR: + result->st_mode = S_IFREG; + break; + case FileType::FILE_TYPE_DIR: + result->st_mode = S_IFDIR; + break; + case FileType::FILE_TYPE_CHARDEV: + result->st_mode = S_IFCHR; + break; + default: + // HTTPFS returns invalid type for everything basically. + if (FileSystem::IsRemoteFile(real_file_path)) { + result->st_mode = S_IFREG; + } else { + return -1; + } } + } catch (...) { } - return 0; } - bool IsLocal(const char *prefixed_file_name) override { - auto file_name = StripPrefix(prefixed_file_name); - return !FileSystem::IsRemoteFile(file_name); + bool IsLocal(const char *gdal_file_path) override { + const auto real_file_path = StripPrefix(gdal_file_path); + return !FileSystem::IsRemoteFile(real_file_path); } - int Mkdir(const char *prefixed_dir_name, long mode) override { - auto dir_name = StripPrefix(prefixed_dir_name); + int Mkdir(const char *pszDirname, long nMode) override { auto &fs = FileSystem::GetFileSystem(context); + const auto dir_name = StripPrefix(pszDirname); fs.CreateDirectory(dir_name); return 0; } - int Rmdir(const char *prefixed_dir_name) override { - auto dir_name = StripPrefix(prefixed_dir_name); + int Rmdir(const char *pszDirname) override { auto &fs = FileSystem::GetFileSystem(context); + const auto dir_name = StripPrefix(pszDirname); fs.RemoveDirectory(dir_name); return 0; } - int RmdirRecursive(const char *prefixed_dir_name) override { - auto dir_name = StripPrefix(prefixed_dir_name); + int RmdirRecursive(const char *pszDirname) override { auto &fs = FileSystem::GetFileSystem(context); + const auto dir_name = StripPrefix(pszDirname); 
fs.RemoveDirectory(dir_name); return 0; } - char **ReadDirEx(const char *prefixed_dir_name, int max_files) override { - auto dir_name = StripPrefix(prefixed_dir_name); + char **ReadDirEx(const char *gdal_dir_name, int max_files) override { auto &fs = FileSystem::GetFileSystem(context); + const auto dir_name = StripPrefix(gdal_dir_name); CPLStringList files; auto files_count = 0; @@ -362,20 +340,18 @@ class DuckDBFileSystemHandler final : public VSIFilesystemHandler { return files.StealList(); } - char **SiblingFiles(const char *prefixed_file_name) override { - auto file_name = StripPrefix(prefixed_file_name); - + char **SiblingFiles(const char *gdal_file_path) override { auto &fs = FileSystem::GetFileSystem(context); - CPLStringList files; - auto file_name_without_ext = - fs.JoinPath(StringUtil::GetFilePath(file_name), StringUtil::GetFileStem(file_name)); - auto file_glob = file_name_without_ext + ".*"; + const auto real_file_path = StripPrefix(gdal_file_path); - auto file_vector = fs.Glob(file_glob); - for (auto &file : file_vector) { - auto tmp = AddPrefix(file.path); - files.AddString(tmp.c_str()); + const auto real_file_stem = StringUtil::GetFileStem(real_file_path); + const auto base_file_path = fs.JoinPath(StringUtil::GetFilePath(real_file_path), real_file_stem); + const auto glob_file_path = base_file_path + ".*"; + + CPLStringList files; + for (auto &file : fs.Glob(glob_file_path)) { + files.AddString(AddPrefix(file.path).c_str()); } return files.StealList(); } @@ -385,651 +361,650 @@ class DuckDBFileSystemHandler final : public VSIFilesystemHandler { } int Unlink(const char *prefixed_file_name) override { - auto file_name = StripPrefix(prefixed_file_name); auto &fs = FileSystem::GetFileSystem(context); + const auto real_file_path = StripPrefix(prefixed_file_name); try { - fs.RemoveFile(file_name); + fs.RemoveFile(real_file_path); return 0; - } catch (std::exception &ex) { + } catch (...) 
{ return -1; } } -}; -//###################################################################################################################### -// Context State -//###################################################################################################################### -// We give every client a unique prefix so that multiple connections can use their own attached file systems. -// This is necessary because GDAL is not otherwise aware of the connection context. + int Rename(const char *oldpath, const char *newpath) override { + auto &fs = FileSystem::GetFileSystem(context); + const auto real_old_path = StripPrefix(oldpath); + const auto real_new_path = StripPrefix(newpath); -class GDALClientContextState final : public ClientContextState { - ClientContext &context; - string client_prefix; - DuckDBFileSystemHandler *fs_handler; + try { + fs.MoveFile(real_old_path, real_new_path); + return 0; + } catch (...) { + return -1; + } + } -public: - explicit GDALClientContextState(ClientContext &context); - ~GDALClientContextState() override; - void QueryEnd() override; - string GetPrefix(const string &value) const; - static GDALClientContextState &GetOrCreate(ClientContext &context); +private: + string client_prefix; + ClientContext &context; }; -GDALClientContextState::GDALClientContextState(ClientContext &context) : context(context) { - - // Create a new random prefix for this client - client_prefix = StringUtil::Format("/vsiduckdb-%s/", UUID::ToString(UUID::GenerateRandomUUID())); - - // Create a new file handler responding to this prefix - fs_handler = new DuckDBFileSystemHandler(client_prefix, context); - - // Register the file handler - VSIFileManager::InstallHandler(client_prefix, fs_handler); - - // Also pass a reference to the client context -} +class DuckDBFileSystemPrefix final : public ClientContextState { +public: + explicit DuckDBFileSystemPrefix(ClientContext &context) : context(context) { + // Create a new random prefix for this client + 
client_prefix = StringUtil::Format("/vsiduckdb-%s/", UUID::ToString(UUID::GenerateRandomUUID())); -GDALClientContextState::~GDALClientContextState() { - // Uninstall the file handler for this prefix - VSIFileManager::RemoveHandler(client_prefix); + // Create a new file handler responding to this prefix + fs_handler = make_uniq(client_prefix, context); - // Delete the file handler - delete fs_handler; -} + // Register the file handler + VSIFileManager::InstallHandler(client_prefix, fs_handler.get()); + } -void GDALClientContextState::QueryEnd() { -} + ~DuckDBFileSystemPrefix() override { + // Uninstall the file handler for this prefix + VSIFileManager::RemoveHandler(client_prefix); + } -string GDALClientContextState::GetPrefix(const string &value) const { - // If the user explicitly asked for a VSI prefix, we don't add our own - if (StringUtil::StartsWith(value, "/vsi")) { - if (!context.db->config.options.enable_external_access) { - throw PermissionException("Cannot open file '%s' with VSI prefix: External access is disabled", value); + string AddPrefix(const string &value) const { + // If the user explicitly asked for a VSI prefix, we don't add our own + if (StringUtil::StartsWith(value, "/vsi")) { + if (!context.db->config.options.enable_external_access) { + throw PermissionException("Cannot open file '%s' with VSI prefix: External access is disabled", value); + } + return value; } - return value; + return client_prefix + value; } - return client_prefix + value; -} -GDALClientContextState &GDALClientContextState::GetOrCreate(ClientContext &context) { - auto gdal_state = context.registered_state->GetOrCreate("gdal", context); - return *gdal_state; -} + static DuckDBFileSystemPrefix &GetOrCreate(ClientContext &context) { + return *context.registered_state->GetOrCreate("gdal", context); + } -//###################################################################################################################### -// Functions 
-//###################################################################################################################### +private: + ClientContext &context; + string client_prefix; + unique_ptr fs_handler; +}; //====================================================================================================================== -// ST_Read +// GDAL READ //====================================================================================================================== +namespace gdal_read { -struct ST_Read : ArrowTableFunction { - - //------------------------------------------------------------------------------------------------------------------ - // Misc - //------------------------------------------------------------------------------------------------------------------ - enum class SpatialFilterType { Wkb, Rectangle }; +//---------------------------------------------------------------------------------------------------------------------- +// BIND +//---------------------------------------------------------------------------------------------------------------------- +class BindData final : public TableFunctionData { +public: + string real_file_path; + string gdal_file_path; - struct SpatialFilter { - SpatialFilterType type; - explicit SpatialFilter(SpatialFilterType type_p) : type(type_p) {}; - }; + int layer_idx = 0; + bool keep_wkb = false; - struct RectangleSpatialFilter : SpatialFilter { - double min_x, min_y, max_x, max_y; - RectangleSpatialFilter(double min_x_p, double min_y_p, double max_x_p, double max_y_p) - : SpatialFilter(SpatialFilterType::Rectangle), min_x(min_x_p), min_y(min_y_p), max_x(max_x_p), - max_y(max_y_p) { - } - }; + CPLStringList layer_options; + CPLStringList dataset_options; + CPLStringList dataset_sibling; + CPLStringList dataset_drivers; - struct WKBSpatialFilter : SpatialFilter { - OGRGeometryH geom; - explicit WKBSpatialFilter(const string &wkb_p) : SpatialFilter(SpatialFilterType::Wkb), geom(nullptr) { - auto ok = 
OGR_G_CreateFromWkb(wkb_p.c_str(), nullptr, &geom, (int)wkb_p.size()); - if (ok != OGRERR_NONE) { - throw InvalidInputException("WKBSpatialFilter: could not create geometry from WKB"); - } - } - ~WKBSpatialFilter() { - OGR_G_DestroyGeometry(geom); - } - }; + int64_t estimated_cardinality = 0; + unordered_set geometry_columns = {}; - static void TryApplySpatialFilter(OGRLayer *layer, SpatialFilter *spatial_filter) { - if (spatial_filter != nullptr) { - if (spatial_filter->type == SpatialFilterType::Rectangle) { - auto &rect = static_cast(*spatial_filter); - layer->SetSpatialFilterRect(rect.min_x, rect.min_y, rect.max_x, rect.max_y); - } else if (spatial_filter->type == SpatialFilterType::Wkb) { - auto &filter = static_cast(*spatial_filter); - layer->SetSpatialFilter(OGRGeometry::FromHandle(filter.geom)); - } - } - } + bool can_filter = false; + bool has_extent = false; + bool has_filter = false; + OGREnvelope layer_extent; + OGREnvelope layer_filter; - //------------------------------------------------------------------------------------------------------------------ - // Bind - //------------------------------------------------------------------------------------------------------------------ - struct BindData final : TableFunctionData { + OGRwkbGeometryType layer_type = wkbUnknown; +}; - int layer_idx = 0; - bool sequential_layer_scan = false; - bool keep_wkb = false; - unordered_set geometry_column_ids = {}; - unique_ptr spatial_filter = nullptr; +auto Bind(ClientContext &ctx, TableFunctionBindInput &input, vector &col_types, vector &col_names) + -> unique_ptr { - // before they are renamed - vector all_names = {}; - vector all_types = {}; - ArrowTableSchema arrow_table = {}; + auto result = make_uniq(); - bool has_approximate_feature_count = false; - idx_t approximate_feature_count = 0; - string raw_file_name; - string prefixed_file_name; - CPLStringList dataset_open_options; - CPLStringList dataset_allowed_drivers; - CPLStringList dataset_sibling_files; - 
CPLStringList layer_creation_options; - }; + // Get the file prefix associated with this connection + const auto &file_prefix = DuckDBFileSystemPrefix::GetOrCreate(ctx); - static unique_ptr Bind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names) { + // Pass file path + result->real_file_path = input.inputs[0].GetValue(); + result->gdal_file_path = file_prefix.AddPrefix(result->real_file_path); - // Result - auto result = make_uniq(); + // Parse options + const auto dataset_options_param = input.named_parameters.find("open_options"); + if (dataset_options_param != input.named_parameters.end()) { + for (auto ¶m : ListValue::GetChildren(dataset_options_param->second)) { + result->dataset_options.AddString(StringValue::Get(param).c_str()); + } + } - auto options_param = input.named_parameters.find("open_options"); - if (options_param != input.named_parameters.end()) { - for (auto ¶m : ListValue::GetChildren(options_param->second)) { - result->dataset_open_options.AddString(StringValue::Get(param).c_str()); - } + const auto drivers_param = input.named_parameters.find("allowed_drivers"); + if (drivers_param != input.named_parameters.end()) { + for (auto ¶m : ListValue::GetChildren(drivers_param->second)) { + result->dataset_drivers.AddString(StringValue::Get(param).c_str()); } + } - auto drivers_param = input.named_parameters.find("allowed_drivers"); - if (drivers_param != input.named_parameters.end()) { - for (auto ¶m : ListValue::GetChildren(drivers_param->second)) { - result->dataset_allowed_drivers.AddString(StringValue::Get(param).c_str()); - } + const auto siblings_params = input.named_parameters.find("sibling_files"); + if (siblings_params != input.named_parameters.end()) { + for (auto ¶m : ListValue::GetChildren(siblings_params->second)) { + result->dataset_sibling.AddString(file_prefix.AddPrefix(StringValue::Get(param)).c_str()); } + } - // Now we can open the dataset - auto &ctx_state = 
GDALClientContextState::GetOrCreate(context); + const auto keep_wkb_param = input.named_parameters.find("keep_wkb"); + if (keep_wkb_param != input.named_parameters.end()) { + result->keep_wkb = BooleanValue::Get(keep_wkb_param->second); + } - auto siblings_params = input.named_parameters.find("sibling_files"); - if (siblings_params != input.named_parameters.end()) { - for (auto ¶m : ListValue::GetChildren(siblings_params->second)) { - result->dataset_sibling_files.AddString(ctx_state.GetPrefix(StringValue::Get(param)).c_str()); - } - } + // Set additional default GDAL default options - result->raw_file_name = input.inputs[0].GetValue(); - result->prefixed_file_name = ctx_state.GetPrefix(result->raw_file_name); + // This for OSM, but we don't know if we are reading OSM until we open the dataset, so just always set it for now. + //result->dataset_options.AddString("INTERLEAVED_READING=YES"); - auto dataset = GDALDatasetUniquePtr(GDALDataset::Open( - result->prefixed_file_name.c_str(), GDAL_OF_VECTOR | GDAL_OF_VERBOSE_ERROR, result->dataset_allowed_drivers, - result->dataset_open_options, result->dataset_sibling_files)); + // This is so taht we dont have to deal with chunking ourselves, let GDAL do it for us + result->layer_options.AddString(StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", STANDARD_VECTOR_SIZE).c_str()); - if (dataset == nullptr) { - auto error = string(CPLGetLastErrorMsg()); - throw IOException("Could not open file: " + result->raw_file_name + " (" + error + ")"); - } + // We always want GeoArrow geometry which DuckDB knows how to convert to GEOMETRY type, unless `keep_wkb` is set + if (!result->keep_wkb) { + result->layer_options.AddString("GEOMETRY_METADATA_ENCODING=GEOARROW"); + } + + // Open the dataset and get the Arrow schema + const auto dataset = GDALOpenEx(result->gdal_file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, + result->dataset_drivers, result->dataset_options, result->dataset_sibling); - // Double check that the dataset have any 
layers - if (dataset->GetLayerCount() <= 0) { - throw IOException("Dataset does not contain any layers"); + if (!dataset) { + throw IOException("Could not open GDAL dataset at: %s", result->real_file_path); + } + + ArrowSchema schema; + ArrowArrayStream stream; + + try { + + const auto layer_count = GDALDatasetGetLayerCount(dataset); + if (layer_count <= 0) { + throw IOException("GDAL dataset contains no layers at: %s", result->real_file_path); } - // Now we can bind the additonal options - bool max_batch_size_set = false; - for (auto &kv : input.named_parameters) { - auto loption = StringUtil::Lower(kv.first); - if (loption == "layer") { + // Find layer + const auto layer_param = input.named_parameters.find("layer"); + if (layer_param != input.named_parameters.end()) { + if (layer_param->second.type() == LogicalType::INTEGER) { // Find layer by index - if (kv.second.type() == LogicalType::INTEGER) { - auto layer_idx = IntegerValue::Get(kv.second); - if (layer_idx < 0) { - throw BinderException("Layer index must be positive"); - } - if (layer_idx > dataset->GetLayerCount()) { - throw BinderException( - StringUtil::Format("Layer index too large (%s > %s)", layer_idx, dataset->GetLayerCount())); - } - result->layer_idx = layer_idx; + const auto layer_idx = IntegerValue::Get(layer_param->second); + if (layer_idx < 0) { + throw BinderException("Layer index must be positive"); } - + if (layer_idx > layer_count) { + throw BinderException( + StringUtil::Format("Layer index out of range (%s > %s)", layer_idx, layer_count)); + } + result->layer_idx = layer_idx; + } else if (layer_param->second.type() == LogicalType::VARCHAR) { // Find layer by name - if (kv.second.type() == LogicalTypeId::VARCHAR) { - auto name = StringValue::Get(kv.second).c_str(); - bool found = false; - for (auto layer_idx = 0; layer_idx < dataset->GetLayerCount(); layer_idx++) { - if (strcmp(dataset->GetLayer(layer_idx)->GetName(), name) == 0) { - result->layer_idx = layer_idx; - found = true; - break; 
- } + const auto &layer_name = StringValue::Get(layer_param->second); + auto found = false; + for (int i = 0; i < layer_count; i++) { + const auto layer = GDALDatasetGetLayer(dataset, i); + if (!layer) { + continue; } - if (!found) { - throw BinderException(StringUtil::Format("Layer '%s' could not be found in dataset", name)); + if (OGR_L_GetName(layer) == layer_name) { + result->layer_idx = i; + found = true; + break; } } - } - - if (loption == "spatial_filter_box" && kv.second.type() == GeoTypes::BOX_2D()) { - if (result->spatial_filter) { - throw BinderException("Only one spatial filter can be specified"); + if (!found) { + throw BinderException("Could not find layer with name: %s", layer_name); } - auto &children = StructValue::GetChildren(kv.second); - auto minx = DoubleValue::Get(children[0]); - auto miny = DoubleValue::Get(children[1]); - auto maxx = DoubleValue::Get(children[2]); - auto maxy = DoubleValue::Get(children[3]); - result->spatial_filter = make_uniq(minx, miny, maxx, maxy); } + } - if (loption == "spatial_filter" && kv.second.type() == LogicalType::GEOMETRY()) { - if (result->spatial_filter) { - throw BinderException("Only one spatial filter can be specified"); - } - auto wkb = StringValue::Get(kv.second); - result->spatial_filter = make_uniq(wkb); - } + // Get the layer by index + const auto layer = GDALDatasetGetLayer(dataset, result->layer_idx); + if (!layer) { + throw IOException("Could not get GDAL layer at: %s", result->real_file_path); + } - if (loption == "sequential_layer_scan") { - result->sequential_layer_scan = BooleanValue::Get(kv.second); - } + // Estimate cardinality + result->estimated_cardinality = OGR_L_GetFeatureCount(layer, 0); - if (loption == "max_batch_size") { - auto max_batch_size = IntegerValue::Get(kv.second); - if (max_batch_size <= 0) { - throw BinderException("'max_batch_size' parameter must be positive"); - } - auto str = StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", max_batch_size); - 
result->layer_creation_options.AddString(str.c_str()); - max_batch_size_set = true; - } + // Get extent (Only if spatial filter is not pushed down!) + if (OGR_L_GetExtent(layer, &result->layer_extent, 0) == OGRERR_NONE) { + result->has_extent = true; + } - if (loption == "keep_wkb") { - result->keep_wkb = BooleanValue::Get(kv.second); - } + // Check if fast spatial filtering is available + if (OGR_L_TestCapability(layer, OLCFastSpatialFilter)) { + result->can_filter = true; } - // Defaults - result->layer_creation_options.AddString("INCLUDE_FID=NO"); - if (!max_batch_size_set) { - // Set default max batch size to standard vector size - auto str = StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", STANDARD_VECTOR_SIZE); - result->layer_creation_options.AddString(str.c_str()); + // Get the layer geometry type if available + result->layer_type = OGR_L_GetGeomType(layer); + + // Check FID column + const auto fid_col = OGR_L_GetFIDColumn(layer); + if (fid_col && strcmp(fid_col, "") != 0) { + // Do not include the explicit FID if we already have it as a column + result->layer_options.AddString("INCLUDE_FID=NO"); } + const auto geom_col_name = OGR_L_GetGeometryColumn(layer); - result->layer_creation_options.AddString("GEOMETRY_METADATA_ENCODING=GEOARROW"); + // Get the arrow stream + if (!OGR_L_GetArrowStream(layer, &stream, result->layer_options.List())) { + throw IOException("Could not get GDAL Arrow stream at: %s", result->real_file_path); + } - // Get the schema for the selected layer - auto layer = dataset->GetLayer(result->layer_idx); + // And the schema + if (stream.get_schema(&stream, &schema) != 0) { + throw IOException("Could not get GDAL Arrow schema at: %s", result->real_file_path); + } - TryApplySpatialFilter(layer, result->spatial_filter.get()); + // Convert Arrow schema to DuckDB types + for (int64_t i = 0; i < schema.n_children; i++) { + auto &child_schema = *schema.children[i]; + const auto gdal_type = ArrowType::GetTypeFromSchema(ctx.db->config, 
child_schema); + auto duck_type = gdal_type->GetDuckType(); - // Check if we can get an approximate feature count - result->approximate_feature_count = 0; - result->has_approximate_feature_count = false; - if (!result->sequential_layer_scan) { - // Dont force compute the count if its expensive - auto count = layer->GetFeatureCount(false); - if (count > -1) { - result->approximate_feature_count = count; - result->has_approximate_feature_count = true; + // Track geometry columns to compute stats later + if (duck_type.id() == LogicalTypeId::GEOMETRY) { + result->geometry_columns.insert(i); } - } - struct ArrowArrayStream stream; - if (!layer->GetArrowStream(&stream, result->layer_creation_options)) { - // layer is owned by GDAL, we do not need to destory it - throw IOException("Could not get arrow stream from layer"); + if (geom_col_name && (strcmp(geom_col_name, "") == 0) && (strcmp(child_schema.name, "wkb_geometry") == 0) && + !result->keep_wkb) { + // Rename the geometry column to "geom" unless keep_wkb is set + col_names.push_back("geom"); + } else { + col_names.push_back(child_schema.name); + } + + col_types.push_back(std::move(duck_type)); } - struct ArrowSchema schema; - if (stream.get_schema(&stream, &schema) != 0) { - if (stream.release) { - stream.release(&stream); - } - throw IOException("Could not get arrow schema from layer"); + } catch (...) { + // Release stream, schema and dataset + if (schema.release) { + schema.release(&schema); } + if (stream.release) { + stream.release(&stream); + } + if (dataset) { + GDALClose(dataset); + } + // Re-throw exception + throw; + } - // The Arrow API will return attributes in this order - // 1. FID column - // 2. all ogr field attributes - // 3. 
all geometry columns + if (schema.release) { + schema.release(&schema); + } + if (stream.release) { + stream.release(&stream); + } + if (dataset) { + GDALClose(dataset); + } - auto attribute_count = schema.n_children; - auto attributes = schema.children; + return std::move(result); +} - result->all_names.reserve(attribute_count + 1); - names.reserve(attribute_count + 1); +//---------------------------------------------------------------------------------------------------------------------- +// FILTER (EXPRESSION) PUSHDOWN +//---------------------------------------------------------------------------------------------------------------------- +auto Pushdown(ClientContext &context, LogicalGet &get, FunctionData *bind_data, vector> &filters) + -> void { - for (idx_t col_idx = 0; col_idx < (idx_t)attribute_count; col_idx++) { - auto &attribute = *attributes[col_idx]; + auto &bdata = bind_data->Cast(); - const char ogc_flag[] = {'\x01', '\0', '\0', '\0', '\x14', '\0', '\0', '\0', 'A', 'R', 'R', 'O', 'W', - ':', 'e', 'x', 't', 'e', 'n', 's', 'i', 'o', 'n', ':', 'n', 'a', - 'm', 'e', '\a', '\0', '\0', '\0', 'o', 'g', 'c', '.', 'w', 'k', 'b'}; + if (!bdata.can_filter) { + return; + } - auto arrow_type = ArrowType::GetArrowLogicalType(DBConfig::GetConfig(context), attribute); + if (bdata.geometry_columns.size() != 1) { + return; // Only optimize if there is a single geometry column + } - auto column_name = string(attribute.name); - auto duckdb_type = arrow_type->GetDuckType(); + optional_idx geom_filter_idx = optional_idx::Invalid(); - if (duckdb_type.id() == LogicalTypeId::BLOB && attribute.metadata != nullptr && - strncmp(attribute.metadata, ogc_flag, sizeof(ogc_flag)) == 0) { - // This is a WKB geometry blob - result->arrow_table.AddColumn(col_idx, std::move(arrow_type), column_name); + for (idx_t expr_idx = 0; expr_idx < filters.size(); expr_idx++) { + const auto &expr = filters[expr_idx]; - if (result->keep_wkb) { - return_types.emplace_back(LogicalType::BLOB); - } 
else { - return_types.emplace_back(LogicalType::GEOMETRY()); - if (column_name == "wkb_geometry") { - column_name = "geom"; - } - } - result->geometry_column_ids.insert(col_idx); + if (expr->GetExpressionType() != ExpressionType::BOUND_FUNCTION) { + continue; + } + if (expr->return_type != LogicalType::BOOLEAN) { + continue; + } + const auto &func = expr->Cast(); + if (func.children.size() != 2) { + continue; + } - } else if (attribute.dictionary) { - auto dictionary_type = ArrowType::GetArrowLogicalType(DBConfig::GetConfig(context), attribute); - return_types.emplace_back(dictionary_type->GetDuckType()); - arrow_type->SetDictionary(std::move(dictionary_type)); - result->arrow_table.AddColumn(col_idx, std::move(arrow_type), column_name); - } else { - return_types.emplace_back(arrow_type->GetDuckType()); - result->arrow_table.AddColumn(col_idx, std::move(arrow_type), column_name); - } + if (func.children[0]->return_type.id() != LogicalTypeId::GEOMETRY || + func.children[1]->return_type.id() != LogicalTypeId::GEOMETRY) { + continue; + } - // keep these around for projection/filter pushdown later - // does GDAL even allow duplicate/missing names? 
- result->all_names.push_back(column_name); + // The set of geometry predicates that can be optimized using the bounding box + static constexpr const char *geometry_predicates[2] = {"&&", "st_intersects_extent"}; - if (column_name.empty()) { - names.push_back("v" + to_string(col_idx)); - } else { - names.push_back(column_name); + auto found = false; + for (const auto &name : geometry_predicates) { + if (StringUtil::CIEquals(func.function.name.c_str(), name)) { + found = true; + break; } } + if (!found) { + // Not a geometry predicate we can optimize + continue; + } - result->all_types = return_types; + const auto lhs_kind = func.children[0]->GetExpressionType(); + const auto rhs_kind = func.children[1]->GetExpressionType(); - schema.release(&schema); - stream.release(&stream); + const auto lhs_is_const = + lhs_kind == ExpressionType::VALUE_CONSTANT && rhs_kind == ExpressionType::BOUND_COLUMN_REF; + const auto rhs_is_const = + rhs_kind == ExpressionType::VALUE_CONSTANT && lhs_kind == ExpressionType::BOUND_COLUMN_REF; - // Rename columns if they are duplicates - unordered_map name_map; - for (auto &column_name : names) { - // put it all lower_case - auto low_column_name = StringUtil::Lower(column_name); - if (name_map.find(low_column_name) == name_map.end()) { - // Name does not exist yet - name_map[low_column_name]++; - } else { - // Name already exists, we add _x where x is the repetition number - string new_column_name = column_name + "_" + std::to_string(name_map[low_column_name]); - auto new_column_name_low = StringUtil::Lower(new_column_name); - while (name_map.find(new_column_name_low) != name_map.end()) { - // This name is already here due to a previous definition - name_map[low_column_name]++; - new_column_name = column_name + "_" + std::to_string(name_map[low_column_name]); - new_column_name_low = StringUtil::Lower(new_column_name); - } - column_name = new_column_name; - name_map[new_column_name_low]++; - } + if (lhs_is_const == rhs_is_const) { + // Both 
sides are constant or both sides are column refs + continue; } - return std::move(result); - } - - //------------------------------------------------------------------------------------------------------------------ - // Init Global - //------------------------------------------------------------------------------------------------------------------ - struct GlobalState final : ArrowScanGlobalState { - GDALDatasetUniquePtr dataset; - atomic lines_read; + auto &constant_expr = func.children[lhs_is_const ? 0 : 1]->Cast(); + auto &geometry_expr = func.children[lhs_is_const ? 1 : 0]->Cast(); - explicit GlobalState(GDALDatasetUniquePtr dataset) : dataset(std::move(dataset)), lines_read(0) { + if (constant_expr.value.type().id() != LogicalTypeId::GEOMETRY) { + // Constant is not geometry + continue; + } + if (constant_expr.value.IsNull()) { + // Constant is NULL + continue; + } + if (geometry_expr.alias != "geom") { + // Not the geometry column + continue; } - }; - static unique_ptr InitGlobal(ClientContext &context, TableFunctionInitInput &input) { - auto &data = input.bind_data->Cast(); + auto geom_extent = GeometryExtent::Empty(); + auto geom_binary = string_t(StringValue::Get(constant_expr.value)); - auto dataset = GDALDatasetUniquePtr(GDALDataset::Open( - data.prefixed_file_name.c_str(), GDAL_OF_VECTOR | GDAL_OF_VERBOSE_ERROR | GDAL_OF_READONLY, - data.dataset_allowed_drivers, data.dataset_open_options, data.dataset_sibling_files)); - if (dataset == nullptr) { - const auto error = string(CPLGetLastErrorMsg()); - throw IOException("Could not open file: " + data.raw_file_name + " (" + error + ")"); + if (Geometry::GetExtent(geom_binary, geom_extent)) { + bdata.has_filter = true; + bdata.layer_filter.MinX = geom_extent.x_min; + bdata.layer_filter.MinY = geom_extent.y_min; + bdata.layer_filter.MaxX = geom_extent.x_max; + bdata.layer_filter.MaxY = geom_extent.y_max; } - auto global_state = make_uniq(std::move(dataset)); - auto &gstate = *global_state; + // Set the index 
// Global (per-scan) state for ST_Read.
//
// Holds the GDAL dataset handle, the layer being read and the Arrow stream opened on that layer, together with
// the Arrow type of every column (filled in by InitGlobal) and a running count of the features returned by Scan.
class GlobalState final : public GlobalTableFunctionState {
public:
	// Closes the dataset if one is set, then releases the Arrow stream if it still has a release callback.
	~GlobalState() override {
		if (dataset) {
			GDALClose(dataset);
			dataset = nullptr;
		}

		if (stream.release) {
			stream.release(&stream);
		}
	}

	// NOTE(review): `dataset`, `layer` and `stream` have no default member initializers, yet the destructor
	// reads `dataset` and `stream.release`. This presumably relies on the object being value-initialized
	// when it is created - confirm, or add explicit initializers.
	GDALDatasetH dataset;
	// Layer options passed to OGR_L_GetArrowStream (copied from the bind data in InitGlobal)
	CPLStringList layer_options;
	OGRLayerH layer;
	ArrowArrayStream stream;
	// One entry per child of the stream's Arrow schema, in schema order
	vector> col_types;
	// Total number of rows handed out so far; read by Progress
	atomic features_read = {0};
};
gstate.stream = make_uniq(); + const auto dataset = GDALOpenEx(bdata.gdal_file_path.c_str(), GDAL_OF_VECTOR | GDAL_OF_READONLY, + bdata.dataset_drivers, bdata.dataset_options, bdata.dataset_sibling); - // set layer options - if (!layer->GetArrowStream(&gstate.stream->arrow_array_stream, data.layer_creation_options)) { - throw IOException("Could not get arrow stream"); - } + if (!dataset) { + throw IOException("Could not open GDAL dataset at: %s", bdata.real_file_path); + } - // Set max 1 thread - gstate.max_threads = 1; + auto result = make_uniq(); + result->dataset = dataset; + result->layer_options = bdata.layer_options; + + const auto driver = GDALGetDatasetDriver(dataset); + if (strcmp(GDALGetDriverShortName(driver), "OSM") != 0) { + // Get the layer by index + result->layer = GDALDatasetGetLayer(dataset, bdata.layer_idx); + } else { + // Special case for OSM, which requires sequential reading of layers + const auto layer_count = GDALDatasetGetLayerCount(dataset); + for (int i = 0; i < layer_count; i++) { + result->layer = GDALDatasetGetLayer(dataset, i); + if (i == bdata.layer_idx) { + // desired layer found + break; + } - if (input.CanRemoveFilterColumns()) { - gstate.projection_ids = input.projection_ids; - for (const auto &col_idx : input.column_ids) { - if (col_idx == COLUMN_IDENTIFIER_ROW_ID) { - gstate.scanned_types.emplace_back(LogicalType::ROW_TYPE); - } else { - gstate.scanned_types.push_back(data.all_types[col_idx]); - } + // else scan through and empty the layer + OGRFeatureH feature; + while ((feature = OGR_L_GetNextFeature(result->layer)) != nullptr) { + OGR_F_Destroy(feature); } } + } - return std::move(global_state); + // Set the filter, if we got one + if (bdata.has_filter) { + OGR_L_SetSpatialFilterRect(result->layer, bdata.layer_filter.MinX, bdata.layer_filter.MinY, + bdata.layer_filter.MaxX, bdata.layer_filter.MaxY); } - //------------------------------------------------------------------------------------------------------------------ - // 
Init Local - //------------------------------------------------------------------------------------------------------------------ - struct LocalState final : ArrowScanLocalState { - ArenaAllocator arena; - GeometryAllocator alloc; + CPLStringList layer_options; + layer_options.AddString(StringUtil::Format("MAX_FEATURES_IN_BATCH=%d", STANDARD_VECTOR_SIZE).data()); + layer_options.AddString("GEOMETRY_METADATA_ENCODING=GEOARROW"); - sgl::wkb_reader wkb_reader; + // Open the Arrow stream + if (!OGR_L_GetArrowStream(result->layer, &result->stream, result->layer_options.List())) { + GDALClose(dataset); + throw IOException("Could not get GDAL Arrow stream"); + } - explicit LocalState(unique_ptr current_chunk, ClientContext &context) - : ArrowScanLocalState(std::move(current_chunk), context), arena(BufferAllocator::Get(context)), - alloc(arena), wkb_reader(alloc) { + ArrowSchema schema; + if (result->stream.get_schema(&result->stream, &schema) != 0) { + result->stream.release(&result->stream); + GDALClose(dataset); + throw IOException("Could not get GDAL Arrow schema"); + } - // Setup WKB reader - wkb_reader.set_allow_mixed_zm(true); - wkb_reader.set_nan_as_empty(true); - } + // Store the column types + for (int64_t i = 0; i < schema.n_children; i++) { + auto &child_schema = *schema.children[i]; + result->col_types.push_back(ArrowType::GetTypeFromSchema(context.db->config, child_schema)); + } - void ConvertWKB(Vector &source, Vector &target, idx_t count) { + return std::move(result); +} - // Reset allocator - arena.Reset(); +//---------------------------------------------------------------------------------------------------------------------- +// SCAN +//---------------------------------------------------------------------------------------------------------------------- +void Scan(ClientContext &context, TableFunctionInput &input, DataChunk &output) { + auto &state = input.global_state->Cast(); + + ArrowArray arrow_array; + if (state.stream.get_next(&state.stream, 
&arrow_array) != 0 || arrow_array.release == nullptr) { + // Finished reading + output.SetCardinality(0); + return; + } - UnaryExecutor::Execute(source, target, count, [&](const string_t &wkb) { - const auto wkb_ptr = wkb.GetDataUnsafe(); - const auto wkb_len = wkb.GetSize(); + // Now convert the Arrow array to DuckDB + for (idx_t i = 0; i < arrow_array.n_children; i++) { + auto &arr = *arrow_array.children[i]; + auto &vec = output.data[i]; - sgl::geometry geom; - ; + auto &arrow_type = *state.col_types[i]; + auto array_state = ArrowArrayScanState(context); - if (!wkb_reader.try_parse(geom, wkb_ptr, wkb_len)) { - const auto error = wkb_reader.get_error_message(); - throw InvalidInputException("Could not parse WKB input: %s", error); - } + // We need to make sure that our chunk will hold the ownership + array_state.owned_data = make_shared_ptr(); + array_state.owned_data->arrow_array = arrow_array; - // Enforce that we have a cohesive ZM layout - if (wkb_reader.parsed_mixed_zm()) { - sgl::ops::force_zm(alloc, geom, wkb_reader.parsed_any_z(), wkb_reader.parsed_any_m(), 0, 0); - } + // We set it to nullptr to effectively transfer the ownership + arrow_array.release = nullptr; - // Serialize the geometry into a blob - const auto size = Serde::GetRequiredSize(geom); - auto blob = StringVector::EmptyString(target, size); - Serde::Serialize(geom, blob.GetDataWriteable(), size); - blob.Finalize(); - return blob; - }); + switch (arrow_type.GetPhysicalType()) { + case ArrowArrayPhysicalType::DICTIONARY_ENCODED: + ArrowToDuckDBConversion::ColumnArrowToDuckDBDictionary(vec, arr, 0, array_state, arrow_array.length, + arrow_type); + break; + case ArrowArrayPhysicalType::RUN_END_ENCODED: + ArrowToDuckDBConversion::ColumnArrowToDuckDBRunEndEncoded(vec, arr, 0, array_state, arrow_array.length, + arrow_type); + break; + case ArrowArrayPhysicalType::DEFAULT: + ArrowToDuckDBConversion::SetValidityMask(vec, arr, 0, arrow_array.length, arrow_array.offset, -1); + 
ArrowToDuckDBConversion::ColumnArrowToDuckDB(vec, arr, 0, array_state, arrow_array.length, arrow_type); + break; + default: + throw NotImplementedException("ArrowArrayPhysicalType not recognized"); } - }; + } - static unique_ptr InitLocal(ExecutionContext &context, TableFunctionInitInput &input, - GlobalTableFunctionState *gstate_p) { + state.features_read += arrow_array.length; + output.SetCardinality(arrow_array.length); +} - auto &gstate = gstate_p->Cast(); - auto current_chunk = make_uniq(); - auto result = make_uniq(std::move(current_chunk), context.client); +//------------------------------------------------------------------------------------------------------------------ +// CARDINALITY +//------------------------------------------------------------------------------------------------------------------ +auto Cardinality(ClientContext &context, const FunctionData *data) -> unique_ptr { + auto &bdata = data->Cast(); + auto result = make_uniq(); + + if (bdata.estimated_cardinality > -1) { + result->has_estimated_cardinality = true; + result->estimated_cardinality = bdata.estimated_cardinality; + result->has_max_cardinality = true; + result->max_cardinality = bdata.estimated_cardinality; + } - result->column_ids = input.column_ids; - result->filters = input.filters.get(); + return result; +} - if (input.CanRemoveFilterColumns()) { - result->all_columns.Initialize(context.client, gstate.scanned_types); - } +//---------------------------------------------------------------------------------------------------------------------- +// STATISTICS +//---------------------------------------------------------------------------------------------------------------------- +auto Statistics(ClientContext &context, const FunctionData *bind_data, column_t column_index) + -> unique_ptr { - if (!ArrowTableFunction::ArrowScanParallelStateNext(context.client, input.bind_data.get(), *result, gstate)) { - return nullptr; - } + auto &bdata = bind_data->Cast(); - return 
// Statistics callback for ST_Read.
//
// Returns geometry statistics for `column_index` when the layer extent is known and the column is the layer's
// one and only geometry column; returns nullptr in every other case.
//
// The statistics carry the layer extent as x/y min/max and, when the layer's geometry type decodes to a
// recognised code, that single geometry type.
auto Statistics(ClientContext &context, const FunctionData *bind_data, column_t column_index)
    -> unique_ptr {

	auto &bdata = bind_data->Cast();

	// If we have an extent, and the column is a geometry column, we can provide min/max stats
	if (bdata.has_extent) {

		// Check if this is the only geometry column
		const auto is_geom_col = bdata.geometry_columns.find(column_index) != bdata.geometry_columns.end();
		const auto is_only_one = bdata.geometry_columns.size() == 1;
		// NOTE(review): always true here, because the enclosing `if` already requires has_extent. If type-only
		// statistics (known layer type, unknown extent) were intended, the outer condition needs widening.
		const auto has_stats = bdata.has_extent || bdata.layer_type != wkbUnknown;

		if (is_geom_col && is_only_one && has_stats) {
			auto stats = GeometryStats::CreateUnknown(LogicalType::GEOMETRY());

			if (bdata.has_extent) {
				// Copy the OGR layer envelope into the stats extent
				auto &extent = GeometryStats::GetExtent(stats);
				extent.x_min = bdata.layer_extent.MinX;
				extent.x_max = bdata.layer_extent.MaxX;
				extent.y_min = bdata.layer_extent.MinY;
				extent.y_max = bdata.layer_extent.MaxY;
			}

			// Split the layer type code into its last three digits and the remaining thousands part
			const auto geom_type = bdata.layer_type % 1000;
			const auto vert_type = bdata.layer_type / 1000;

			// Only narrow the type set when both parts fall in the expected ranges (1..7 and 0..3)
			if ((geom_type >= 1) && (geom_type <= 7) && (vert_type >= 0) && (vert_type <= 3)) {
				auto &types = GeometryStats::GetTypes(stats);
				types.Clear();
				// NOTE(review): only `geom_type` is passed on, so the thousands part (vert_type) is not
				// recorded - confirm this is intended. The cast's target type is missing in this copy.
				types.AddWKBType(static_cast(geom_type));
			}

			return stats.ToUnique();
		}
	}

	return nullptr;
}
return stats.ToUnique(); + } } - //------------------------------------------------------------------------------------------------------------------ - // Cardinality - //------------------------------------------------------------------------------------------------------------------ - static unique_ptr Cardinality(ClientContext &context, const FunctionData *data) { - auto &bind_data = data->Cast(); - auto result = make_uniq(); + return nullptr; +} - if (bind_data.has_approximate_feature_count) { - result->has_estimated_cardinality = true; - result->estimated_cardinality = bind_data.approximate_feature_count; - } - return result; +//---------------------------------------------------------------------------------------------------------------------- +// PROGRESS +//---------------------------------------------------------------------------------------------------------------------- +auto Progress(ClientContext &context, const FunctionData *b_data, const GlobalTableFunctionState *g_state) -> double { + auto &bdata = b_data->Cast(); + auto &gstate = g_state->Cast(); + + if (bdata.estimated_cardinality < 0) { + return 0.0; } - //------------------------------------------------------------------------------------------------------------------ - // Replacement Scan - //------------------------------------------------------------------------------------------------------------------ - static unique_ptr ReplacementScan(ClientContext &, ReplacementScanInput &input, - optional_ptr) { - auto &table_name = input.table_name; - auto lower_name = StringUtil::Lower(table_name); - // Check if the table name ends with some common geospatial file extensions - if (StringUtil::EndsWith(lower_name, ".gpkg") || StringUtil::EndsWith(lower_name, ".fgb")) { + const auto count = static_cast(gstate.features_read.load()); + const auto total = static_cast(bdata.estimated_cardinality); - auto table_function = make_uniq(); - vector> children; - children.push_back(make_uniq(Value(table_name))); 
- table_function->function = make_uniq("ST_Read", std::move(children)); - return std::move(table_function); - } - // else not something we can replace - return nullptr; + return MinValue(100.0 * (total / count), 100.0); +} + +//------------------------------------------------------------------------------------------------------------------ +// REPLACEMENT SCAN +//------------------------------------------------------------------------------------------------------------------ +auto ReplacementScan(ClientContext &, ReplacementScanInput &input, optional_ptr) + -> unique_ptr { + auto &table_name = input.table_name; + auto lower_name = StringUtil::Lower(table_name); + // Check if the table name ends with some common geospatial file extensions + if (StringUtil::EndsWith(lower_name, ".gpkg") || StringUtil::EndsWith(lower_name, ".fgb")) { + + auto table_function = make_uniq(); + vector> children; + children.push_back(make_uniq(Value(table_name))); + table_function->function = make_uniq("ST_Read", std::move(children)); + return std::move(table_function); } + // else not something we can replace + return nullptr; +} - //------------------------------------------------------------------------------------------------------------------ - // Documentation - //------------------------------------------------------------------------------------------------------------------ - static constexpr auto DOCUMENTATION = R"( +//---------------------------------------------------------------------------------------------------------------------- +// REGISTER +//---------------------------------------------------------------------------------------------------------------------- +static constexpr auto DOCUMENTATION = R"( Read and import a variety of geospatial file formats using the GDAL library. 
The `ST_Read` table function is based on the [GDAL](https://gdal.org/index.html) translator library and enables reading spatial data from a variety of geospatial vector file formats as if they were DuckDB tables. @@ -1069,7 +1044,7 @@ struct ST_Read : ArrowTableFunction { | FlatGeoBuf | .fgb | )"; - static constexpr auto EXAMPLE = R"( +static constexpr auto EXAMPLE = R"( -- Read a Shapefile SELECT * FROM ST_Read('some/file/path/filename.shp'); @@ -1077,1003 +1052,772 @@ struct ST_Read : ArrowTableFunction { CREATE TABLE my_geojson_table AS SELECT * FROM ST_Read('some/file/path/filename.json'); )"; - //------------------------------------------------------------------------------------------------------------------ - // Register - //------------------------------------------------------------------------------------------------------------------ - static void Register(ExtensionLoader &loader) { - TableFunction func("ST_Read", {LogicalType::VARCHAR}, Execute, Bind, InitGlobal, InitLocal); - - func.cardinality = Cardinality; - func.get_partition_data = ArrowTableFunction::ArrowGetPartitionData; +void Register(ExtensionLoader &loader) { + TableFunction read_func("ST_Read", {LogicalType::VARCHAR}, Scan, Bind, InitGlobal); + read_func.cardinality = Cardinality; + read_func.statistics = Statistics; + read_func.table_scan_progress = Progress; + read_func.pushdown_complex_filter = Pushdown; - func.projection_pushdown = true; + read_func.named_parameters["open_options"] = LogicalType::LIST(LogicalType::VARCHAR); + read_func.named_parameters["allowed_drivers"] = LogicalType::LIST(LogicalType::VARCHAR); + read_func.named_parameters["sibling_files"] = LogicalType::LIST(LogicalType::VARCHAR); + read_func.named_parameters["layer"] = LogicalType::VARCHAR; + read_func.named_parameters["max_batch_size"] = LogicalType::INTEGER; + read_func.named_parameters["keep_wkb"] = LogicalType::BOOLEAN; - func.named_parameters["open_options"] = LogicalType::LIST(LogicalType::VARCHAR); - 
func.named_parameters["allowed_drivers"] = LogicalType::LIST(LogicalType::VARCHAR); - func.named_parameters["sibling_files"] = LogicalType::LIST(LogicalType::VARCHAR); - func.named_parameters["spatial_filter_box"] = GeoTypes::BOX_2D(); - func.named_parameters["spatial_filter"] = LogicalType::GEOMETRY(); - func.named_parameters["layer"] = LogicalType::VARCHAR; - func.named_parameters["sequential_layer_scan"] = LogicalType::BOOLEAN; - func.named_parameters["max_batch_size"] = LogicalType::INTEGER; - func.named_parameters["keep_wkb"] = LogicalType::BOOLEAN; - loader.RegisterFunction(func); + loader.RegisterFunction(read_func); - InsertionOrderPreservingMap tags; - tags.insert("ext", "spatial"); - FunctionBuilder::AddTableFunctionDocs(loader, "ST_Read", DOCUMENTATION, EXAMPLE, tags); + InsertionOrderPreservingMap tags; + tags.insert("ext", "spatial"); + FunctionBuilder::AddTableFunctionDocs(loader, "ST_Read", DOCUMENTATION, EXAMPLE, tags); - // Replacement scan - auto &config = DBConfig::GetConfig(loader.GetDatabaseInstance()); - config.replacement_scans.emplace_back(ReplacementScan); - } -}; + auto &config = DBConfig::GetConfig(loader.GetDatabaseInstance()); + config.replacement_scans.emplace_back(ReplacementScan); +} +} // namespace gdal_read //====================================================================================================================== -// ST_Read_Meta +// GDAL COPY //====================================================================================================================== -const auto GEOMETRY_FIELD_TYPE = LogicalType::STRUCT({ - {"name", LogicalType::VARCHAR}, - {"type", LogicalType::VARCHAR}, - {"nullable", LogicalType::BOOLEAN}, - {"crs", LogicalType::STRUCT({ - {"name", LogicalType::VARCHAR}, - {"auth_name", LogicalType::VARCHAR}, - {"auth_code", LogicalType::VARCHAR}, - {"wkt", LogicalType::VARCHAR}, - {"proj4", LogicalType::VARCHAR}, - {"projjson", LogicalType::VARCHAR}, - })}, -}); - -const auto STANDARD_FIELD_TYPE = 
LogicalType::STRUCT({ - {"name", LogicalType::VARCHAR}, - {"type", LogicalType::VARCHAR}, - {"subtype", LogicalType::VARCHAR}, - {"nullable", LogicalType::BOOLEAN}, - {"unique", LogicalType::BOOLEAN}, - {"width", LogicalType::BIGINT}, - {"precision", LogicalType::BIGINT}, -}); - -const auto LAYER_TYPE = LogicalType::STRUCT({ - {"name", LogicalType::VARCHAR}, - {"feature_count", LogicalType::BIGINT}, - {"geometry_fields", LogicalType::LIST(GEOMETRY_FIELD_TYPE)}, - {"fields", LogicalType::LIST(STANDARD_FIELD_TYPE)}, -}); - -struct ST_Read_Meta { - - //------------------------------------------------------------------------------------------------------------------ - // Bind - //------------------------------------------------------------------------------------------------------------------ - struct BindData final : TableFunctionData { - vector file_names; - - explicit BindData(vector file_names_p) : file_names(std::move(file_names_p)) { - } - }; - - static unique_ptr Bind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names) { - - names.push_back("file_name"); - return_types.push_back(LogicalType::VARCHAR); - - names.push_back("driver_short_name"); - return_types.push_back(LogicalType::VARCHAR); - - names.push_back("driver_long_name"); - return_types.push_back(LogicalType::VARCHAR); - - names.push_back("layers"); - return_types.push_back(LogicalType::LIST(LAYER_TYPE)); - - // TODO: Add metadata, domains, relationships - - // Get the filename list - const auto mfreader = MultiFileReader::Create(input.table_function); - const auto mflist = mfreader->CreateFileList(context, input.inputs[0], FileGlobOptions::ALLOW_EMPTY); - return make_uniq_base(mflist->GetAllFiles()); - } - - //------------------------------------------------------------------------------------------------------------------ - // Init - //------------------------------------------------------------------------------------------------------------------ - struct 
State final : GlobalTableFunctionState { - idx_t current_idx; - explicit State() : current_idx(0) { - } - }; - - static unique_ptr Init(ClientContext &context, TableFunctionInitInput &input) { - return make_uniq_base(); - } - - //------------------------------------------------------------------------------------------------------------------ - // Execute - //------------------------------------------------------------------------------------------------------------------ - static Value GetLayerData(const GDALDatasetUniquePtr &dataset) { - - vector layer_values; - for (const auto &layer : dataset->GetLayers()) { - child_list_t layer_value_fields; - - layer_value_fields.emplace_back("name", Value(layer->GetName())); - layer_value_fields.emplace_back("feature_count", Value(static_cast(layer->GetFeatureCount()))); - - vector geometry_fields; - for (const auto &field : layer->GetLayerDefn()->GetGeomFields()) { - child_list_t geometry_field_value_fields; - auto field_name = field->GetNameRef(); - if (std::strlen(field_name) == 0) { - field_name = "geom"; - } - geometry_field_value_fields.emplace_back("name", Value(field_name)); - geometry_field_value_fields.emplace_back("type", Value(OGRGeometryTypeToName(field->GetType()))); - geometry_field_value_fields.emplace_back("nullable", Value(static_cast(field->IsNullable()))); - - const auto crs = field->GetSpatialRef(); - if (crs != nullptr) { - child_list_t crs_value_fields; - crs_value_fields.emplace_back("name", Value(crs->GetName())); - crs_value_fields.emplace_back("auth_name", Value(crs->GetAuthorityName(nullptr))); - crs_value_fields.emplace_back("auth_code", Value(crs->GetAuthorityCode(nullptr))); - - char *wkt_ptr = nullptr; - crs->exportToWkt(&wkt_ptr); - crs_value_fields.emplace_back("wkt", wkt_ptr ? Value(wkt_ptr) : Value()); - CPLFree(wkt_ptr); - - char *proj4_ptr = nullptr; - crs->exportToProj4(&proj4_ptr); - crs_value_fields.emplace_back("proj4", proj4_ptr ? 
Value(proj4_ptr) : Value()); - CPLFree(proj4_ptr); - - char *projjson_ptr = nullptr; - crs->exportToPROJJSON(&projjson_ptr, nullptr); - crs_value_fields.emplace_back("projjson", projjson_ptr ? Value(projjson_ptr) : Value()); - CPLFree(projjson_ptr); - - geometry_field_value_fields.emplace_back("crs", Value::STRUCT(crs_value_fields)); - } else { - Value null_crs; - geometry_field_value_fields.emplace_back("crs", null_crs); - } +namespace gdal_copy { - geometry_fields.push_back(Value::STRUCT(geometry_field_value_fields)); - } - layer_value_fields.emplace_back("geometry_fields", - Value::LIST(GEOMETRY_FIELD_TYPE, std::move(geometry_fields))); - - vector standard_fields; - for (const auto &field : layer->GetLayerDefn()->GetFields()) { - child_list_t standard_field_value_fields; - standard_field_value_fields.emplace_back("name", Value(field->GetNameRef())); - standard_field_value_fields.emplace_back("type", Value(OGR_GetFieldTypeName(field->GetType()))); - standard_field_value_fields.emplace_back("subtype", - Value(OGR_GetFieldSubTypeName(field->GetSubType()))); - standard_field_value_fields.emplace_back("nullable", Value(field->IsNullable())); - standard_field_value_fields.emplace_back("unique", Value(field->IsUnique())); - standard_field_value_fields.emplace_back("width", Value(field->GetWidth())); - standard_field_value_fields.emplace_back("precision", Value(field->GetPrecision())); - standard_fields.push_back(Value::STRUCT(standard_field_value_fields)); - } - layer_value_fields.emplace_back("fields", Value::LIST(STANDARD_FIELD_TYPE, std::move(standard_fields))); +//---------------------------------------------------------------------------------------------------------------------- +// Bind +//---------------------------------------------------------------------------------------------------------------------- +class BindData final : public TableFunctionData { +public: + //string gdal_file_path; + //string real_file_path; + string driver_name; + string layer_name; + 
+ CPLStringList driver_options; + CPLStringList layer_options; - layer_values.push_back(Value::STRUCT(layer_value_fields)); + string target_srs; + OGRwkbGeometryType geometry_type; + + // Arrow info + ClientProperties props; + ArrowSchema schema; + unordered_map> extension_type_cast; + + ~BindData() override { + if (schema.release) { + schema.release(&schema); } + } +}; - return Value::LIST(LAYER_TYPE, std::move(layer_values)); +bool MatchOption(const char *name, const pair> &option, bool list = false) { + if (StringUtil::CIEquals(name, option.first)) { + if (option.second.empty()) { + throw BinderException("GDAL COPY option '%s' requires a value", name); + } + if (!list) { + if (option.second.size() != 1) { + throw BinderException("GDAL COPY option '%s' only accepts a single value", name); + } + if (option.second.back().type().id() != LogicalTypeId::VARCHAR) { + throw BinderException("GDAL COPY option '%s' must be a string", name); + } + } else { + for (auto &val : option.second) { + if (val.type().id() != LogicalTypeId::VARCHAR) { + throw BinderException("GDAL COPY option '%s' must be a list of strings", name); + } + } + } + return true; } + return false; +} + +auto Bind(ClientContext &context, CopyFunctionBindInput &input, const vector &names, + const vector &sql_types) -> unique_ptr { + auto result = make_uniq(); - static void Execute(ClientContext &context, TableFunctionInput &input, DataChunk &output) { - auto &bind_data = input.bind_data->Cast(); - auto &state = input.global_state->Cast(); + // Set file pat + const auto &file_path = input.info.file_path; - const auto remaining = MinValue(STANDARD_VECTOR_SIZE, bind_data.file_names.size() - state.current_idx); - auto output_idx = 0; + // Parse options + for (auto &option : input.info.options) { - for (idx_t in_idx = 0; in_idx < remaining; in_idx++, state.current_idx++) { - auto &file = bind_data.file_names[state.current_idx]; - auto prefixed_file_name = 
GDALClientContextState::GetOrCreate(context).GetPrefix(file.path); + if (MatchOption("DRIVER", option)) { + result->driver_name = option.second.back().GetValue(); + continue; + } - GDALDatasetUniquePtr dataset; - try { - dataset = GDALDatasetUniquePtr( - GDALDataset::Open(prefixed_file_name.c_str(), GDAL_OF_VECTOR | GDAL_OF_VERBOSE_ERROR)); - } catch (...) { - // Just skip anything we cant open - continue; + if (MatchOption("LAYER_NAME", option)) { + result->layer_name = option.second.back().GetValue(); + continue; + } + + if (MatchOption("SRS", option) || MatchOption("CRS", option)) { + result->target_srs = option.second.back().GetValue(); + continue; + } + + if (MatchOption("GEOMETRY_TYPE", option)) { + auto type = option.second.back().GetValue(); + if (StringUtil::CIEquals(type, "POINT")) { + result->geometry_type = wkbPoint; + } else if (StringUtil::CIEquals(type, "LINESTRING")) { + result->geometry_type = wkbLineString; + } else if (StringUtil::CIEquals(type, "POLYGON")) { + result->geometry_type = wkbPolygon; + } else if (StringUtil::CIEquals(type, "MULTIPOINT")) { + result->geometry_type = wkbMultiPoint; + } else if (StringUtil::CIEquals(type, "MULTILINESTRING")) { + result->geometry_type = wkbMultiLineString; + } else if (StringUtil::CIEquals(type, "MULTIPOLYGON")) { + result->geometry_type = wkbMultiPolygon; + } else if (StringUtil::CIEquals(type, "GEOMETRYCOLLECTION")) { + result->geometry_type = wkbGeometryCollection; + } else { + throw BinderException("Unsupported GEOMETRY_TYPE: '%s'", type); } + continue; + } - output.data[0].SetValue(output_idx, file.path); - output.data[1].SetValue(output_idx, dataset->GetDriver()->GetDescription()); - output.data[2].SetValue(output_idx, dataset->GetDriver()->GetMetadataItem(GDAL_DMD_LONGNAME)); - output.data[3].SetValue(output_idx, GetLayerData(dataset)); + if (MatchOption("LAYER_CREATION_OPTIONS", option, true)) { + for (auto &val : option.second) { + result->layer_options.AddString(val.GetValue().c_str()); + } + 
continue; + } - output_idx++; + if (MatchOption("DATASET_CREATION_OPTIONS", option, true)) { + for (auto &val : option.second) { + result->driver_options.AddString(val.GetValue().c_str()); + } + continue; } - output.SetCardinality(output_idx); + throw BinderException("Unknown GDAL COPY option: '%s'", option.first); } - //------------------------------------------------------------------------------------------------------------------ - // Documentation - //------------------------------------------------------------------------------------------------------------------ - // static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}}; - - static constexpr auto DESCRIPTION = R"( - Read the metadata from a variety of geospatial file formats using the GDAL library. + // Check that options are valid + if (result->driver_name.empty()) { + throw BinderException("GDAL COPY option 'DRIVER' is required"); + } - The `ST_Read_Meta` table function accompanies the `ST_Read` table function, but instead of reading the contents of a file, this function scans the metadata instead. - Since the data model of the underlying GDAL library is quite flexible, most of the interesting metadata is within the returned `layers` column, which is a somewhat complex nested structure of DuckDB `STRUCT` and `LIST` types. 
- )"; + if (result->layer_name.empty()) { + auto &fs = FileSystem::GetFileSystem(context); + result->layer_name = fs.ExtractBaseName(file_path); + } - static constexpr auto EXAMPLE = R"( - -- Find the coordinate reference system authority name and code for the first layers first geometry column in the file - SELECT - layers[1].geometry_fields[1].crs.auth_name as name, - layers[1].geometry_fields[1].crs.auth_code as code - FROM st_read_meta('../../tmp/data/amsterdam_roads.fgb'); - )"; + // Check the driver + const auto driver = GDALGetDriverByName(result->driver_name.c_str()); + if (!driver) { + throw BinderException("Could not find GDAL driver: " + result->driver_name); + } - //------------------------------------------------------------------------------------------------------------------ - // Register - //------------------------------------------------------------------------------------------------------------------ - static void Register(ExtensionLoader &loader) { - const TableFunction func("ST_Read_Meta", {LogicalType::VARCHAR}, Execute, Bind, Init); - loader.RegisterFunction(MultiFileReader::CreateFunctionSet(func)); + // Try to get the file extension from the driver + const auto file_ext = GDALGetMetadataItem(driver, GDAL_DMD_EXTENSIONS, nullptr); + if (file_ext) { + input.file_extension = file_ext; + } else { + const auto file_exts = GDALGetMetadataItem(driver, GDAL_DMD_EXTENSIONS, nullptr); + const auto exts = StringUtil::Split(file_exts, ' '); + if (!exts.empty()) { + input.file_extension = exts[0]; + } + } - InsertionOrderPreservingMap tags; - tags.insert("ext", "spatial"); - FunctionBuilder::AddTableFunctionDocs(loader, "ST_Read_Meta", DESCRIPTION, EXAMPLE, tags); + // Driver-specific checks + if (result->driver_name == "OpenFileGDB" && result->geometry_type == wkbUnknown) { + throw BinderException("OpenFileGDB requires 'GEOMETRY_TYPE' parameter to be set when writing!"); } -}; 
-//====================================================================================================================== -// ST_Drivers -//====================================================================================================================== + // Setup arrow schema + result->props = context.GetClientProperties(); + result->extension_type_cast = duckdb::ArrowTypeExtensionData::GetExtensionTypes(context, sql_types); + ArrowConverter::ToArrowSchema(&result->schema, sql_types, names, result->props); -struct ST_Drivers { + return std::move(result); +} - //------------------------------------------------------------------------------------------------------------------ - // Bind - //------------------------------------------------------------------------------------------------------------------ - struct BindData final : TableFunctionData { - idx_t driver_count; - explicit BindData(const idx_t driver_count_p) : driver_count(driver_count_p) { +//---------------------------------------------------------------------------------------------------------------------- +// Global State +//---------------------------------------------------------------------------------------------------------------------- +class GlobalState final : public GlobalFunctionData { +public: + ~GlobalState() override { + if (dataset) { + GDALClose(dataset); + dataset = nullptr; } - }; + if (srs) { + OSRDestroySpatialReference(srs); + srs = nullptr; + } + } - static unique_ptr Bind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names) { + mutex lock; + GDALDatasetH dataset = nullptr; + OGRLayerH layer = nullptr; + OGRSpatialReferenceH srs = nullptr; +}; - return_types.emplace_back(LogicalType::VARCHAR); - return_types.emplace_back(LogicalType::VARCHAR); - return_types.emplace_back(LogicalType::BOOLEAN); - return_types.emplace_back(LogicalType::BOOLEAN); - return_types.emplace_back(LogicalType::BOOLEAN); - 
return_types.emplace_back(LogicalType::VARCHAR); - names.emplace_back("short_name"); - names.emplace_back("long_name"); - names.emplace_back("can_create"); - names.emplace_back("can_copy"); - names.emplace_back("can_open"); - names.emplace_back("help_url"); +auto InitGlobal(ClientContext &context, FunctionData &bdata_p, const string &real_file_path) + -> unique_ptr { + auto &bdata = bdata_p.Cast(); + auto result = make_uniq(); - return make_uniq_base(GDALGetDriverCount()); + const auto driver = GDALGetDriverByName(bdata.driver_name.c_str()); + if (!driver) { + throw InvalidInputException("Could not find GDAL driver: " + bdata.driver_name); } - //------------------------------------------------------------------------------------------------------------------ - // Init - //------------------------------------------------------------------------------------------------------------------ - struct State final : GlobalTableFunctionState { - idx_t current_idx; - explicit State() : current_idx(0) { - } - }; + const auto &file_prefix = DuckDBFileSystemPrefix::GetOrCreate(context); + const auto gdal_file_path = file_prefix.AddPrefix(real_file_path); - static unique_ptr Init(ClientContext &context, TableFunctionInitInput &input) { - return make_uniq_base(); + // Create Dataset + result->dataset = GDALCreate(driver, gdal_file_path.c_str(), 0, 0, 0, GDT_Unknown, bdata.driver_options); + if (!result->dataset) { + throw IOException("Could not create GDAL dataset at: " + real_file_path); } - //------------------------------------------------------------------------------------------------------------------ - // Execute - //------------------------------------------------------------------------------------------------------------------ - static void Execute(ClientContext &context, TableFunctionInput &input, DataChunk &output) { - auto &state = input.global_state->Cast(); - auto &bind_data = input.bind_data->Cast(); + if (!bdata.target_srs.empty()) { + // Make a new spatial 
reference object, and set it from the user input + result->srs = OSRNewSpatialReference(nullptr); + OSRSetFromUserInput(result->srs, bdata.target_srs.c_str()); + } - idx_t count = 0; - auto next_idx = MinValue(state.current_idx + STANDARD_VECTOR_SIZE, bind_data.driver_count); + // Create Layer + result->layer = GDALDatasetCreateLayer(result->dataset, bdata.layer_name.c_str(), result->srs, bdata.geometry_type, + bdata.layer_options); - for (; state.current_idx < next_idx; state.current_idx++) { - auto driver = GDALGetDriver(static_cast(state.current_idx)); + if (!result->layer) { + throw IOException("Could not create GDAL layer in dataset at: " + real_file_path); + } - // Check if the driver is a vector driver - if (GDALGetMetadataItem(driver, GDAL_DCAP_VECTOR, nullptr) == nullptr) { - continue; + // Create fields for all children + auto geometry_field_count = 0; + for (auto i = 0; i < bdata.schema.n_children; i++) { + const auto child_schema = bdata.schema.children[i]; + + // Check if this is a geometry field + if (child_schema->metadata != nullptr) { + // TODO: Look for arrow metadata! 
+ geometry_field_count++; + if (geometry_field_count > 1) { + throw NotImplementedException("Multiple geometry fields not supported yet"); + } + } else { + // Register normal attribute + if (!OGR_L_CreateFieldFromArrowSchema(result->layer, child_schema, nullptr)) { + throw IOException("Could not create field in GDAL layer for column: " + string(child_schema->name)); } + } + } - auto short_name = Value::CreateValue(GDALGetDriverShortName(driver)); - auto long_name = Value::CreateValue(GDALGetDriverLongName(driver)); + return std::move(result); +} - const char *create_flag = GDALGetMetadataItem(driver, GDAL_DCAP_CREATE, nullptr); - auto create_value = Value::CreateValue(create_flag != nullptr); +//---------------------------------------------------------------------------------------------------------------------- +// Local State +//---------------------------------------------------------------------------------------------------------------------- +class LocalState final : public LocalFunctionData { +public: + ~LocalState() override { + if (array.release) { + array.release(&array); + array.release = nullptr; + } + } + ArrowArray array; +}; - const char *copy_flag = GDALGetMetadataItem(driver, GDAL_DCAP_CREATECOPY, nullptr); - auto copy_value = Value::CreateValue(copy_flag != nullptr); - const char *open_flag = GDALGetMetadataItem(driver, GDAL_DCAP_OPEN, nullptr); - auto open_value = Value::CreateValue(open_flag != nullptr); +auto InitLocal(ExecutionContext &context, FunctionData &bind_data) -> unique_ptr { + auto result = make_uniq(); + return std::move(result); +} - auto help_topic_flag = GDALGetDriverHelpTopic(driver); - auto help_topic_value = help_topic_flag == nullptr - ? 
Value(LogicalType::VARCHAR) - : Value(StringUtil::Format("https://gdal.org/%s", help_topic_flag)); +//---------------------------------------------------------------------------------------------------------------------- +// Sink +//---------------------------------------------------------------------------------------------------------------------- +void Sink(ExecutionContext &context, FunctionData &bdata_p, GlobalFunctionData &gstate_p, LocalFunctionData &lstate_p, + DataChunk &input) { - output.data[0].SetValue(count, short_name); - output.data[1].SetValue(count, long_name); - output.data[2].SetValue(count, create_value); - output.data[3].SetValue(count, copy_value); - output.data[4].SetValue(count, open_value); - output.data[5].SetValue(count, help_topic_value); - count++; - } - output.SetCardinality(count); + const auto &bdata = bdata_p.Cast(); + auto &gstate = gstate_p.Cast(); + auto &lstate = lstate_p.Cast(); + + auto &arrow_array = lstate.array; + auto &arrow_schema = bdata.schema; + + // Convert to Arrow array + ArrowConverter::ToArrowArray(input, &arrow_array, bdata.props, bdata.extension_type_cast); + + // Sink the Arrow array into GDAL + { + // Lock + lock_guard guard(gstate.lock); + + // Sink into GDAL + OGR_L_WriteArrowBatch(gstate.layer, &arrow_schema, &arrow_array, nullptr); } - //------------------------------------------------------------------------------------------------------------------ - // Documentation - //------------------------------------------------------------------------------------------------------------------ + // Release the array + if (arrow_array.release) { + arrow_array.release(&arrow_array); + arrow_array.release = nullptr; + } +} - // static constexpr DocTag DOC_TAGS[] = {{"ext", "spatial"}}; +//---------------------------------------------------------------------------------------------------------------------- +// Combine 
+//---------------------------------------------------------------------------------------------------------------------- +void Combine(ExecutionContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, + LocalFunctionData &lstate) { + // Nothing to do, we don't have any local state that needs to be merged +} - static constexpr auto DESCRIPTION = R"( - Returns the list of supported GDAL drivers and file formats +//---------------------------------------------------------------------------------------------------------------------- +// Finalize +//---------------------------------------------------------------------------------------------------------------------- +void Finalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate_p) { + auto &gstate = gstate_p.Cast(); - Note that far from all of these drivers have been tested properly. - Some may require additional options to be passed to work as expected. - If you run into any issues please first consult the [consult the GDAL docs](https://gdal.org/drivers/vector/index.html). - )"; + // Flush and close the dataset + GDALFlushCache(gstate.dataset); + GDALClose(gstate.dataset); + gstate.dataset = nullptr; +} - static constexpr auto EXAMPLE = R"( - SELECT * FROM ST_Drivers(); - )"; +CopyFunctionExecutionMode Mode(bool preserve_insertion_order, bool use_batch_index) { + // Parallel writes have limited utility since we still lock on each write to GDAL layer + // But in theory we still benefit from the parallel conversion to Arrow arrays, and this also allows + // the rest of the pipeline to be parallelized if we don't care about insertion order. + return preserve_insertion_order ? 
CopyFunctionExecutionMode::REGULAR_COPY_TO_FILE + : CopyFunctionExecutionMode::PARALLEL_COPY_TO_FILE; +} - //------------------------------------------------------------------------------------------------------------------ - // Register - //------------------------------------------------------------------------------------------------------------------ - static void Register(ExtensionLoader &loader) { - const TableFunction func("ST_Drivers", {}, Execute, Bind, Init); - loader.RegisterFunction(func); +//---------------------------------------------------------------------------------------------------------------------- +// Register +//---------------------------------------------------------------------------------------------------------------------- +void Register(ExtensionLoader &loader) { + CopyFunction info("GDAL"); + + info.copy_to_bind = Bind; + info.copy_to_initialize_local = InitLocal; + info.copy_to_initialize_global = InitGlobal; + info.copy_to_sink = Sink; + info.copy_to_combine = Combine; + info.copy_to_finalize = Finalize; + info.execution_mode = Mode; + info.extension = "gdal"; + + loader.RegisterFunction(info); +} - InsertionOrderPreservingMap tags; - tags.insert("ext", "spatial"); - FunctionBuilder::AddTableFunctionDocs(loader, "ST_Drivers", DESCRIPTION, EXAMPLE, tags); - } -}; +} // namespace gdal_copy +} // namespace //====================================================================================================================== -// ST_Write +// GDAL LIST //====================================================================================================================== -// TODO: This currently uses slow "Value" row-by-row conversions. GDAL now supports writing through arrow, so we should -// move into using that in the future. 
- -struct ST_Write { - - //------------------------------------------------------------------------------------------------------------------ - // Bind - //------------------------------------------------------------------------------------------------------------------ - struct BindData final : TableFunctionData { - - string file_path; - vector field_sql_types; - vector field_names; - string driver_name; - string layer_name; - CPLStringList dataset_creation_options; - CPLStringList layer_creation_options; - string target_srs; - OGRwkbGeometryType geometry_type = wkbUnknown; - - BindData(string file_path, vector field_sql_types, vector field_names) - : file_path(std::move(file_path)), field_sql_types(std::move(field_sql_types)), - field_names(std::move(field_names)) { - } - }; - - static unique_ptr Bind(ClientContext &context, CopyFunctionBindInput &input, - const vector &names, const vector &sql_types) { - - auto bind_data = make_uniq(input.info.file_path, sql_types, names); - - // check all the options in the copy info - // and set - for (auto &option : input.info.options) { - if (StringUtil::Upper(option.first) == "DRIVER") { - auto set = option.second.front(); - if (set.type().id() == LogicalTypeId::VARCHAR) { - bind_data->driver_name = set.GetValue(); - } else { - throw BinderException("Driver name must be a string"); - } - } else if (StringUtil::Upper(option.first) == "LAYER_NAME") { - auto set = option.second.front(); - if (set.type().id() == LogicalTypeId::VARCHAR) { - bind_data->layer_name = set.GetValue(); - } else { - throw BinderException("Layer name must be a string"); - } - } else if (StringUtil::Upper(option.first) == "LAYER_CREATION_OPTIONS") { - auto set = option.second; - for (auto &s : set) { - if (s.type().id() != LogicalTypeId::VARCHAR) { - throw BinderException("Layer creation options must be strings"); - } - auto str = s.GetValue(); - bind_data->layer_creation_options.AddString(str.c_str()); - } - } else if (StringUtil::Upper(option.first) == 
"DATASET_CREATION_OPTIONS") { - auto set = option.second; - for (auto &s : set) { - if (s.type().id() != LogicalTypeId::VARCHAR) { - throw BinderException("Dataset creation options must be strings"); - } - auto str = s.GetValue(); - bind_data->dataset_creation_options.AddString(str.c_str()); - } - } else if (StringUtil::Upper(option.first) == "GEOMETRY_TYPE") { - auto &set = option.second.front(); - if (set.type().id() == LogicalTypeId::VARCHAR) { - auto type = set.GetValue(); - if (StringUtil::CIEquals(type, "POINT")) { - bind_data->geometry_type = wkbPoint; - } else if (StringUtil::CIEquals(type, "LINESTRING")) { - bind_data->geometry_type = wkbLineString; - } else if (StringUtil::CIEquals(type, "POLYGON")) { - bind_data->geometry_type = wkbPolygon; - } else if (StringUtil::CIEquals(type, "MULTIPOINT")) { - bind_data->geometry_type = wkbMultiPoint; - } else if (StringUtil::CIEquals(type, "MULTILINESTRING")) { - bind_data->geometry_type = wkbMultiLineString; - } else if (StringUtil::CIEquals(type, "MULTIPOLYGON")) { - bind_data->geometry_type = wkbMultiPolygon; - } else if (StringUtil::CIEquals(type, "GEOMETRYCOLLECTION")) { - bind_data->geometry_type = wkbGeometryCollection; - } else { - throw BinderException("Unknown geometry type '%s', expected one of 'POINT', 'LINESTRING', " - "'POLYGON', 'MULTIPOINT', " - "'MULTILINESTRING', 'MULTIPOLYGON', 'GEOMETRYCOLLECTION'", - type); - } - } else { - throw BinderException("Geometry type must be a string"); - } - } else if (StringUtil::Upper(option.first) == "SRS") { - auto &set = option.second.front(); - if (set.type().id() == LogicalTypeId::VARCHAR) { - bind_data->target_srs = set.GetValue(); - } else { - throw BinderException("SRS must be a string"); - } - } else { - throw BinderException("Unknown option '%s'", option.first); - } - // save dataset open options.. i guess? 
- } +namespace gdal_list { - if (bind_data->driver_name.empty()) { - throw BinderException("Driver name must be specified"); - } +//---------------------------------------------------------------------------------------------------------------------- +// Bind +//---------------------------------------------------------------------------------------------------------------------- +class BindData final : public TableFunctionData { +public: + idx_t driver_count; +}; - if (bind_data->layer_name.empty()) { - // Default to the base name of the file - auto &fs = FileSystem::GetFileSystem(context); - bind_data->layer_name = fs.ExtractBaseName(bind_data->file_path); - } +auto Bind(ClientContext &context, TableFunctionBindInput &input, vector &types, vector &names) + -> unique_ptr { + + types.emplace_back(LogicalType::VARCHAR); + types.emplace_back(LogicalType::VARCHAR); + types.emplace_back(LogicalType::BOOLEAN); + types.emplace_back(LogicalType::BOOLEAN); + types.emplace_back(LogicalType::BOOLEAN); + types.emplace_back(LogicalType::VARCHAR); + names.emplace_back("short_name"); + names.emplace_back("long_name"); + names.emplace_back("can_create"); + names.emplace_back("can_copy"); + names.emplace_back("can_open"); + names.emplace_back("help_url"); + + auto result = make_uniq(); + result->driver_count = GDALGetDriverCount(); + return std::move(result); +} - auto driver = GetGDALDriverManager()->GetDriverByName(bind_data->driver_name.c_str()); - if (!driver) { - throw BinderException("Unknown driver '%s'", bind_data->driver_name); - } +//---------------------------------------------------------------------------------------------------------------------- +// Global State +//---------------------------------------------------------------------------------------------------------------------- +class GlobalState final : public GlobalTableFunctionState { +public: + idx_t current_idx; +}; - // Try get the file extension from the driver - auto file_ext = 
driver->GetMetadataItem(GDAL_DMD_EXTENSION); - if (file_ext) { - input.file_extension = file_ext; - } else { - // Space separated list of file extensions - auto file_exts = driver->GetMetadataItem(GDAL_DMD_EXTENSIONS); - if (file_exts) { - auto exts = StringUtil::Split(file_exts, ' '); - if (!exts.empty()) { - input.file_extension = exts[0]; - } - } - } +auto InitGlobal(ClientContext &context, TableFunctionInitInput &input) -> unique_ptr { + auto result = make_uniq(); + result->current_idx = 0; + return std::move(result); +} - // Driver specific checks - if (bind_data->driver_name == "OpenFileGDB" && bind_data->geometry_type == wkbUnknown) { - throw BinderException("OpenFileGDB requires 'GEOMETRY_TYPE' parameter to be set when writing!"); - } +//---------------------------------------------------------------------------------------------------------------------- +// Scan +//---------------------------------------------------------------------------------------------------------------------- +void Scan(ClientContext &context, TableFunctionInput &input, DataChunk &output) { + auto &bdata = input.bind_data->Cast(); + auto &gstate = input.global_state->Cast(); - return std::move(bind_data); - } + idx_t count = 0; - //------------------------------------------------------------------------------------------------------------------ - // Global State - //------------------------------------------------------------------------------------------------------------------ - struct GlobalState final : GlobalFunctionData { - mutex lock; - GDALDatasetUniquePtr dataset; - OGRLayer *layer; - vector> field_defs; - - GlobalState(GDALDatasetUniquePtr dataset, OGRLayer *layer, vector> field_defs) - : dataset(std::move(dataset)), layer(layer), field_defs(std::move(field_defs)) { - } - }; - - static bool IsGeometryType(const LogicalType &type) { - return type == GeoTypes::POINT_2D() || type == LogicalType::GEOMETRY(); - } - - static unique_ptr OGRFieldTypeFromLogicalType(const string 
&name, const LogicalType &type) { - // TODO: Set OGRFieldSubType for integers and integer lists - // TODO: Set string width? - - switch (type.id()) { - case LogicalTypeId::BOOLEAN: { - auto field = make_uniq(name.c_str(), OFTInteger); - field->SetSubType(OFSTBoolean); - return field; - } - case LogicalTypeId::TINYINT: { - // There is no subtype for byte? - return make_uniq(name.c_str(), OFTInteger); - } - case LogicalTypeId::SMALLINT: { - auto field = make_uniq(name.c_str(), OFTInteger); - field->SetSubType(OFSTInt16); - return field; - } - case LogicalTypeId::INTEGER: { - return make_uniq(name.c_str(), OFTInteger); - } - case LogicalTypeId::BIGINT: - return make_uniq(name.c_str(), OFTInteger64); - case LogicalTypeId::FLOAT: { - auto field = make_uniq(name.c_str(), OFTReal); - field->SetSubType(OFSTFloat32); - return field; - } - case LogicalTypeId::DOUBLE: - return make_uniq(name.c_str(), OFTReal); - case LogicalTypeId::VARCHAR: - return make_uniq(name.c_str(), OFTString); - case LogicalTypeId::BLOB: - return make_uniq(name.c_str(), OFTBinary); - case LogicalTypeId::DATE: - return make_uniq(name.c_str(), OFTDate); - case LogicalTypeId::TIME: - return make_uniq(name.c_str(), OFTTime); - case LogicalTypeId::TIMESTAMP: - case LogicalTypeId::TIMESTAMP_NS: - case LogicalTypeId::TIMESTAMP_MS: - case LogicalTypeId::TIMESTAMP_SEC: - case LogicalTypeId::TIMESTAMP_TZ: - return make_uniq(name.c_str(), OFTDateTime); - case LogicalTypeId::LIST: { - auto child_type = ListType::GetChildType(type); - switch (child_type.id()) { - case LogicalTypeId::BOOLEAN: { - auto field = make_uniq(name.c_str(), OFTIntegerList); - field->SetSubType(OFSTBoolean); - return field; - } - case LogicalTypeId::TINYINT: { - // There is no subtype for byte? 
- return make_uniq(name.c_str(), OFTIntegerList); - } - case LogicalTypeId::SMALLINT: { - auto field = make_uniq(name.c_str(), OFTIntegerList); - field->SetSubType(OFSTInt16); - return field; - } - case LogicalTypeId::INTEGER: - return make_uniq(name.c_str(), OFTIntegerList); - case LogicalTypeId::BIGINT: - return make_uniq(name.c_str(), OFTInteger64List); - case LogicalTypeId::FLOAT: { - auto field = make_uniq(name.c_str(), OFTRealList); - field->SetSubType(OFSTFloat32); - return field; - } - case LogicalTypeId::DOUBLE: - return make_uniq(name.c_str(), OFTRealList); - case LogicalTypeId::VARCHAR: - return make_uniq(name.c_str(), OFTStringList); - default: - throw NotImplementedException("Unsupported type for OGR: %s", type.ToString()); - } - } - default: - throw NotImplementedException("Unsupported type for OGR: %s", type.ToString()); + const auto total_end = bdata.driver_count; + const auto batch_end = gstate.current_idx + STANDARD_VECTOR_SIZE; + const auto chunk_end = MinValue(batch_end, total_end); + + for (const auto next_idx = chunk_end; gstate.current_idx < next_idx; gstate.current_idx++) { + auto driver = GDALGetDriver(static_cast(gstate.current_idx)); + + // Check if the driver is a vector driver + if (GDALGetMetadataItem(driver, GDAL_DCAP_VECTOR, nullptr) == nullptr) { + continue; } + + auto short_name = Value::CreateValue(GDALGetDriverShortName(driver)); + auto long_name = Value::CreateValue(GDALGetDriverLongName(driver)); + + const char *create_flag = GDALGetMetadataItem(driver, GDAL_DCAP_CREATE, nullptr); + auto create_value = Value::CreateValue(create_flag != nullptr); + + const char *copy_flag = GDALGetMetadataItem(driver, GDAL_DCAP_CREATECOPY, nullptr); + auto copy_value = Value::CreateValue(copy_flag != nullptr); + const char *open_flag = GDALGetMetadataItem(driver, GDAL_DCAP_OPEN, nullptr); + auto open_value = Value::CreateValue(open_flag != nullptr); + + auto help_topic_flag = GDALGetDriverHelpTopic(driver); + auto help_topic_value = 
help_topic_flag == nullptr + ? Value(LogicalType::VARCHAR) + : Value(StringUtil::Format("https://gdal.org/%s", help_topic_flag)); + + output.data[0].SetValue(count, short_name); + output.data[1].SetValue(count, long_name); + output.data[2].SetValue(count, create_value); + output.data[3].SetValue(count, copy_value); + output.data[4].SetValue(count, open_value); + output.data[5].SetValue(count, help_topic_value); + count++; } + output.SetCardinality(count); +} - static unique_ptr InitGlobal(ClientContext &context, FunctionData &bind_data, - const string &file_path) { +//---------------------------------------------------------------------------------------------------------------------- +// Register +//---------------------------------------------------------------------------------------------------------------------- +static constexpr auto DESCRIPTION = R"( + Returns the list of supported GDAL drivers and file formats - auto &gdal_data = bind_data.Cast(); - GDALDriver *driver = GetGDALDriverManager()->GetDriverByName(gdal_data.driver_name.c_str()); - if (!driver) { - throw IOException("Could not open driver"); - } + Note that far from all of these drivers have been tested properly. + Some may require additional options to be passed to work as expected. + If you run into any issues please first consult the [consult the GDAL docs](https://gdal.org/drivers/vector/index.html). 
+)"; - // Create the dataset - auto &client_ctx = GDALClientContextState::GetOrCreate(context); - auto prefixed_path = client_ctx.GetPrefix(file_path); - auto dataset = GDALDatasetUniquePtr( - driver->Create(prefixed_path.c_str(), 0, 0, 0, GDT_Unknown, gdal_data.dataset_creation_options)); - if (!dataset) { - throw IOException("Could not open dataset"); - } +static constexpr auto EXAMPLE = R"( + SELECT * FROM ST_Drivers(); +)"; - // Set the SRS if provided - OGRSpatialReference srs; - if (!gdal_data.target_srs.empty()) { - srs.SetFromUserInput(gdal_data.target_srs.c_str()); - } - // Not all GDAL drivers check if the SRS is empty (cough cough GeoJSONSeq) - // so we have to pass nullptr if we want the default behavior. - OGRSpatialReference *srs_ptr = gdal_data.target_srs.empty() ? nullptr : &srs; +void Register(ExtensionLoader &loader) { + TableFunction list_func("ST_Drivers", {}, Scan, Bind, InitGlobal); + loader.RegisterFunction(list_func); - auto layer = dataset->CreateLayer(gdal_data.layer_name.c_str(), srs_ptr, gdal_data.geometry_type, - gdal_data.layer_creation_options); - if (!layer) { - throw IOException("Could not create layer"); - } + InsertionOrderPreservingMap tags; + tags.insert("ext", "spatial"); + FunctionBuilder::AddTableFunctionDocs(loader, "ST_Drivers", DESCRIPTION, EXAMPLE, tags); +} - // Create the layer field definitions - idx_t geometry_field_count = 0; - vector> field_defs; - for (idx_t i = 0; i < gdal_data.field_names.size(); i++) { - auto &name = gdal_data.field_names[i]; - auto &type = gdal_data.field_sql_types[i]; +} // namespace gdal_list +//===================================================================================================================== +// GDAL META +//====================================================================================================================== +namespace gdal_meta { - if (IsGeometryType(type)) { - geometry_field_count++; - if (geometry_field_count > 1) { - throw 
NotImplementedException("Multiple geometry fields not supported yet"); - } - } else { - auto field = OGRFieldTypeFromLogicalType(name, type); - if (layer->CreateField(field.get()) != OGRERR_NONE) { - throw IOException("Could not create attribute field"); - } - // TODO: ^ Like we do here vvv - field_defs.push_back(std::move(field)); - } - } - auto global_data = make_uniq(std::move(dataset), layer, std::move(field_defs)); +//---------------------------------------------------------------------------------------------------------------------- +// Bind +//---------------------------------------------------------------------------------------------------------------------- +class BindData final : public TableFunctionData { +public: + vector files; +}; - return std::move(global_data); - } +LogicalType GetGeometryFieldType() { + return LogicalType::STRUCT({ + {"name", LogicalType::VARCHAR}, + {"type", LogicalType::VARCHAR}, + {"nullable", LogicalType::BOOLEAN}, + {"crs", LogicalType::STRUCT({ + {"name", LogicalType::VARCHAR}, + {"auth_name", LogicalType::VARCHAR}, + {"auth_code", LogicalType::VARCHAR}, + {"wkt", LogicalType::VARCHAR}, + {"proj4", LogicalType::VARCHAR}, + {"projjson", LogicalType::VARCHAR}, + })}, + }); +} - //------------------------------------------------------------------------------------------------------------------ - // Local State - //------------------------------------------------------------------------------------------------------------------ - struct LocalState final : public LocalFunctionData { - ArenaAllocator arena; - explicit LocalState(ClientContext &context) : arena(BufferAllocator::Get(context)) { - } - }; +LogicalType GetStandardFieldType() { + return LogicalType::STRUCT({ + {"name", LogicalType::VARCHAR}, + {"type", LogicalType::VARCHAR}, + {"subtype", LogicalType::VARCHAR}, + {"nullable", LogicalType::BOOLEAN}, + {"unique", LogicalType::BOOLEAN}, + {"width", LogicalType::BIGINT}, + {"precision", LogicalType::BIGINT}, + }); +} - 
static unique_ptr InitLocal(ExecutionContext &context, FunctionData &bind_data) { - auto local_data = make_uniq(context.client); - return std::move(local_data); - } +LogicalType GetLayerType() { + return LogicalType::STRUCT({ + {"name", LogicalType::VARCHAR}, + {"feature_count", LogicalType::BIGINT}, + {"geometry_fields", LogicalType::LIST(GetGeometryFieldType())}, + {"fields", LogicalType::LIST(GetStandardFieldType())}, + }); +} - //------------------------------------------------------------------------------------------------------------------ - // Sink - //------------------------------------------------------------------------------------------------------------------ - static OGRGeometryUniquePtr OGRGeometryFromValue(const LogicalType &type, const Value &value, - ArenaAllocator &arena) { - if (value.IsNull()) { - return nullptr; - } +auto Bind(ClientContext &context, TableFunctionBindInput &input, vector &types, vector &names) + -> unique_ptr { + names.push_back("file_name"); + names.push_back("driver_short_name"); + names.push_back("driver_long_name"); + names.push_back("layers"); + + types.push_back(LogicalType::VARCHAR); + types.push_back(LogicalType::VARCHAR); + types.push_back(LogicalType::VARCHAR); + types.push_back(LogicalType::LIST(GetLayerType())); + + const auto mf_reader = MultiFileReader::Create(input.table_function); + const auto mf_inputs = mf_reader->CreateFileList(context, input.inputs[0], FileGlobOptions::ALLOW_EMPTY); - if (type == LogicalType::GEOMETRY()) { - const auto blob = value.GetValueUnsafe(); - uint32_t size = blob.GetSize(); - OGRGeometry *ptr; - // TODO: Fix this - const auto ok = OGRGeometryFactory::createFromWkb(blob.GetData(), nullptr, &ptr, size, wkbVariantIso); - if (ok != OGRERR_NONE) { - throw IOException("Could not parse WKB"); + auto result = make_uniq(); + result->files = mf_inputs->GetAllFiles(); + return std::move(result); +} + 
+//---------------------------------------------------------------------------------------------------------------------- +// Global State +//---------------------------------------------------------------------------------------------------------------------- +class GlobalState final : public GlobalTableFunctionState { +public: + idx_t current_idx = 0; +}; + +auto InitGlobal(ClientContext &context, TableFunctionInitInput &input) -> unique_ptr { + auto result = make_uniq(); + return std::move(result); +} + +//---------------------------------------------------------------------------------------------------------------------- +// Scan +//---------------------------------------------------------------------------------------------------------------------- +static Value GetLayerData(const GDALDatasetUniquePtr &dataset) { + + vector layer_values; + for (const auto &layer : dataset->GetLayers()) { + child_list_t layer_value_fields; + + layer_value_fields.emplace_back("name", Value(layer->GetName())); + layer_value_fields.emplace_back("feature_count", Value(static_cast(layer->GetFeatureCount()))); + + vector geometry_fields; + for (const auto &field : layer->GetLayerDefn()->GetGeomFields()) { + child_list_t geometry_field_value_fields; + auto field_name = field->GetNameRef(); + if (std::strlen(field_name) == 0) { + field_name = "geom"; } - return OGRGeometryUniquePtr(ptr); + geometry_field_value_fields.emplace_back("name", Value(field_name)); + geometry_field_value_fields.emplace_back("type", Value(OGRGeometryTypeToName(field->GetType()))); + geometry_field_value_fields.emplace_back("nullable", Value(static_cast(field->IsNullable()))); + + const auto crs = field->GetSpatialRef(); + if (crs != nullptr) { + child_list_t crs_value_fields; + crs_value_fields.emplace_back("name", Value(crs->GetName())); + crs_value_fields.emplace_back("auth_name", Value(crs->GetAuthorityName(nullptr))); + crs_value_fields.emplace_back("auth_code", Value(crs->GetAuthorityCode(nullptr))); + + 
char *wkt_ptr = nullptr; + crs->exportToWkt(&wkt_ptr); + crs_value_fields.emplace_back("wkt", wkt_ptr ? Value(wkt_ptr) : Value()); + CPLFree(wkt_ptr); + + char *proj4_ptr = nullptr; + crs->exportToProj4(&proj4_ptr); + crs_value_fields.emplace_back("proj4", proj4_ptr ? Value(proj4_ptr) : Value()); + CPLFree(proj4_ptr); + + char *projjson_ptr = nullptr; + crs->exportToPROJJSON(&projjson_ptr, nullptr); + crs_value_fields.emplace_back("projjson", projjson_ptr ? Value(projjson_ptr) : Value()); + CPLFree(projjson_ptr); + + geometry_field_value_fields.emplace_back("crs", Value::STRUCT(crs_value_fields)); + } else { + Value null_crs; + geometry_field_value_fields.emplace_back("crs", null_crs); + } + + geometry_fields.push_back(Value::STRUCT(geometry_field_value_fields)); } + layer_value_fields.emplace_back("geometry_fields", + Value::LIST(GetGeometryFieldType(), std::move(geometry_fields))); - if (type == GeoTypes::POINT_2D()) { - auto children = StructValue::GetChildren(value); - auto x = children[0].GetValue(); - auto y = children[1].GetValue(); - auto ogr_point = new OGRPoint(x, y); - return OGRGeometryUniquePtr(ogr_point); + vector standard_fields; + for (const auto &field : layer->GetLayerDefn()->GetFields()) { + child_list_t standard_field_value_fields; + standard_field_value_fields.emplace_back("name", Value(field->GetNameRef())); + standard_field_value_fields.emplace_back("type", Value(OGR_GetFieldTypeName(field->GetType()))); + standard_field_value_fields.emplace_back("subtype", Value(OGR_GetFieldSubTypeName(field->GetSubType()))); + standard_field_value_fields.emplace_back("nullable", Value(field->IsNullable())); + standard_field_value_fields.emplace_back("unique", Value(field->IsUnique())); + standard_field_value_fields.emplace_back("width", Value(field->GetWidth())); + standard_field_value_fields.emplace_back("precision", Value(field->GetPrecision())); + standard_fields.push_back(Value::STRUCT(standard_field_value_fields)); } + 
layer_value_fields.emplace_back("fields", Value::LIST(GetStandardFieldType(), std::move(standard_fields))); - throw NotImplementedException("Unsupported geometry type"); + layer_values.push_back(Value::STRUCT(layer_value_fields)); } - static void SetOgrFieldFromValue(OGRFeature *feature, int field_idx, const LogicalType &type, const Value &value) { - // TODO: Set field by index always instead of by name for performance. - if (value.IsNull()) { - feature->SetFieldNull(field_idx); - return; - } - switch (type.id()) { - case LogicalTypeId::BOOLEAN: - feature->SetField(field_idx, value.GetValue()); - break; - case LogicalTypeId::TINYINT: - feature->SetField(field_idx, value.GetValue()); - break; - case LogicalTypeId::SMALLINT: - feature->SetField(field_idx, value.GetValue()); - break; - case LogicalTypeId::INTEGER: - feature->SetField(field_idx, value.GetValue()); - break; - case LogicalTypeId::BIGINT: - feature->SetField(field_idx, (GIntBig)value.GetValue()); - break; - case LogicalTypeId::FLOAT: - feature->SetField(field_idx, value.GetValue()); - break; - case LogicalTypeId::DOUBLE: - feature->SetField(field_idx, value.GetValue()); - break; - case LogicalTypeId::VARCHAR: - case LogicalTypeId::BLOB: { - auto str = value.GetValueUnsafe(); - feature->SetField(field_idx, (int)str.GetSize(), str.GetDataUnsafe()); - } break; - case LogicalTypeId::DATE: { - auto date = value.GetValueUnsafe(); - auto year = Date::ExtractYear(date); - auto month = Date::ExtractMonth(date); - auto day = Date::ExtractDay(date); - feature->SetField(field_idx, year, month, day, 0, 0, 0, 0); - } break; - case LogicalTypeId::TIME: { - auto time = value.GetValueUnsafe(); - auto hour = static_cast(time.micros / Interval::MICROS_PER_HOUR); - auto minute = static_cast((time.micros % Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE); - auto second = static_cast(static_cast(time.micros % Interval::MICROS_PER_MINUTE) / - static_cast(Interval::MICROS_PER_SEC)); - feature->SetField(field_idx, 0, 0, 
0, hour, minute, second, 0); - } break; - case LogicalTypeId::TIMESTAMP: { - auto timestamp = value.GetValueUnsafe(); - auto date = Timestamp::GetDate(timestamp); - auto time = Timestamp::GetTime(timestamp); - auto year = Date::ExtractYear(date); - auto month = Date::ExtractMonth(date); - auto day = Date::ExtractDay(date); - auto hour = static_cast((time.micros % Interval::MICROS_PER_DAY) / Interval::MICROS_PER_HOUR); - auto minute = static_cast((time.micros % Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE); - auto second = static_cast(static_cast(time.micros % Interval::MICROS_PER_MINUTE) / - static_cast(Interval::MICROS_PER_SEC)); - feature->SetField(field_idx, year, month, day, hour, minute, second, 0); - } break; - case LogicalTypeId::TIMESTAMP_NS: { - auto timestamp = value.GetValueUnsafe(); - timestamp = Timestamp::FromEpochNanoSeconds(timestamp.value); - auto date = Timestamp::GetDate(timestamp); - auto time = Timestamp::GetTime(timestamp); - auto year = Date::ExtractYear(date); - auto month = Date::ExtractMonth(date); - auto day = Date::ExtractDay(date); - auto hour = static_cast((time.micros % Interval::MICROS_PER_DAY) / Interval::MICROS_PER_HOUR); - auto minute = static_cast((time.micros % Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE); - auto second = static_cast(static_cast(time.micros % Interval::MICROS_PER_MINUTE) / - static_cast(Interval::MICROS_PER_SEC)); - feature->SetField(field_idx, year, month, day, hour, minute, second, 0); - } break; - case LogicalTypeId::TIMESTAMP_MS: { - auto timestamp = value.GetValueUnsafe(); - timestamp = Timestamp::FromEpochMs(timestamp.value); - auto date = Timestamp::GetDate(timestamp); - auto time = Timestamp::GetTime(timestamp); - auto year = Date::ExtractYear(date); - auto month = Date::ExtractMonth(date); - auto day = Date::ExtractDay(date); - auto hour = static_cast((time.micros % Interval::MICROS_PER_DAY) / Interval::MICROS_PER_HOUR); - auto minute = static_cast((time.micros % 
Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE); - auto second = static_cast(static_cast(time.micros % Interval::MICROS_PER_MINUTE) / - static_cast(Interval::MICROS_PER_SEC)); - feature->SetField(field_idx, year, month, day, hour, minute, second, 0); - } break; - case LogicalTypeId::TIMESTAMP_SEC: { - auto timestamp = value.GetValueUnsafe(); - timestamp = Timestamp::FromEpochSeconds(timestamp.value); - auto date = Timestamp::GetDate(timestamp); - auto time = Timestamp::GetTime(timestamp); - auto year = Date::ExtractYear(date); - auto month = Date::ExtractMonth(date); - auto day = Date::ExtractDay(date); - auto hour = static_cast((time.micros % Interval::MICROS_PER_DAY) / Interval::MICROS_PER_HOUR); - auto minute = static_cast((time.micros % Interval::MICROS_PER_HOUR) / Interval::MICROS_PER_MINUTE); - auto second = static_cast(static_cast(time.micros % Interval::MICROS_PER_MINUTE) / - static_cast(Interval::MICROS_PER_SEC)); - feature->SetField(field_idx, year, month, day, hour, minute, second, 0); - } break; - case LogicalTypeId::TIMESTAMP_TZ: { - // Not sure what to with the timezone, just let GDAL parse it? 
- auto timestamp = value.GetValueUnsafe(); - auto time_str = Timestamp::ToString(timestamp); - feature->SetField(field_idx, time_str.c_str()); - } break; - default: - // TODO: Handle list types - throw NotImplementedException("Unsupported field type"); - } - } - - static void Sink(ExecutionContext &context, FunctionData &bdata, GlobalFunctionData &gstate, - LocalFunctionData &lstate, DataChunk &input) { - - auto &bind_data = bdata.Cast(); - auto &global_state = gstate.Cast(); - auto &local_state = lstate.Cast(); - local_state.arena.Reset(); - - lock_guard d_lock(global_state.lock); - auto layer = global_state.layer; - - // Create the feature - input.Flatten(); - for (idx_t row_idx = 0; row_idx < input.size(); row_idx++) { - - auto feature = OGRFeatureUniquePtr(OGRFeature::CreateFeature(layer->GetLayerDefn())); - - // Geometry fields do not count towards the field index, so we need to keep track of them separately. - idx_t field_idx = 0; - for (idx_t col_idx = 0; col_idx < input.ColumnCount(); col_idx++) { - auto &type = bind_data.field_sql_types[col_idx]; - auto value = input.GetValue(col_idx, row_idx); - - if (IsGeometryType(type)) { - // TODO: check how many geometry fields there are and use the correct one. 
- auto geom = OGRGeometryFromValue(type, value, local_state.arena); - if (geom && bind_data.geometry_type != wkbUnknown && - geom->getGeometryType() != bind_data.geometry_type) { - auto got_name = StringUtil::Replace( - StringUtil::Upper(OGRGeometryTypeToName(geom->getGeometryType())), " ", ""); - auto expected_name = StringUtil::Replace( - StringUtil::Upper(OGRGeometryTypeToName(bind_data.geometry_type)), " ", ""); - throw InvalidInputException( - "Expected all geometries to be of type '%s', but got one of type '%s'", expected_name, - got_name); - } + return Value::LIST(GetLayerType(), std::move(layer_values)); +} - if (feature->SetGeometryDirectly(geom.release()) != OGRERR_NONE) { - throw IOException("Could not set geometry"); - } - } else { - SetOgrFieldFromValue(feature.get(), static_cast(field_idx), type, value); - field_idx++; - } - } - if (layer->CreateFeature(feature.get()) != OGRERR_NONE) { - throw IOException("Could not create feature"); - } +void Scan(ClientContext &context, TableFunctionInput &input, DataChunk &output) { + auto &bdata = input.bind_data->Cast(); + auto &gstate = input.global_state->Cast(); + + const auto &file_prefix = DuckDBFileSystemPrefix::GetOrCreate(context); + + const auto remaining = MinValue(STANDARD_VECTOR_SIZE, bdata.files.size() - gstate.current_idx); + auto output_idx = 0; + + for (idx_t in_idx = 0; in_idx < remaining; in_idx++, gstate.current_idx++) { + auto &file = bdata.files[gstate.current_idx]; + auto prefixed_file_name = file_prefix.AddPrefix(file.path); + + GDALDatasetUniquePtr dataset; + try { + dataset = GDALDatasetUniquePtr( + GDALDataset::Open(prefixed_file_name.c_str(), GDAL_OF_VECTOR | GDAL_OF_VERBOSE_ERROR)); + } catch (...) 
{ + // Just skip anything we cant open + continue; } - } - //------------------------------------------------------------------------------------------------------------------ - // Combine - //------------------------------------------------------------------------------------------------------------------ - static void Combine(ExecutionContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, - LocalFunctionData &lstate) { - } + output.data[0].SetValue(output_idx, file.path); + output.data[1].SetValue(output_idx, dataset->GetDriver()->GetDescription()); + output.data[2].SetValue(output_idx, dataset->GetDriver()->GetMetadataItem(GDAL_DMD_LONGNAME)); + output.data[3].SetValue(output_idx, GetLayerData(dataset)); - //------------------------------------------------------------------------------------------------------------------ - // Finalize - //------------------------------------------------------------------------------------------------------------------ - static void Finalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate) { - const auto &global_state = gstate.Cast(); - global_state.dataset->FlushCache(); - global_state.dataset->Close(); + output_idx++; } - //------------------------------------------------------------------------------------------------------------------ - // Register - //------------------------------------------------------------------------------------------------------------------ - static void Register(ExtensionLoader &loader) { - /* - CopyFunction info("GDAL"); - info.copy_to_bind = Bind; - info.copy_to_initialize_local = InitLocal; - info.copy_to_initialize_global = InitGlobal; - info.copy_to_sink = Sink; - info.copy_to_combine = Combine; - info.copy_to_finalize = Finalize; - info.extension = "gdal"; - loader.RegisterFunction(info); - */ - } -}; + output.SetCardinality(output_idx); +} -} // namespace 
+//---------------------------------------------------------------------------------------------------------------------- +// Register +//---------------------------------------------------------------------------------------------------------------------- +static constexpr auto DESCRIPTION = R"( + Read the metadata from a variety of geospatial file formats using the GDAL library. -//###################################################################################################################### -// Register Module -//###################################################################################################################### -void RegisterGDALModule(ExtensionLoader &loader) { - // - // // Load GDAL (once) - // static std::once_flag loaded; - // std::call_once(loaded, [&]() { - // // Register all embedded drivers (dont go looking for plugins) - // OGRRegisterAllInternal(); - // - // // Set GDAL error handler - // CPLSetErrorHandler([](CPLErr e, int code, const char *raw_msg) { - // // DuckDB doesnt do warnings, so we only throw on errors - // if (e != CE_Failure && e != CE_Fatal) { - // return; - // } - // - // // If the error contains a /vsiduckdb-/ prefix, - // // try to strip it off to make the errors more readable - // auto msg = string(raw_msg); - // auto path_pos = msg.find("/vsiduckdb-"); - // if (path_pos != string::npos) { - // // We found a path, strip it off - // msg.erase(path_pos, 48); - // } - // - // // GDAL Catches exceptions internally and passes them on to the handler again as CPLE_AppDefined - // // So we don't add any extra information here or we end up with very long nested error messages. - // // Using ErrorData we can parse the message part of DuckDB exceptions properly, and for other exceptions - // // their error message will still be preserved as the "raw message". 
- // ErrorData error_msg(raw_msg); - // - // switch (code) { - // case CPLE_NoWriteAccess: - // throw PermissionException(error_msg.RawMessage()); - // case CPLE_UserInterrupt: - // throw InterruptException(); - // case CPLE_OutOfMemory: - // throw OutOfMemoryException(error_msg.RawMessage()); - // case CPLE_NotSupported: - // throw NotImplementedException(error_msg.RawMessage()); - // case CPLE_AssertionFailed: - // case CPLE_ObjectNull: - // throw InternalException(error_msg.RawMessage()); - // case CPLE_IllegalArg: - // throw InvalidInputException( error_msg.RawMessage()); - // case CPLE_AppDefined: - // case CPLE_HttpResponse: - // case CPLE_FileIO: - // case CPLE_OpenFailed: - // default: - // throw IOException(error_msg.RawMessage()); - // } - // }); - // }); - - RegisterExtraFunction(loader); - - //ST_Read::Register(loader); - //ST_Read_Meta::Register(loader); - //ST_Drivers::Register(loader); - // ST_Write::Register(loader); + The `ST_Read_Meta` table function accompanies the `ST_Read` table function, but instead of reading the contents of a file, this function scans the metadata instead. + Since the data model of the underlying GDAL library is quite flexible, most of the interesting metadata is within the returned `layers` column, which is a somewhat complex nested structure of DuckDB `STRUCT` and `LIST` types. 
+ )"; + +static constexpr auto EXAMPLE = R"( + -- Find the coordinate reference system authority name and code for the first layers first geometry column in the file + SELECT + layers[1].geometry_fields[1].crs.auth_name as name, + layers[1].geometry_fields[1].crs.auth_code as code + FROM st_read_meta('../../tmp/data/amsterdam_roads.fgb'); + )"; + +static void Register(ExtensionLoader &loader) { + const TableFunction func("ST_Read_Meta", {LogicalType::VARCHAR}, Scan, Bind, InitGlobal); + loader.RegisterFunction(MultiFileReader::CreateFunctionSet(func)); + + InsertionOrderPreservingMap tags; + tags.insert("ext", "spatial"); + FunctionBuilder::AddTableFunctionDocs(loader, "ST_Read_Meta", DESCRIPTION, EXAMPLE, tags); } +} // namespace gdal_meta +//====================================================================================================================== +// GDAL MODULE +//====================================================================================================================== +void RegisterGDALModule(ExtensionLoader &loader) { + + // Load GDAL (once) + static std::once_flag loaded; + std::call_once(loaded, [&]() { + // Register all embedded drivers (dont go looking for plugins) + OGRRegisterAllInternal(); + + // Set GDAL error handler + CPLSetErrorHandler([](CPLErr e, int code, const char *raw_msg) { + // DuckDB doesnt do warnings, so we only throw on errors + if (e != CE_Failure && e != CE_Fatal) { + return; + } + + // GDAL Catches exceptions internally and passes them on to the handler again as CPLE_AppDefined + // So we don't add any extra information here or we end up with very long nested error messages. + // Using ErrorData we can parse the message part of DuckDB exceptions properly, and for other exceptions + // their error message will still be preserved as the "raw message". 
+ ErrorData error_data(raw_msg); + auto msg = error_data.RawMessage(); + + // If the error contains a /vsiduckdb-/ prefix, + // try to strip it off to make the errors more readable + auto path_pos = msg.find("/vsiduckdb-"); + if (path_pos != string::npos) { + // We found a path, strip it off + msg.erase(path_pos, 48); + } + + switch (code) { + case CPLE_NoWriteAccess: + throw PermissionException(msg); + case CPLE_UserInterrupt: + throw InterruptException(); + case CPLE_OutOfMemory: + throw OutOfMemoryException(msg); + case CPLE_NotSupported: + throw NotImplementedException(msg); + case CPLE_AssertionFailed: + case CPLE_ObjectNull: + throw InternalException(msg); + case CPLE_IllegalArg: + throw InvalidInputException(msg); + case CPLE_AppDefined: + case CPLE_HttpResponse: + case CPLE_FileIO: + case CPLE_OpenFailed: + default: + throw IOException(msg); + } + }); + }); + + gdal_read::Register(loader); + gdal_copy::Register(loader); + gdal_list::Register(loader); + gdal_meta::Register(loader); +} } // namespace duckdb From c4a1ea6c7ba446e683f6ce84974999e812c6968f Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Tue, 18 Nov 2025 16:10:34 +0100 Subject: [PATCH 28/41] cleanup --- dist_join.sql | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 dist_join.sql diff --git a/dist_join.sql b/dist_join.sql deleted file mode 100644 index c551d23e..00000000 --- a/dist_join.sql +++ /dev/null @@ -1,10 +0,0 @@ -CREATE TABLE points AS - SELECT - row_number() OVER () AS id, - ST_POINT(x, y) as geom - FROM - range(0, 200, 2) AS r(x), - range(0, 200, 2) AS r(y); - -.timer on -SELECT * FROM points as l JOIN points as r ON ST_DWithin(l.geom, r.geom, 4); From 5ef8499f8140dcd56bb4f2be1f0c63b6e3857abc Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Wed, 19 Nov 2025 11:23:06 +0100 Subject: [PATCH 29/41] write ogc_fid if present --- src/spatial/modules/gdal/gdal_module.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git 
a/src/spatial/modules/gdal/gdal_module.cpp b/src/spatial/modules/gdal/gdal_module.cpp index c58119e7..e2b5a9ee 100644 --- a/src/spatial/modules/gdal/gdal_module.cpp +++ b/src/spatial/modules/gdal/gdal_module.cpp @@ -1306,11 +1306,18 @@ auto InitGlobal(ClientContext &context, FunctionData &bdata_p, const string &rea if (geometry_field_count > 1) { throw NotImplementedException("Multiple geometry fields not supported yet"); } - } else { - // Register normal attribute - if (!OGR_L_CreateFieldFromArrowSchema(result->layer, child_schema, nullptr)) { - throw IOException("Could not create field in GDAL layer for column: " + string(child_schema->name)); - } + continue; + } + + // Check if this is the FID field + if (strcmp(child_schema->name, OGRLayer::DEFAULT_ARROW_FID_NAME) == 0) { + // Skip FID field + continue; + } + + // Register normal attribute + if (!OGR_L_CreateFieldFromArrowSchema(result->layer, child_schema, nullptr)) { + throw IOException("Could not create field in GDAL layer for column: " + string(child_schema->name)); } } From 3fb89eb76f438a430d2275419e8dea10cd234e09 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Wed, 19 Nov 2025 11:23:24 +0100 Subject: [PATCH 30/41] update mvt to use wkb format --- src/spatial/modules/mvt/mvt_module.cpp | 119 +++++++++++-------------- 1 file changed, 52 insertions(+), 67 deletions(-) diff --git a/src/spatial/modules/mvt/mvt_module.cpp b/src/spatial/modules/mvt/mvt_module.cpp index b4b25d19..e5f4cac1 100644 --- a/src/spatial/modules/mvt/mvt_module.cpp +++ b/src/spatial/modules/mvt/mvt_module.cpp @@ -417,34 +417,17 @@ class MVTFeatureBuilder { void SetGeometry(const string_t &geom_blob) { - BinaryReader cursor(geom_blob.GetData(), geom_blob.GetSize()); - const auto type = static_cast(cursor.Read() + 1); - const auto flags = cursor.Read(); - cursor.Skip(sizeof(uint16_t)); - cursor.Skip(sizeof(uint32_t)); // padding - - // Parse flags - const auto has_z = (flags & 0x01) != 0; - const auto has_m = (flags & 0x02) != 0; - 
const auto has_bbox = (flags & 0x04) != 0; - - const auto format_v1 = (flags & 0x40) != 0; - const auto format_v0 = (flags & 0x80) != 0; - - if (format_v1 || format_v0) { - // Unsupported version, throw an error - throw NotImplementedException( - "This geometry seems to be written with a newer version of the DuckDB spatial library that is not " - "compatible with this version. Please upgrade your DuckDB installation."); - } + BinaryReader reader(geom_blob.GetData(), geom_blob.GetSize()); - if (has_bbox) { - // Skip past bbox if present - cursor.Skip(sizeof(float) * 2 * (2 + has_z + has_m)); + const auto le = reader.Read(); + if (le != 1) { + throw InvalidInputException("ST_AsMVT: Unsupported geometry endian-ness"); } - - // Read the first type - cursor.Skip(sizeof(uint32_t)); + const auto meta = reader.Read(); + const auto type = static_cast((meta & 0x0000FFFF) % 1000); + const auto flag = (meta & 0x0000FFFF) / 1000; + const auto has_z = (flag & 0x01) != 0; + const auto has_m = (flag & 0x02) != 0; const auto vertex_width = (2 + (has_z ? 1 : 0) + (has_m ? 
1 : 0)) * sizeof(double); const auto vertex_space = vertex_width - (2 * sizeof(double)); // Space for x and y @@ -454,14 +437,17 @@ class MVTFeatureBuilder { geometry_type = 1; // MVT_POINT // Read the point geometry - const auto vertex_count = cursor.Read(); - if (vertex_count == 0) { - // No vertices, skip + const auto x_double = reader.Read(); + const auto y_double = reader.Read(); + reader.Skip(vertex_space); // Skip z and m if present + + if (std::isnan(x_double) && std::isnan(y_double)) { + // Empty point throw InvalidInputException("ST_AsMVT: POINT geometry cant be empty"); } - const auto x = CastDouble(cursor.Read()); - const auto y = CastDouble(cursor.Read()); - cursor.Skip(vertex_space); // Skip z and m if present + + const auto x = CastDouble(x_double); + const auto y = CastDouble(y_double); geometry.push_back((1 & 0x7) | (1 << 3)); // MoveTo, 1 part geometry.push_back(protozero::encode_zigzag32(x)); @@ -471,7 +457,7 @@ class MVTFeatureBuilder { case sgl::geometry_type::LINESTRING: { geometry_type = 2; // MVT_LINESTRING - const auto vertex_count = cursor.Read(); + const auto vertex_count = reader.Read(); if (vertex_count < 2) { // Invalid linestring, skip throw InvalidInputException("ST_AsMVT: LINESTRING geometry cant contain less than 2 vertices"); @@ -482,9 +468,9 @@ class MVTFeatureBuilder { for (uint32_t vertex_idx = 0; vertex_idx < vertex_count; vertex_idx++) { - const auto x = CastDouble(cursor.Read()); - const auto y = CastDouble(cursor.Read()); - cursor.Skip(vertex_space); // Skip z and m if present + const auto x = CastDouble(reader.Read()); + const auto y = CastDouble(reader.Read()); + reader.Skip(vertex_space); // Skip z and m if present if (vertex_idx == 0) { geometry.push_back((1 & 0x7) | (1 << 3)); // MoveTo, 1 part @@ -503,7 +489,7 @@ class MVTFeatureBuilder { case sgl::geometry_type::POLYGON: { geometry_type = 3; // MVT_POLYGON - const auto part_count = cursor.Read(); + const auto part_count = reader.Read(); if (part_count == 0) { // No 
parts, invalid throw InvalidInputException("ST_AsMVT: POLYGON geometry cant be empty"); @@ -512,19 +498,17 @@ class MVTFeatureBuilder { int32_t cursor_x = 0; int32_t cursor_y = 0; - auto ring_cursor = cursor; - cursor.Skip((part_count * 4) + (part_count % 2 == 1 ? 4 : 0)); // Skip part types and padding for (uint32_t part_idx = 0; part_idx < part_count; part_idx++) { - const auto vertex_count = ring_cursor.Read(); + const auto vertex_count = reader.Read(); if (vertex_count < 3) { // Invalid polygon, skip throw InvalidInputException("ST_AsMVT: POLYGON ring cant contain less than 3 vertices"); } for (uint32_t vertex_idx = 0; vertex_idx < vertex_count; vertex_idx++) { - const auto x = CastDouble(cursor.Read()); - const auto y = CastDouble(cursor.Read()); - cursor.Skip(vertex_space); // Skip z and m if present + const auto x = CastDouble(reader.Read()); + const auto y = CastDouble(reader.Read()); + reader.Skip(vertex_space); // Skip z and m if present if (vertex_idx == 0) { geometry.push_back((1 & 0x7) | (1 << 3)); // MoveTo, 1 part @@ -552,7 +536,7 @@ class MVTFeatureBuilder { case sgl::geometry_type::MULTI_POINT: { geometry_type = 1; // MVT_POINT - const auto part_count = cursor.Read(); + const auto part_count = reader.Read(); if (part_count == 0) { throw InvalidInputException("ST_AsMVT: MULTI_POINT geometry cant be empty"); } @@ -564,16 +548,20 @@ class MVTFeatureBuilder { // Read the parts for (uint32_t part_idx = 0; part_idx < part_count; part_idx++) { - cursor.Skip(sizeof(uint32_t)); // Skip part type - const auto vertex_count = cursor.Read(); - if (vertex_count == 0) { - // No vertices, skip + reader.Skip(sizeof(uint32_t) + sizeof(uint8_t)); // Skip part type + + // Read the point geometry + const auto x_double = reader.Read(); + const auto y_double = reader.Read(); + reader.Skip(vertex_space); // Skip z and m if present + + if (std::isnan(x_double) && std::isnan(y_double)) { + // Empty point throw InvalidInputException("ST_AsMVT: POINT geometry cant be empty"); 
} - const auto x = CastDouble(cursor.Read()); - const auto y = CastDouble(cursor.Read()); - cursor.Skip(vertex_space); // Skip z and m if present + const auto x = CastDouble(x_double); + const auto y = CastDouble(y_double); geometry.push_back(protozero::encode_zigzag32(x - cursor_x)); geometry.push_back(protozero::encode_zigzag32(y - cursor_y)); @@ -586,7 +574,7 @@ class MVTFeatureBuilder { geometry_type = 2; // MVT_LINESTRING // Read the multi-linestring geometry - const auto part_count = cursor.Read(); + const auto part_count = reader.Read(); if (part_count == 0) { // No parts, invalid throw InvalidInputException("ST_AsMVT: MULTI_LINESTRING geometry cant be empty"); @@ -595,8 +583,8 @@ class MVTFeatureBuilder { int32_t cursor_y = 0; for (uint32_t part_idx = 0; part_idx < part_count; part_idx++) { - cursor.Skip(sizeof(uint32_t)); // Skip part type - const auto vertex_count = cursor.Read(); + reader.Skip(sizeof(uint32_t) + sizeof(uint8_t)); // Skip part type + const auto vertex_count = reader.Read(); if (vertex_count < 2) { // Invalid linestring, skip @@ -605,9 +593,9 @@ class MVTFeatureBuilder { for (uint32_t vertex_idx = 0; vertex_idx < vertex_count; vertex_idx++) { - const auto x = CastDouble(cursor.Read()); - const auto y = CastDouble(cursor.Read()); - cursor.Skip(vertex_space); // Skip z and m if present + const auto x = CastDouble(reader.Read()); + const auto y = CastDouble(reader.Read()); + reader.Skip(vertex_space); // Skip z and m if present if (vertex_idx == 0) { geometry.push_back((1 & 0x7) | (1 << 3)); // MoveTo, 1 part @@ -628,7 +616,7 @@ class MVTFeatureBuilder { geometry_type = 3; // MVT_POLYGON // Read the multi-linestring geometry - const auto poly_count = cursor.Read(); + const auto poly_count = reader.Read(); if (poly_count == 0) { // No parts, invalid throw InvalidInputException("ST_AsMVT: MULTI_POLYGON geometry cant be empty"); @@ -638,27 +626,24 @@ class MVTFeatureBuilder { int32_t cursor_y = 0; for (uint32_t poly_idx = 0; poly_idx < 
poly_count; poly_idx++) { - cursor.Skip(sizeof(uint32_t)); // Skip part type - const auto part_count = cursor.Read(); + reader.Skip(sizeof(uint32_t) + sizeof(uint8_t)); // Skip part type + const auto part_count = reader.Read(); if (part_count == 0) { // No parts, invalid throw InvalidInputException("ST_AsMVT: POLYGON geometry cant be empty"); } - auto ring_cursor = cursor; - cursor.Skip((part_count * 4) + (part_count % 2 == 1 ? 4 : 0)); // Skip part types and padding - for (uint32_t part_idx = 0; part_idx < part_count; part_idx++) { - const auto vertex_count = ring_cursor.Read(); + const auto vertex_count = reader.Read(); if (vertex_count < 3) { // Invalid polygon, skip throw InvalidInputException("ST_AsMVT: POLYGON ring cant contain less than 3 vertices"); } for (uint32_t vertex_idx = 0; vertex_idx < vertex_count; vertex_idx++) { - const auto x = CastDouble(cursor.Read()); - const auto y = CastDouble(cursor.Read()); - cursor.Skip(vertex_space); // Skip z and m if present + const auto x = CastDouble(reader.Read()); + const auto y = CastDouble(reader.Read()); + reader.Skip(vertex_space); // Skip z and m if present if (vertex_idx == 0) { geometry.push_back((1 & 0x7) | (1 << 3)); // MoveTo, 1 part From 2db44b03f71f85d422209ca69288ebaf31bb6f7b Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Wed, 19 Nov 2025 13:21:27 +0100 Subject: [PATCH 31/41] sketch out wkb module --- src/spatial/modules/CMakeLists.txt | 1 + src/spatial/modules/wkb/CMakeLists.txt | 3 ++ src/spatial/modules/wkb/README.md | 5 ++ src/spatial/modules/wkb/wkb_module.cpp | 69 ++++++++++++++++++++++++++ src/spatial/modules/wkb/wkb_module.hpp | 9 ++++ src/spatial/spatial_extension.cpp | 2 + 6 files changed, 89 insertions(+) create mode 100644 src/spatial/modules/wkb/CMakeLists.txt create mode 100644 src/spatial/modules/wkb/README.md create mode 100644 src/spatial/modules/wkb/wkb_module.cpp create mode 100644 src/spatial/modules/wkb/wkb_module.hpp diff --git a/src/spatial/modules/CMakeLists.txt 
b/src/spatial/modules/CMakeLists.txt index c0545e03..88337b9c 100644 --- a/src/spatial/modules/CMakeLists.txt +++ b/src/spatial/modules/CMakeLists.txt @@ -7,6 +7,7 @@ endif() add_subdirectory(osm) add_subdirectory(shapefile) add_subdirectory(mvt) +add_subdirectory(wkb) set(EXTENSION_SOURCES ${EXTENSION_SOURCES} diff --git a/src/spatial/modules/wkb/CMakeLists.txt b/src/spatial/modules/wkb/CMakeLists.txt new file mode 100644 index 00000000..1dd6124a --- /dev/null +++ b/src/spatial/modules/wkb/CMakeLists.txt @@ -0,0 +1,3 @@ +set(EXTENSION_SOURCES + ${EXTENSION_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/wkb_module.cpp + PARENT_SCOPE) diff --git a/src/spatial/modules/wkb/README.md b/src/spatial/modules/wkb/README.md new file mode 100644 index 00000000..6026be9a --- /dev/null +++ b/src/spatial/modules/wkb/README.md @@ -0,0 +1,5 @@ +# WKB Module + +This module provides the `WKB_BLOB` type with associated casts and function overloads. +`WKB_BLOB` was used in previous versions of DuckDB Spatial to represent a geometry that is known to be in WKB format. +But this is now deprecated, and this module is only provided for backward compatibility. 
\ No newline at end of file diff --git a/src/spatial/modules/wkb/wkb_module.cpp b/src/spatial/modules/wkb/wkb_module.cpp new file mode 100644 index 00000000..d3911d0f --- /dev/null +++ b/src/spatial/modules/wkb/wkb_module.cpp @@ -0,0 +1,69 @@ +#include "wkb_module.hpp" + +#include "duckdb/common/types.hpp" +#include "duckdb/main/extension/extension_loader.hpp" +#include "duckdb/common/types/geometry.hpp" + +//###################################################################################################################### +// Types +//###################################################################################################################### +namespace duckdb { +namespace { + +struct WKBTypes { + + static LogicalType WKB_BLOB() { + auto blob_type = LogicalType(LogicalTypeId::BLOB); + blob_type.SetAlias("WKB_BLOB"); + return blob_type; + } + + static bool ToWKBCast(Vector &source, Vector &result, idx_t count, CastParameters &) { + Geometry::ToBinary(source, result, count); + return true; + } + + static bool FromWKBCast(Vector &source, Vector &result, idx_t count, CastParameters ¶ms) { + Geometry::FromBinary(source, result, count, params.strict); + // TODO: Return false if any errors occurred during the cast + return true; + } + + static void Register(ExtensionLoader &loader) { + + // Register the WKB_BLOB type + loader.RegisterType("WKB_BLOB", WKB_BLOB()); + + // Also register casts + // WKB_BLOB -> GEOMETRY (Explicit) + loader.RegisterCastFunction(WKB_BLOB(), LogicalType::GEOMETRY(), FromWKBCast); + + // GEOMETRY -> WKB_BLOB (Explicit) + loader.RegisterCastFunction(LogicalType::GEOMETRY(), WKB_BLOB(), ToWKBCast); + + // WKB_BLOB -> BLOB (Implicit) + loader.RegisterCastFunction(WKB_BLOB(), LogicalType::BLOB, DefaultCasts::ReinterpretCast, 1); + + // BLOB -> WKB_BLOB (Explicit) + loader.RegisterCastFunction(LogicalType::BLOB, WKB_BLOB(), DefaultCasts::ReinterpretCast); + } +}; + +} // namespace +} // namespace duckdb 
+//###################################################################################################################### +// Functions +//###################################################################################################################### +namespace duckdb { +namespace {} // namespace +} // namespace duckdb +//###################################################################################################################### +// Module Registration +//###################################################################################################################### +namespace duckdb { + +void RegisterWKBModule(ExtensionLoader &loader) { + WKBTypes::Register(loader); +} + +} // namespace duckdb diff --git a/src/spatial/modules/wkb/wkb_module.hpp b/src/spatial/modules/wkb/wkb_module.hpp new file mode 100644 index 00000000..0ff5bf3c --- /dev/null +++ b/src/spatial/modules/wkb/wkb_module.hpp @@ -0,0 +1,9 @@ +#pragma once + +namespace duckdb { + +class ExtensionLoader; + +void RegisterWKBModule(ExtensionLoader &loader); + +} // namespace duckdb diff --git a/src/spatial/spatial_extension.cpp b/src/spatial/spatial_extension.cpp index d398e615..2ce04ec0 100644 --- a/src/spatial/spatial_extension.cpp +++ b/src/spatial/spatial_extension.cpp @@ -14,6 +14,7 @@ #include "spatial/modules/osm/osm_module.hpp" #include "spatial/modules/proj/proj_module.hpp" #include "spatial/modules/shapefile/shapefile_module.hpp" +#include "spatial/modules/wkb/wkb_module.hpp" #include "spatial/operators/spatial_operator_extension.hpp" #include "spatial/operators/spatial_join_optimizer.hpp" #include "spatial/spatial_types.hpp" @@ -39,6 +40,7 @@ static void LoadInternal(ExtensionLoader &loader) { RegisterOSMModule(loader); RegisterShapefileModule(loader); RegisterMapboxVectorTileModule(loader); + RegisterWKBModule(loader); RTreeModule::RegisterIndex(loader); RTreeModule::RegisterIndexPragmas(loader); From 87bfbb9e6ab2bdcd5862085da6d4dd1bdd6b360f Mon Sep 17 00:00:00 2001 From: 
Max Gabrielsson Date: Wed, 19 Nov 2025 14:37:07 +0100 Subject: [PATCH 32/41] add implicit cast from old-style geometry --- .../modules/main/spatial_functions_cast.cpp | 187 +++++++++++++++++- src/spatial/modules/wkb/wkb_module.cpp | 26 +-- src/spatial/util/binary_reader.hpp | 4 + test/sql/geometry/geometry_types.test | 2 +- test/sql/geometry/st_has.test | 5 +- 5 files changed, 200 insertions(+), 24 deletions(-) diff --git a/src/spatial/modules/main/spatial_functions_cast.cpp b/src/spatial/modules/main/spatial_functions_cast.cpp index 5ee0096e..ae553cf4 100644 --- a/src/spatial/modules/main/spatial_functions_cast.cpp +++ b/src/spatial/modules/main/spatial_functions_cast.cpp @@ -8,6 +8,8 @@ #include "duckdb/common/operator/cast_operators.hpp" #include "duckdb/common/vector_operations/generic_executor.hpp" #include "duckdb/main/extension/extension_loader.hpp" +#include "spatial/util/binary_reader.hpp" +#include "spatial/util/binary_writer.hpp" namespace duckdb { @@ -113,17 +115,196 @@ struct GeometryCasts { return success; } + //------------------------------------------------------------------------------------------------------------------ + // LEGACY_GEOMETRY -> GEOMETRY + //------------------------------------------------------------------------------------------------------------------ + static uint32_t FromLegacyGeometryRequiredSize(BinaryReader &reader) { + + reader.Skip(sizeof(uint8_t)); // type + const auto flags = reader.Read(); + reader.Skip(sizeof(uint16_t)); + reader.Skip(sizeof(uint32_t)); // padding + + // Parse flags + const auto has_z = (flags & 0x01) != 0; + const auto has_m = (flags & 0x02) != 0; + const auto has_bbox = (flags & 0x04) != 0; + + const auto format_v1 = (flags & 0x40) != 0; + const auto format_v0 = (flags & 0x80) != 0; + + if (format_v1 || format_v0) { + // Unsupported version, throw an error + throw NotImplementedException( + "This geometry seems to be written with a newer version of the DuckDB spatial library that is not " + 
"compatible with this version. Please upgrade your DuckDB installation."); + } + + if (has_bbox) { + // Skip past bbox if present + reader.Skip(sizeof(float) * 2 * (2 + has_z + has_m)); + } + + // Create root geometry + const auto vert_width = (2 + has_z + has_m) * sizeof(double); + + uint32_t total_size = 0; + while (!reader.IsAtEnd()) { + + const auto type = static_cast(reader.Read() + 1); + const auto size = reader.Read(); + + // Endianness + type + total_size += sizeof(uint8_t) + sizeof(uint32_t); + + switch (type) { + case sgl::geometry_type::POINT: { + // Points have a fixed size + reader.Skip(size * vert_width); + total_size += vert_width; + } break; + case sgl::geometry_type::LINESTRING: { + reader.Skip(size * vert_width); + total_size += sizeof(uint32_t) + (size * vert_width); + } break; + case sgl::geometry_type::POLYGON: { + total_size += sizeof(uint32_t); // ring count + auto ring_reader = reader; + reader.Skip(size * sizeof(uint32_t) + (size % 2) * sizeof(uint32_t)); + for (uint32_t ring_idx = 0; ring_idx < size; ring_idx++) { + const auto ring_size = ring_reader.Read(); + reader.Skip(vert_width * ring_size); + total_size += sizeof(uint32_t) + ring_size * vert_width; + } + } break; + case sgl::geometry_type::MULTI_POINT: + case sgl::geometry_type::MULTI_LINESTRING: + case sgl::geometry_type::MULTI_POLYGON: { + case sgl::geometry_type::GEOMETRY_COLLECTION: { + total_size += sizeof(uint32_t); // item count + } break; + default: + throw InvalidInputException("Unsupported geometry type in legacy geometry!"); + } + } + } + return total_size; + } + + static void FromLegacyGeometryConversion(BinaryReader &reader, BinaryWriter &writer) { + reader.Skip(sizeof(uint8_t)); // type + const auto flags = reader.Read(); + reader.Skip(sizeof(uint16_t)); + reader.Skip(sizeof(uint32_t)); // padding + + // Parse flags + const auto has_z = (flags & 0x01) != 0; + const auto has_m = (flags & 0x02) != 0; + const auto has_bbox = (flags & 0x04) != 0; + + const auto format_v1 = 
(flags & 0x40) != 0; + const auto format_v0 = (flags & 0x80) != 0; + + if (format_v1 || format_v0) { + // Unsupported version, throw an error + throw NotImplementedException( + "This geometry seems to be written with a newer version of the DuckDB spatial library that is not " + "compatible with this version. Please upgrade your DuckDB installation."); + } + + if (has_bbox) { + // Skip past bbox if present + reader.Skip(sizeof(float) * 2 * (2 + has_z + has_m)); + } + + // Create root geometry + const auto vert_width = (2 + has_z + has_m) * sizeof(double); + + while (!reader.IsAtEnd()) { + const auto type = static_cast(reader.Read() + 1); + const auto size = reader.Read(); + + // Write endianness + type + const auto meta = static_cast(type) + (has_z ? 1 : 0) * 1000 + (has_m ? 2 : 0) * 1000; + + writer.Write(1); // little endian + writer.Write(meta); + + switch (type) { + case sgl::geometry_type::POINT: { + if (size == 0) { + constexpr auto nan = std::numeric_limits::quiet_NaN(); + constexpr double empty[4] = {nan, nan, nan, nan}; + writer.Copy(reinterpret_cast(empty), vert_width); + } else { + const auto vert_data = reader.Reserve(vert_width); + writer.Copy(vert_data, vert_width); + } + } break; + case sgl::geometry_type::LINESTRING: { + writer.Write(size); + + const auto vert_size = vert_width * size; + const auto vert_data = reader.Reserve(vert_size); + + writer.Copy(vert_data, vert_size); + } break; + case sgl::geometry_type::POLYGON: { + writer.Write(size); // ring count + auto ring_reader = reader; + reader.Skip(size * sizeof(uint32_t) + (size % 2) * sizeof(uint32_t)); + for (uint32_t ring_idx = 0; ring_idx < size; ring_idx++) { + const auto ring_size = ring_reader.Read(); + writer.Write(ring_size); + + const auto vert_size = vert_width * ring_size; + const auto vert_data = reader.Reserve(vert_size); + + writer.Copy(vert_data, vert_size); + } + } break; + case sgl::geometry_type::MULTI_POINT: + case sgl::geometry_type::MULTI_LINESTRING: + case 
sgl::geometry_type::MULTI_POLYGON: + case sgl::geometry_type::GEOMETRY_COLLECTION: { + writer.Write(size); // item count + } break; + default: + throw InvalidInputException("Unsupported geometry type in legacy geometry!"); + } + } + } + + static bool FromLegacyGeometryCast(Vector &source, Vector &result, idx_t count, CastParameters ¶ms) { + UnaryExecutor::Execute(source, result, count, [&](const string_t &old_blob) { + BinaryReader reader(old_blob.GetDataUnsafe(), old_blob.GetSize()); + + const auto new_size = FromLegacyGeometryRequiredSize(reader); + auto new_blob = StringVector::EmptyString(result, new_size); + + reader.Reset(); + BinaryWriter writer(new_blob.GetDataWriteable(), new_blob.GetSize()); + FromLegacyGeometryConversion(reader, writer); + + new_blob.Finalize(); + return new_blob; + }); + return true; + } + //------------------------------------------------------------------------------------------------------------------ // Register //------------------------------------------------------------------------------------------------------------------ static void Register(ExtensionLoader &loader) { const auto geom_type = LogicalType::GEOMETRY(); - // Geometry -> WKB is explicitly castable - // loader.RegisterCastFunction(geom_type, wkb_type, BoundCastInfo(ToWKBCast)); - // Geometry -> BLOB is explicitly castable loader.RegisterCastFunction(geom_type, LogicalType::BLOB, DefaultCasts::ReinterpretCast); + + // Also allow casting from LEGACY_GEOMETRY to GEOMETRY (Implicit) + loader.RegisterCastFunction(GeoTypes::LEGACY_GEOMETRY(), geom_type, FromLegacyGeometryCast, 1); + + // TODO: And the other way around? 
} }; diff --git a/src/spatial/modules/wkb/wkb_module.cpp b/src/spatial/modules/wkb/wkb_module.cpp index d3911d0f..62d49095 100644 --- a/src/spatial/modules/wkb/wkb_module.cpp +++ b/src/spatial/modules/wkb/wkb_module.cpp @@ -4,9 +4,6 @@ #include "duckdb/main/extension/extension_loader.hpp" #include "duckdb/common/types/geometry.hpp" -//###################################################################################################################### -// Types -//###################################################################################################################### namespace duckdb { namespace { @@ -35,32 +32,23 @@ struct WKBTypes { loader.RegisterType("WKB_BLOB", WKB_BLOB()); // Also register casts - // WKB_BLOB -> GEOMETRY (Explicit) - loader.RegisterCastFunction(WKB_BLOB(), LogicalType::GEOMETRY(), FromWKBCast); - - // GEOMETRY -> WKB_BLOB (Explicit) - loader.RegisterCastFunction(LogicalType::GEOMETRY(), WKB_BLOB(), ToWKBCast); + // WKB_BLOB -> GEOMETRY (Implicit) + loader.RegisterCastFunction(WKB_BLOB(), LogicalType::GEOMETRY(), FromWKBCast, 1); // WKB_BLOB -> BLOB (Implicit) loader.RegisterCastFunction(WKB_BLOB(), LogicalType::BLOB, DefaultCasts::ReinterpretCast, 1); + // TODO: Remove support for this in the future + // GEOMETRY -> WKB_BLOB (Explicit) + loader.RegisterCastFunction(LogicalType::GEOMETRY(), WKB_BLOB(), ToWKBCast); + + // TODO: Remove support for this in the future // BLOB -> WKB_BLOB (Explicit) loader.RegisterCastFunction(LogicalType::BLOB, WKB_BLOB(), DefaultCasts::ReinterpretCast); } }; } // namespace -} // namespace duckdb -//###################################################################################################################### -// Functions -//###################################################################################################################### -namespace duckdb { -namespace {} // namespace -} // namespace duckdb 
-//###################################################################################################################### -// Module Registration -//###################################################################################################################### -namespace duckdb { void RegisterWKBModule(ExtensionLoader &loader) { WKBTypes::Register(loader); diff --git a/src/spatial/util/binary_reader.hpp b/src/spatial/util/binary_reader.hpp index 3cbcea16..bc6bb24e 100644 --- a/src/spatial/util/binary_reader.hpp +++ b/src/spatial/util/binary_reader.hpp @@ -20,6 +20,10 @@ class BinaryReader { return ptr >= end; } + void Reset() { + ptr = beg; + } + template T Read() { static_assert(std::is_trivially_copyable::value, "Type must be trivially copyable"); diff --git a/test/sql/geometry/geometry_types.test b/test/sql/geometry/geometry_types.test index e53bbb68..10e1ffa6 100644 --- a/test/sql/geometry/geometry_types.test +++ b/test/sql/geometry/geometry_types.test @@ -64,7 +64,7 @@ GEOMETRYCOLLECTION query I -SELECT ST_GeometryType(ST_AsWKB(geom)) FROM types +SELECT ST_GeometryType(ST_AsWKB(geom)::WKB_BLOB) FROM types ---- POINT POINT diff --git a/test/sql/geometry/st_has.test b/test/sql/geometry/st_has.test index 82ee8fac..0a92ccc1 100644 --- a/test/sql/geometry/st_has.test +++ b/test/sql/geometry/st_has.test @@ -1,7 +1,10 @@ +# name: test/sql/geometry/st_has.test +# group: [geometry] + require spatial # Test for both GEOMETRY and WKB_BLOB -foreach FUNCTION ST_GeomFromText() ST_GeomFromText().ST_AsWKB() +foreach FUNCTION ST_GeomFromText() ST_GeomFromText().ST_AsWKB()::WKB_BLOB # HasZ for a 2D geometry query I From 7ed242d91d70b59d60c9ab4efd4d938a6108ddd9 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Wed, 19 Nov 2025 15:02:40 +0100 Subject: [PATCH 33/41] adjust test --- test/sql/geoarrow.test | 30 ------------------------------ test/sql/geometry/st_centroid.test | 9 +++++++-- 2 files changed, 7 insertions(+), 32 deletions(-) delete mode 100644 
test/sql/geoarrow.test diff --git a/test/sql/geoarrow.test b/test/sql/geoarrow.test deleted file mode 100644 index d649d2e9..00000000 --- a/test/sql/geoarrow.test +++ /dev/null @@ -1,30 +0,0 @@ - -require spatial - -# There may be a better way to test this eventually, but today it's hard to -# write these tests without a way to read and write Arrow streams or files. -# This uses https://github.com/paleolimbot/duckdb-nanoarrow, which may be -# superceeded by something else (or improved and released, depending!) -require nanoarrow - -query I -CALL register_geoarrow_extensions(); ----- -true - -# Check that exporting GEOMETRY results in a geoarrow.wkb extension type -statement ok -COPY (SELECT st_geomfromtext('LINESTRING (0 1, 2 3)')) TO '__TEST_DIR__/test.arrows' WITH (FORMAT ARROWS); - -query I -SELECT * FROM "__TEST_DIR__/test.arrows"; ----- -LINESTRING (0 1, 2 3) - -# Check that importing Arrow data with a geoarrow.wkb type results in GEOMETRY -query I -SELECT * FROM "__WORKING_DIRECTORY__/test/data/geoarrow-wkb.arrow"; ----- -1 POINT (30 10) -2 POINT EMPTY -3 POINT EMPTY diff --git a/test/sql/geometry/st_centroid.test b/test/sql/geometry/st_centroid.test index 2fcfb0e4..b173512d 100644 --- a/test/sql/geometry/st_centroid.test +++ b/test/sql/geometry/st_centroid.test @@ -73,6 +73,11 @@ SELECT ST_Centroid(ST_GeomFromText('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0), (0.1 0.1, POINT (0.5 0.5) query I -SELECT ST_Centroid(ST_GeomFromText('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0), (0.5 0.1, 0.5 0.9, 0.9 0.9, 0.9 0.1, 0.5 0.1))')); +SELECT ST_Y(ST_Centroid(ST_GeomFromText('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0), (0.5 0.1, 0.5 0.9, 0.9 0.9, 0.9 0.1, 0.5 0.1))'))); ---- -POINT (0.405882352941176 0.5) \ No newline at end of file +0.5 + +query I +SELECT ST_X(ST_Centroid(ST_GeomFromText('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0), (0.5 0.1, 0.5 0.9, 0.9 0.9, 0.9 0.1, 0.5 0.1))'))); +---- +0.405882352941 From 4cc03bcf392f67ad9509cf448db6c2501bd44af5 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Thu, 
20 Nov 2025 11:09:16 +0100 Subject: [PATCH 34/41] update to main duckdb --- duckdb | 2 +- src/spatial/modules/wkb/wkb_module.cpp | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/duckdb b/duckdb index 20696d80..b59251d1 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 20696d805ee421264950657119078ca621c8839b +Subproject commit b59251d1a3257041231328ab53d3730a9dd0a193 diff --git a/src/spatial/modules/wkb/wkb_module.cpp b/src/spatial/modules/wkb/wkb_module.cpp index 62d49095..a1c279dd 100644 --- a/src/spatial/modules/wkb/wkb_module.cpp +++ b/src/spatial/modules/wkb/wkb_module.cpp @@ -1,6 +1,7 @@ #include "wkb_module.hpp" #include "duckdb/common/types.hpp" +#include "duckdb/common/operator/cast_operators.hpp" #include "duckdb/main/extension/extension_loader.hpp" #include "duckdb/common/types/geometry.hpp" @@ -21,9 +22,12 @@ struct WKBTypes { } static bool FromWKBCast(Vector &source, Vector &result, idx_t count, CastParameters ¶ms) { - Geometry::FromBinary(source, result, count, params.strict); - // TODO: Return false if any errors occurred during the cast - return true; + try { + return Geometry::FromBinary(source, result, count, params.strict); + } catch (...) 
{ + HandleCastError::AssignError("Failed to cast WKB_BLOB to GEOMETRY", params); + return false; + } } static void Register(ExtensionLoader &loader) { From ffa88a3ea1cfe2814f267eed770ebaeef6d619d2 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Thu, 20 Nov 2025 11:22:48 +0100 Subject: [PATCH 35/41] forgot CI --- .github/workflows/MainDistributionPipeline.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 91f0f865..8e1b18c1 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -5,13 +5,13 @@ name: Main Extension Distribution Pipeline on: pull_request: branches: - - v1.4-andium + - main paths-ignore: - '**/README.md' - 'doc/**' push: branches: - - v1.4-andium + - main paths-ignore: - '**/README.md' - 'doc/**' @@ -27,7 +27,7 @@ jobs: name: Build extension binaries uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main with: - duckdb_version: v1.4.1 + duckdb_version: main extension_name: spatial ci_tools_version: main vcpkg_commit: ce613c41372b23b1f51333815feb3edd87ef8a8b @@ -38,7 +38,7 @@ jobs: uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@main secrets: inherit with: - duckdb_version: v1.4.1 + duckdb_version: main ci_tools_version: main extension_name: spatial deploy_latest: ${{ startsWith(github.ref, 'refs/heads/v') || github.ref == 'refs/heads/main' }} From 78b62171078c6d19222a1676ca80f5544799076e Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Thu, 20 Nov 2025 12:52:31 +0100 Subject: [PATCH 36/41] fix override --- CMakeLists.txt | 4 +++- src/spatial/index/rtree/rtree_index_create_physical.hpp | 3 ++- src/spatial/operators/spatial_join_physical.cpp | 4 ++-- src/spatial/operators/spatial_join_physical.hpp | 3 ++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 
28530571..93c0fcf3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,9 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # Set extension name here set(TARGET_NAME spatial) set(EXTENSION_NAME ${TARGET_NAME}_extension) -set(CMAKE_CXX_STANDARD 11 CACHE STRING "C++ standard") +set(CMAKE_CXX_STANDARD + 11 + CACHE STRING "C++ standard") if(EMSCRIPTEN) # _LINKED_LIBS influences only Wasm compilation it's unclear why this is diff --git a/src/spatial/index/rtree/rtree_index_create_physical.hpp b/src/spatial/index/rtree/rtree_index_create_physical.hpp index 23da6db1..e6f3dc76 100644 --- a/src/spatial/index/rtree/rtree_index_create_physical.hpp +++ b/src/spatial/index/rtree/rtree_index_create_physical.hpp @@ -26,7 +26,8 @@ class PhysicalCreateRTreeIndex final : public PhysicalOperator { public: //! Source interface, NOOP for this operator - SourceResultType GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const override { + SourceResultType GetDataInternal(ExecutionContext &context, DataChunk &chunk, + OperatorSourceInput &input) const override { return SourceResultType::FINISHED; } bool IsSource() const override { diff --git a/src/spatial/operators/spatial_join_physical.cpp b/src/spatial/operators/spatial_join_physical.cpp index dd36854b..bddf0934 100644 --- a/src/spatial/operators/spatial_join_physical.cpp +++ b/src/spatial/operators/spatial_join_physical.cpp @@ -1082,8 +1082,8 @@ unique_ptr PhysicalSpatialJoin::GetLocalSourceState(ExecutionC return std::move(lstate); } -SourceResultType PhysicalSpatialJoin::GetData(ExecutionContext &context, DataChunk &chunk, - OperatorSourceInput &input) const { +SourceResultType PhysicalSpatialJoin::GetDataInternal(ExecutionContext &context, DataChunk &chunk, + OperatorSourceInput &input) const { D_ASSERT(PropagatesBuildSide(join_type)); auto &gstate = input.global_state.Cast(); diff --git a/src/spatial/operators/spatial_join_physical.hpp b/src/spatial/operators/spatial_join_physical.hpp index 
568e5177..e37523fc 100644 --- a/src/spatial/operators/spatial_join_physical.hpp +++ b/src/spatial/operators/spatial_join_physical.hpp @@ -72,7 +72,8 @@ class PhysicalSpatialJoin final : public PhysicalJoin { unique_ptr GetGlobalSourceState(ClientContext &context) const override; unique_ptr GetLocalSourceState(ExecutionContext &context, GlobalSourceState &gstate) const override; - SourceResultType GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const override; + SourceResultType GetDataInternal(ExecutionContext &context, DataChunk &chunk, + OperatorSourceInput &input) const override; bool IsSource() const override { // The PhysicalSpatialJoin is only a source if the join type is RIGHT/OUTER From daafd7ddf27926c9d842238a3bba1e0ef0dfaf58 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Thu, 20 Nov 2025 16:32:34 +0100 Subject: [PATCH 37/41] fix bug in TryGetCachedBounds, add optimizer path for more spatial (inner) joins --- .../geometry/geometry_serialization.cpp | 2 +- .../operators/spatial_join_optimizer.cpp | 201 +++++++++++++----- .../operators/spatial_join_physical.cpp | 17 ++ 3 files changed, 169 insertions(+), 51 deletions(-) diff --git a/src/spatial/geometry/geometry_serialization.cpp b/src/spatial/geometry/geometry_serialization.cpp index a803f893..f869a682 100644 --- a/src/spatial/geometry/geometry_serialization.cpp +++ b/src/spatial/geometry/geometry_serialization.cpp @@ -290,7 +290,7 @@ void Serde::DeserializePrepared(sgl::prepared_geometry &result, ArenaAllocator & } uint32_t Serde::TryGetBounds(const string_t &blob, Box2D &bbox) { - GeometryExtent extent; + GeometryExtent extent = GeometryExtent::Empty(); const auto count = Geometry::GetExtent(blob, extent); if (count == 0) { return 0; diff --git a/src/spatial/operators/spatial_join_optimizer.cpp b/src/spatial/operators/spatial_join_optimizer.cpp index a57af821..effb7c4c 100644 --- a/src/spatial/operators/spatial_join_optimizer.cpp +++ 
b/src/spatial/operators/spatial_join_optimizer.cpp @@ -6,6 +6,7 @@ #include "duckdb/planner/expression/bound_function_expression.hpp" #include "duckdb/planner/operator/logical_any_join.hpp" #include "duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp" +#include "duckdb/planner/operator/logical_comparison_join.hpp" #include "duckdb/planner/operator/logical_filter.hpp" #include "spatial/spatial_types.hpp" @@ -30,17 +31,17 @@ static const case_insensitive_map_t spatial_predicate_inverse_map = { {"ST_WithinProperly", "ST_ContainsProperly"}, // Inverse {"ST_ContainsProperly", "ST_WithinProperly"}, // Inverse {"ST_DWithin", "ST_DWithin"}, // Symmetric (when distance is constant) - }; -unique_ptr TryGetInversePredicate(ClientContext &context, unique_ptr expr) { +static bool HasInversePredicate(const string &func_name) { + return spatial_predicate_inverse_map.find(func_name) != spatial_predicate_inverse_map.end(); +} + +static unique_ptr GetInversePredicate(ClientContext &context, unique_ptr expr) { auto &func = expr->Cast(); - const auto it = spatial_predicate_inverse_map.find(func.function.name); - if (it == spatial_predicate_inverse_map.end()) { - // We cant do anything - return nullptr; - } + const auto it = spatial_predicate_inverse_map.find(func.function.name); + D_ASSERT(it != spatial_predicate_inverse_map.end()); // Swap the arguments std::swap(func.children[0], func.children[1]); @@ -60,7 +61,131 @@ unique_ptr TryGetInversePredicate(ClientContext &context, unique_ptr nullptr, func.is_operator); } -static void InsertSpatialJoin(OptimizerExtensionInput &input, unique_ptr &plan) { +static bool IsSpatialJoinPredicate(const unique_ptr &expr, + const unordered_set &left_bindings, const unordered_set &right_bindings, bool &needs_flipping) { + + const auto total_side = JoinSide::GetJoinSide(*expr, left_bindings, right_bindings); + + if (total_side != JoinSide::BOTH) { + return false; + } + + // Check if the expression is a spatial predicate + if (expr->type != 
ExpressionType::BOUND_FUNCTION) { + return false; + } + + auto &func = expr->Cast(); + + // The function must be a binary predicate + if (func.children.size() != 2) { + return false; + } + + // The function must operate on two GEOMETRY types + if (func.children[0]->return_type != LogicalType::GEOMETRY() || + func.children[1]->return_type != LogicalType::GEOMETRY()) { + return false; + } + + // The function must be a recognized spatial predicate + if (spatial_predicate_map.count(func.function.name) == 0) { + return false; + } + + const auto left_side = JoinSide::GetJoinSide(*func.children[0], left_bindings, right_bindings); + const auto right_side = JoinSide::GetJoinSide(*func.children[1], left_bindings, right_bindings); + + // Can the condition can be cleanly split into two sides? + if (left_side == JoinSide::BOTH || right_side == JoinSide::BOTH) { + return false; + } + + if (left_side == JoinSide::RIGHT) { + if (!HasInversePredicate(func.function.name)) { + return false; + } + needs_flipping = true; + } + + return true; +} + + +static bool TrySwapComparisonJoin(OptimizerExtensionInput &input, unique_ptr &plan) { + auto &op = *plan; + + if (op.type != LogicalOperatorType::LOGICAL_FILTER) { + return false; + } + + auto &filter = op.Cast(); + if (filter.expressions.size() != 1) { + return false; + } + + // TODO: This is rarely the case, because there might be projections inbetween. 
+ // TODO: Handle projections between filter and join + auto &child = *op.children[0]; + if (child.type != LogicalOperatorType::LOGICAL_COMPARISON_JOIN) { + return false; + } + + // Can only do this safely for INNER joins + auto &cmp_join = child.Cast(); + if (cmp_join.join_type != JoinType::INNER) { + return false; + } + + // Get the table indexes that are reachable from the left and right children + const auto &left_child = cmp_join.children[0]; + const auto &right_child = cmp_join.children[1]; + unordered_set left_bindings; + unordered_set right_bindings; + LogicalJoin::GetTableReferences(*left_child, left_bindings); + LogicalJoin::GetTableReferences(*right_child, right_bindings); + + // Check if the filter expression contains a spatial predicate + auto expr = filter.expressions[0]->Copy(); + bool needs_flipping = false; + if (!IsSpatialJoinPredicate(expr, left_bindings, right_bindings, needs_flipping)) { + return false; + } + + if (needs_flipping) { + expr = GetInversePredicate(input.context, std::move(expr)); + } + + // Cool. 
Now pull up the join condition into a filter, and create a spatial join + auto spatial_join = make_uniq(cmp_join.join_type); + spatial_join->spatial_predicate = std::move(expr); + spatial_join->children = std::move(cmp_join.children); + spatial_join->expressions = std::move(cmp_join.expressions); + spatial_join->types = std::move(cmp_join.types); + spatial_join->left_projection_map = std::move(cmp_join.left_projection_map); + spatial_join->right_projection_map = std::move(cmp_join.right_projection_map); + spatial_join->join_stats = std::move(cmp_join.join_stats); + spatial_join->mark_index = cmp_join.mark_index; + spatial_join->has_estimated_cardinality = cmp_join.has_estimated_cardinality; + spatial_join->estimated_cardinality = cmp_join.estimated_cardinality; + + // If this is ST_DWithin, try to extract the constant distance value + const auto &pred_func = spatial_join->spatial_predicate->Cast(); + if (pred_func.function.name == "ST_DWithin") { + // Try to get the constant distance value from the bind data; + spatial_join->has_const_distance = + ST_DWithinHelper::TryGetConstDistance(pred_func.bind_info, spatial_join->const_distance); + } + + // Also take all the conditions from the comparison join and add them as filters + filter.expressions.clear(); + filter.expressions.push_back(JoinCondition::CreateExpression(std::move(cmp_join.conditions))); + filter.children[0] = std::move(spatial_join); + + return true; +} + +static void TrySwapAnyJoin(OptimizerExtensionInput &input, unique_ptr &plan) { auto &op = *plan; // We only care about ANY_JOIN operators @@ -118,56 +243,22 @@ static void InsertSpatialJoin(OptimizerExtensionInput &input, unique_ptrtype != ExpressionType::BOUND_FUNCTION) { + bool needs_flipping = false; + if (!IsSpatialJoinPredicate(expr, left_bindings, right_bindings, needs_flipping)) { + // Not a spatial predicate extra_predicates.push_back(std::move(expr)); continue; } - auto &func = expr->Cast(); - - // The function must be a binary predicate - if 
(func.children.size() != 2) { - extra_predicates.push_back(std::move(expr)); - continue; - } - - // The function must operate on two GEOMETRY types - if (func.children[0]->return_type != LogicalType::GEOMETRY() || - func.children[1]->return_type != LogicalType::GEOMETRY()) { - extra_predicates.push_back(std::move(expr)); - continue; - } - - // The function must be a recognized spatial predicate - if (spatial_predicate_map.count(func.function.name) == 0) { - extra_predicates.push_back(std::move(expr)); - continue; - } - - auto left_side = JoinSide::GetJoinSide(*func.children[0], left_bindings, right_bindings); - auto right_side = JoinSide::GetJoinSide(*func.children[1], left_bindings, right_bindings); - - // Can the condition can be cleanly split into two sides? - if (left_side == JoinSide::BOTH || right_side == JoinSide::BOTH) { - extra_predicates.push_back(std::move(expr)); - continue; - } - - if (left_side == JoinSide::RIGHT) { - expr = TryGetInversePredicate(input.context, std::move(expr)); - if (expr == nullptr) { - // We cant flip this, abort! 
- return; - } + if (needs_flipping) { + expr = GetInversePredicate(input.context, std::move(expr)); } spatial_pred_expr = std::move(expr); @@ -183,7 +274,7 @@ static void InsertSpatialJoin(OptimizerExtensionInput &input, unique_ptr(any_join.join_type); - // Steal the properties from the any join + // Steal the properties from the any-join spatial_join->spatial_predicate = std::move(spatial_pred_expr); spatial_join->extra_conditions = std::move(extra_predicates); spatial_join->children = std::move(any_join.children); @@ -208,6 +299,16 @@ static void InsertSpatialJoin(OptimizerExtensionInput &input, unique_ptr &plan) { + auto &op = *plan; + + if (TrySwapComparisonJoin(input, plan)) { + return; + } + + TrySwapAnyJoin(input, plan); +} + static void TryInsertSpatialJoin(OptimizerExtensionInput &input, unique_ptr &plan) { InsertSpatialJoin(input, plan); diff --git a/src/spatial/operators/spatial_join_physical.cpp b/src/spatial/operators/spatial_join_physical.cpp index bddf0934..b3dbfa57 100644 --- a/src/spatial/operators/spatial_join_physical.cpp +++ b/src/spatial/operators/spatial_join_physical.cpp @@ -718,6 +718,18 @@ class SpatialJoinGlobalOperatorState final : public GlobalOperatorState { public: unique_ptr rtree; unique_ptr collection; + +#ifdef DUCKDB_SPATIAL_DEBUG_JOIN + // TODO: Move this into proper profiling metrics later + // Statistics + atomic total_rtree_probes = {0}; + atomic total_rtree_candidates = {0}; + + ~SpatialJoinGlobalOperatorState() override { + Printer::PrintF("Spatial Join Stats: RTree Probes: %llu, RTree Candidates: %llu\n", + total_rtree_probes.load(), total_rtree_candidates.load()); + } +#endif }; unique_ptr PhysicalSpatialJoin::GetOperatorState(ExecutionContext &context) const { @@ -836,6 +848,11 @@ OperatorResultType PhysicalSpatialJoin::ExecuteInternal(ExecutionContext &contex continue; } +#ifdef DUCKDB_SPATIAL_DEBUG_JOIN + gstate.total_rtree_candidates += lstate.scan.matches_count; + gstate.total_rtree_probes += 1; +#endif + lstate.state 
= SpatialJoinState::SCAN; } // fall through //-------------------------------------------------------------------------------------------------------------- From f6910406670bbac4cc4607223cf8f95a1eec6d0d Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Thu, 20 Nov 2025 16:32:56 +0100 Subject: [PATCH 38/41] format --- src/spatial/operators/spatial_join_optimizer.cpp | 9 ++++----- src/spatial/operators/spatial_join_physical.cpp | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/spatial/operators/spatial_join_optimizer.cpp b/src/spatial/operators/spatial_join_optimizer.cpp index effb7c4c..9a987ea9 100644 --- a/src/spatial/operators/spatial_join_optimizer.cpp +++ b/src/spatial/operators/spatial_join_optimizer.cpp @@ -61,8 +61,8 @@ static unique_ptr GetInversePredicate(ClientContext &context, unique nullptr, func.is_operator); } -static bool IsSpatialJoinPredicate(const unique_ptr &expr, - const unordered_set &left_bindings, const unordered_set &right_bindings, bool &needs_flipping) { +static bool IsSpatialJoinPredicate(const unique_ptr &expr, const unordered_set &left_bindings, + const unordered_set &right_bindings, bool &needs_flipping) { const auto total_side = JoinSide::GetJoinSide(*expr, left_bindings, right_bindings); @@ -84,7 +84,7 @@ static bool IsSpatialJoinPredicate(const unique_ptr &expr, // The function must operate on two GEOMETRY types if (func.children[0]->return_type != LogicalType::GEOMETRY() || - func.children[1]->return_type != LogicalType::GEOMETRY()) { + func.children[1]->return_type != LogicalType::GEOMETRY()) { return false; } @@ -111,7 +111,6 @@ static bool IsSpatialJoinPredicate(const unique_ptr &expr, return true; } - static bool TrySwapComparisonJoin(OptimizerExtensionInput &input, unique_ptr &plan) { auto &op = *plan; @@ -174,7 +173,7 @@ static bool TrySwapComparisonJoin(OptimizerExtensionInput &input, unique_ptrhas_const_distance = - ST_DWithinHelper::TryGetConstDistance(pred_func.bind_info, 
spatial_join->const_distance); + ST_DWithinHelper::TryGetConstDistance(pred_func.bind_info, spatial_join->const_distance); } // Also take all the conditions from the comparison join and add them as filters diff --git a/src/spatial/operators/spatial_join_physical.cpp b/src/spatial/operators/spatial_join_physical.cpp index b3dbfa57..f4ff1053 100644 --- a/src/spatial/operators/spatial_join_physical.cpp +++ b/src/spatial/operators/spatial_join_physical.cpp @@ -726,8 +726,8 @@ class SpatialJoinGlobalOperatorState final : public GlobalOperatorState { atomic total_rtree_candidates = {0}; ~SpatialJoinGlobalOperatorState() override { - Printer::PrintF("Spatial Join Stats: RTree Probes: %llu, RTree Candidates: %llu\n", - total_rtree_probes.load(), total_rtree_candidates.load()); + Printer::PrintF("Spatial Join Stats: RTree Probes: %llu, RTree Candidates: %llu\n", total_rtree_probes.load(), + total_rtree_candidates.load()); } #endif }; From 91551106f08e120ddbb2124b44accf2e53643f5a Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Fri, 21 Nov 2025 13:56:59 +0100 Subject: [PATCH 39/41] use arena for geos deserialization --- src/spatial/modules/geos/geos_module.cpp | 124 ++++++++++++----------- src/spatial/modules/geos/geos_serde.cpp | 79 ++++++++++----- src/spatial/modules/geos/geos_serde.hpp | 5 +- 3 files changed, 120 insertions(+), 88 deletions(-) diff --git a/src/spatial/modules/geos/geos_module.cpp b/src/spatial/modules/geos/geos_module.cpp index 448c4a7a..ee75015a 100644 --- a/src/spatial/modules/geos/geos_module.cpp +++ b/src/spatial/modules/geos/geos_module.cpp @@ -8,6 +8,8 @@ #include "duckdb/common/vector_operations/generic_executor.hpp" #include "duckdb/planner/expression/bound_constant_expression.hpp" #include "duckdb/planner/expression/bound_function_expression.hpp" +#include "spatial/geometry/geometry_serialization.hpp" +#include "spatial/geometry/sgl.hpp" namespace duckdb { @@ -26,18 +28,23 @@ class LocalState final : public FunctionLocalState { static 
LocalState &ResetAndGet(ExpressionState &state) { auto &local_state = ExecuteFunctionState::GetFunctionState(state)->Cast(); + local_state.arena.Reset(); return local_state; } + ArenaAllocator &GetArena() { + return arena; + } + GEOSContextHandle_t GetContext() const { return ctx; } - GeosGeometry Deserialize(const string_t &blob) const; + GeosGeometry Deserialize(const string_t &blob); string_t Serialize(Vector &result, const GeosGeometry &geom) const; // Most GEOS functions do not use an arena, so just use the default allocator - explicit LocalState(ClientContext &context) { + explicit LocalState(ClientContext &context) : arena(BufferAllocator::Get(context)) { ctx = GEOS_init_r(); GEOSContext_setErrorMessageHandler_r( @@ -49,6 +56,7 @@ class LocalState final : public FunctionLocalState { } private: + ArenaAllocator arena; GEOSContextHandle_t ctx; }; @@ -69,11 +77,11 @@ string_t LocalState::Serialize(Vector &result, const GeosGeometry &geom) const { return blob; } -GeosGeometry LocalState::Deserialize(const string_t &blob) const { +GeosGeometry LocalState::Deserialize(const string_t &blob) { const auto blob_ptr = blob.GetData(); const auto blob_len = blob.GetSize(); - const auto geom = GeosSerde::Deserialize(ctx, blob_ptr, blob_len); + const auto geom = GeosSerde::Deserialize(ctx, arena, blob_ptr, blob_len); if (geom == nullptr) { throw InvalidInputException("Could not deserialize geometry"); @@ -94,7 +102,7 @@ template class SymmetricPreparedBinaryFunction { public: static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); auto &lhs_vec = args.data[0]; auto &rhs_vec = args.data[1]; @@ -143,7 +151,7 @@ template class AsymmetricPreparedBinaryFunction { public: static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); 
auto &lhs_vec = args.data[0]; auto &rhs_vec = args.data[1]; @@ -456,7 +464,7 @@ struct ST_AsMVTGeom { struct ST_Boundary { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); UnaryExecutor::ExecuteWithNulls( args.data[0], result, args.size(), [&](const string_t &geom_blob, ValidityMask &mask, idx_t row_idx) { @@ -491,7 +499,7 @@ struct ST_Boundary { struct ST_Buffer { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), [&](const string_t &blob, double radius) { @@ -502,7 +510,7 @@ struct ST_Buffer { } static void ExecuteWithSegments(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); TernaryExecutor::Execute( args.data[0], args.data[1], args.data[2], result, args.size(), @@ -529,7 +537,7 @@ struct ST_Buffer { } static void ExecuteWithStyle(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); SenaryExecutor::Execute( args, result, @@ -614,7 +622,7 @@ struct ST_Buffer { struct ST_BuildArea { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { const auto geom = lstate.Deserialize(geom_blob); @@ -745,7 +753,7 @@ struct ST_WithinProperly : AsymmetricPreparedBinaryFunction { struct ST_ConcaveHull { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = 
LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); TernaryExecutor::Execute( args.data[0], args.data[1], args.data[2], result, args.size(), @@ -781,7 +789,7 @@ struct ST_ConcaveHull { struct ST_ConvexHull { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { const auto geom = lstate.Deserialize(geom_blob); @@ -820,7 +828,7 @@ struct ST_CoverageInvalidEdges { } static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); UnifiedVectorFormat format; @@ -911,7 +919,7 @@ struct ST_CoverageSimplify { } static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); UnifiedVectorFormat format; @@ -989,7 +997,7 @@ struct ST_CoverageSimplify { struct ST_CoverageUnion { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); UnifiedVectorFormat format; @@ -1126,7 +1134,7 @@ struct ST_Crosses : SymmetricPreparedBinaryFunction { struct ST_Difference { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), [&](const string_t &lhs_blob, const string_t &rhs_blob) { @@ -1210,7 +1218,7 @@ struct ST_DistanceWithin { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { // Because this takes an extra argument, we cant reuse the 
SymmetricPreparedBinary... - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); auto &lhs_vec = args.data[0]; auto &rhs_vec = args.data[1]; @@ -1301,7 +1309,7 @@ struct ST_DistanceWithin { struct ST_Equals { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), [&](const string_t &lhs_blob, const string_t &rhs_blob) { @@ -1330,7 +1338,7 @@ struct ST_Equals { struct ST_Envelope { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { const auto geom = lstate.Deserialize(geom_blob); @@ -1358,7 +1366,7 @@ struct ST_Envelope { struct ST_Intersection { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), [&](const string_t &lhs_blob, const string_t &rhs_blob) { @@ -1416,7 +1424,7 @@ struct ST_Intersects : SymmetricPreparedBinaryFunction { struct ST_IsRing { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { const auto geom = lstate.Deserialize(geom_blob); return geom.is_ring(); @@ -1442,7 +1450,7 @@ struct ST_IsRing { struct ST_IsSimple { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = 
LocalState::ResetAndGet(state); UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { const auto geom = lstate.Deserialize(geom_blob); return geom.is_simple(); @@ -1501,7 +1509,7 @@ struct ST_IsValid { struct ST_LineMerge { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geometry_blob) { const auto geometry = lstate.Deserialize(geometry_blob); @@ -1511,7 +1519,7 @@ struct ST_LineMerge { } static void ExecuteWithDirection(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), [&](const string_t &geometry_blob, bool preserve_direction) { const auto geometry = lstate.Deserialize(geometry_blob); @@ -1547,7 +1555,7 @@ struct ST_LineMerge { struct ST_MakeValid { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { const auto geom = lstate.Deserialize(geom_blob); @@ -1575,7 +1583,7 @@ struct ST_MakeValid { struct ST_MaximumInscribedCircle { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); auto &struct_vecs = StructVector::GetEntries(result); auto ¢er_vec = *struct_vecs[0]; @@ -1603,7 +1611,7 @@ struct ST_MaximumInscribedCircle { } static void ExecuteWithTolerance(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = 
LocalState::ResetAndGet(state); auto &struct_vecs = StructVector::GetEntries(result); auto ¢er_vec = *struct_vecs[0]; @@ -1679,7 +1687,7 @@ struct ST_MaximumInscribedCircle { struct ST_MinimumRotatedRectangle { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { const auto geom = lstate.Deserialize(geom_blob); @@ -1709,7 +1717,7 @@ struct ST_MinimumRotatedRectangle { struct ST_Node { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { const auto geom = lstate.Deserialize(geom_blob); @@ -1750,7 +1758,7 @@ struct ST_Node { struct ST_Normalize { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { const auto geom = lstate.Deserialize(geom_blob); geom.normalize_in_place(); @@ -1802,7 +1810,7 @@ struct ST_Overlaps : SymmetricPreparedBinaryFunction { struct ST_PointOnSurface { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { const auto geom = lstate.Deserialize(geom_blob); @@ -1831,7 +1839,7 @@ struct ST_PointOnSurface { struct ST_Polygonize { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = 
LocalState::ResetAndGet(state); UnifiedVectorFormat format; @@ -1897,7 +1905,7 @@ struct ST_Polygonize { struct ST_ReducePrecision { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); BinaryExecutor::Execute( args.data[0], args.data[1], result, args.size(), [&](const string_t &geom_blob, double precision) { @@ -1927,7 +1935,7 @@ struct ST_ReducePrecision { struct ST_RemoveRepeatedPoints { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { const auto geom = lstate.Deserialize(geom_blob); @@ -1937,7 +1945,7 @@ struct ST_RemoveRepeatedPoints { } static void ExecuteWithTolerance(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); BinaryExecutor::Execute( args.data[0], args.data[1], result, args.size(), [&](const string_t &geom_blob, double tolerance) { @@ -1975,7 +1983,7 @@ struct ST_RemoveRepeatedPoints { struct ST_Reverse { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { const auto geom = lstate.Deserialize(geom_blob); @@ -2003,7 +2011,7 @@ struct ST_Reverse { struct ST_ShortestLine { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), [&](const string_t &lhs_blob, const string_t 
&rhs_blob) { const auto lhs = lstate.Deserialize(lhs_blob); @@ -2033,7 +2041,7 @@ struct ST_ShortestLine { struct ST_Simplify { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), [&](const string_t &geom_blob, double tolerance) { const auto geom = lstate.Deserialize(geom_blob); @@ -2061,7 +2069,7 @@ struct ST_Simplify { struct ST_SimplifyPreserveTopology { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); BinaryExecutor::Execute( args.data[0], args.data[1], result, args.size(), [&](const string_t &geom_blob, double tolerance) { const auto geom = lstate.Deserialize(geom_blob); @@ -2115,7 +2123,7 @@ struct ST_Touches : SymmetricPreparedBinaryFunction { struct ST_Union { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); BinaryExecutor::Execute(args.data[0], args.data[1], result, args.size(), [&](const string_t &lhs_blob, const string_t &rhs_blob) { @@ -2145,7 +2153,7 @@ struct ST_Union { struct ST_VoronoiDiagram { static void Execute(DataChunk &args, ExpressionState &state, Vector &result) { - const auto &lstate = LocalState::ResetAndGet(state); + auto &lstate = LocalState::ResetAndGet(state); UnaryExecutor::Execute(args.data[0], result, args.size(), [&](const string_t &geom_blob) { const auto geom = lstate.Deserialize(geom_blob); @@ -2225,11 +2233,11 @@ struct GeosUnaryAggFunction { } // Deserialize a GEOS geometry - static GEOSGeometry *Deserialize(const GEOSContextHandle_t context, const string_t &blob) { + static GEOSGeometry *Deserialize(const GEOSContextHandle_t context, ArenaAllocator &arena, const 
string_t &blob) { const auto ptr = blob.GetData(); const auto size = blob.GetSize(); - return GeosSerde::Deserialize(context, ptr, size); + return GeosSerde::Deserialize(context, arena, ptr, size); } template @@ -2253,11 +2261,11 @@ struct GeosUnaryAggFunction { } template - static void Operation(STATE &state, const INPUT_TYPE &input, AggregateUnaryInput &) { + static void Operation(STATE &state, const INPUT_TYPE &input, AggregateUnaryInput &agg) { if (!state.geom) { - state.geom = Deserialize(state.context, input); + state.geom = Deserialize(state.context, agg.input.allocator, input); } else { - auto next = Deserialize(state.context, input); + auto next = Deserialize(state.context, agg.input.allocator, input); auto curr = state.geom; state.geom = OP::Merge(state.context, curr, next); GEOSGeom_destroy_r(state.context, next); @@ -2266,10 +2274,10 @@ struct GeosUnaryAggFunction { } template - static void ConstantOperation(STATE &state, const INPUT_TYPE &input, AggregateUnaryInput &, idx_t) { + static void ConstantOperation(STATE &state, const INPUT_TYPE &input, AggregateUnaryInput &agg, idx_t) { // There is no point in doing anything else, intersection and union is idempotent if (!state.geom) { - state.geom = Deserialize(state.context, input); + state.geom = Deserialize(state.context, agg.input.allocator, input); } } @@ -2376,11 +2384,11 @@ struct ST_Union_Agg { } // Deserialize a GEOS geometry - static GEOSGeometry *Deserialize(const GEOSContextHandle_t context, const string_t &blob) { + static GEOSGeometry *Deserialize(const GEOSContextHandle_t context, ArenaAllocator &arena, const string_t &blob) { const auto ptr = blob.GetData(); const auto size = blob.GetSize(); - return GeosSerde::Deserialize(context, ptr, size); + return GeosSerde::Deserialize(context, arena, ptr, size); } static void Initialize(const AggregateFunction &, data_ptr_t state_mem) { @@ -2389,7 +2397,7 @@ struct ST_Union_Agg { state.context = GEOS_init_r(); } - static void Update(Vector inputs[], 
AggregateInputData &, idx_t, Vector &state_vec, idx_t count) { + static void Update(Vector inputs[], AggregateInputData &aggr, idx_t, Vector &state_vec, idx_t count) { auto &geom_vec = inputs[0]; @@ -2412,7 +2420,7 @@ struct ST_Union_Agg { // Now, deserialize the geometry and append it to the list in each state auto &state = *state_ptr[state_idx]; - const auto geom = Deserialize(state.context, geom_ptr[geom_idx]); + const auto geom = Deserialize(state.context, aggr.allocator, geom_ptr[geom_idx]); state.geoms.push_back(geom); } } @@ -2574,11 +2582,11 @@ struct GEOSCoverageAggFunction { } // Deserialize a GEOS geometry - static GEOSGeometry *Deserialize(const GEOSContextHandle_t context, const string_t &blob) { + static GEOSGeometry *Deserialize(const GEOSContextHandle_t context, ArenaAllocator &arena, const string_t &blob) { const auto ptr = blob.GetData(); const auto size = blob.GetSize(); - return GeosSerde::Deserialize(context, ptr, size); + return GeosSerde::Deserialize(context, arena, ptr, size); } static void Initialize(const AggregateFunction &, data_ptr_t state_mem) { @@ -2765,7 +2773,7 @@ struct ST_CoverageSimplify_Agg : GEOSCoverageAggFunction { // Now, deserialize the geometry and append it to the list in each state auto &state = *state_ptr[state_idx]; - const auto geom = Deserialize(state.context, geom_ptr[geom_idx]); + const auto geom = Deserialize(state.context, aggr_input_data.allocator, geom_ptr[geom_idx]); state.geoms.push_back(geom); // Also set parameters @@ -2839,7 +2847,7 @@ struct ST_CoverageUnion_Agg : GEOSCoverageAggFunction { // Now, deserialize the geometry and append it to the list in each state auto &state = *state_ptr[state_idx]; - const auto geom = Deserialize(state.context, geom_ptr[geom_idx]); + const auto geom = Deserialize(state.context, aggr_input_data.allocator, geom_ptr[geom_idx]); state.geoms.push_back(geom); // Also set parameters @@ -2923,7 +2931,7 @@ struct ST_CoverageInvalidEdges_Agg : GEOSCoverageAggFunction { // Now, 
deserialize the geometry and append it to the list in each state auto &state = *state_ptr[state_idx]; - const auto geom = Deserialize(state.context, geom_ptr[geom_idx]); + const auto geom = Deserialize(state.context, aggr_input_data.allocator, geom_ptr[geom_idx]); state.geoms.push_back(geom); // Also set parameters diff --git a/src/spatial/modules/geos/geos_serde.cpp b/src/spatial/modules/geos/geos_serde.cpp index 633020d2..65ef1575 100644 --- a/src/spatial/modules/geos/geos_serde.cpp +++ b/src/spatial/modules/geos/geos_serde.cpp @@ -6,6 +6,7 @@ #include "sgl/sgl.hpp" #include "duckdb/common/assert.hpp" +#include "duckdb/storage/arena_allocator.hpp" #include "spatial/util/binary_writer.hpp" #include "spatial/util/math.hpp" #include "spatial/util/binary_reader.hpp" @@ -205,10 +206,16 @@ void GeosSerde::Serialize(GEOSContextHandle_t ctx, const GEOSGeom_t *geom, char //------------------------------------------------------------------------------ // Deserialize //------------------------------------------------------------------------------ -static GEOSGeom_t *DeserializeInternal(BinaryReader &reader, GEOSContextHandle_t ctx); +static GEOSGeom_t *DeserializeInternal(BinaryReader &reader, ArenaAllocator &arena, GEOSContextHandle_t ctx); + +template +static T *AllocateArray(ArenaAllocator &arena, size_t count) { + return reinterpret_cast(arena.AllocateAligned(count * sizeof(T))); +} template -static GEOSGeom_t *DeserializeTemplated(BinaryReader &reader, GEOSContextHandle_t ctx, sgl::geometry_type type) { +static GEOSGeom_t *DeserializeTemplated(BinaryReader &reader, ArenaAllocator &arena, GEOSContextHandle_t ctx, + sgl::geometry_type type) { constexpr auto VERTEX_SIZE = V::HAS_Z + V::HAS_M + 2; switch (type) { @@ -225,11 +232,10 @@ static GEOSGeom_t *DeserializeTemplated(BinaryReader &reader, GEOSContextHandle_ if (vert_count == 0) { return GEOSGeom_createEmptyLineString_r(ctx); } - auto vert_array = new double[vert_count * VERTEX_SIZE]; + auto vert_array = 
AllocateArray(arena, vert_count * VERTEX_SIZE); auto ptr = reader.Reserve(vert_count * VERTEX_SIZE * sizeof(double)); memcpy(vert_array, ptr, vert_count * VERTEX_SIZE * sizeof(double)); auto seq = GEOSCoordSeq_copyFromBuffer_r(ctx, vert_array, vert_count, V::HAS_Z, V::HAS_M); - delete[] vert_array; return GEOSGeom_createLineString_r(ctx, seq); } case sgl::geometry_type::POLYGON: { @@ -237,56 +243,67 @@ static GEOSGeom_t *DeserializeTemplated(BinaryReader &reader, GEOSContextHandle_ if (ring_count == 0) { return GEOSGeom_createEmptyPolygon_r(ctx); } - vector rings; + auto ring_array = AllocateArray(ring_count); for (uint32_t i = 0; i < ring_count; i++) { const auto vert_count = reader.Read(); - auto vert_array = new double[vert_count * VERTEX_SIZE]; + auto vert_array = AllocateArray(arena, vert_count * VERTEX_SIZE); auto ptr = reader.Reserve(vert_count * VERTEX_SIZE * sizeof(double)); memcpy(vert_array, ptr, vert_count * VERTEX_SIZE * sizeof(double)); auto seq = GEOSCoordSeq_copyFromBuffer_r(ctx, vert_array, vert_count, V::HAS_Z, V::HAS_M); - delete[] vert_array; - rings.push_back(GEOSGeom_createLinearRing_r(ctx, seq)); + ring_array[i] = GEOSGeom_createLinearRing_r(ctx, seq); } - return GEOSGeom_createPolygon_r(ctx, rings[0], rings.data() + 1, ring_count - 1); + return GEOSGeom_createPolygon_r(ctx, ring_array[0], ring_array + 1, ring_count - 1); } case sgl::geometry_type::MULTI_POINT: { - vector rings; const auto part_count = reader.Read(); + if (part_count == 0) { + return GEOSGeom_createEmptyCollection_r(ctx, GEOS_MULTIPOINT); + } + const auto part_array = AllocateArray(arena, part_count); for (uint32_t i = 0; i < part_count; i++) { - rings.push_back(DeserializeInternal(reader, ctx)); + part_array[i] = DeserializeInternal(reader, arena, ctx); } - return GEOSGeom_createCollection_r(ctx, GEOS_MULTIPOINT, rings.data(), part_count); + return GEOSGeom_createCollection_r(ctx, GEOS_MULTIPOINT, part_array, part_count); } case sgl::geometry_type::MULTI_LINESTRING: { - 
vector rings; const auto part_count = reader.Read(); + if (part_count == 0) { + return GEOSGeom_createEmptyCollection_r(ctx, GEOS_MULTILINESTRING); + } + const auto part_array = AllocateArray(arena, part_count); for (uint32_t i = 0; i < part_count; i++) { - rings.push_back(DeserializeInternal(reader, ctx)); + part_array[i] = DeserializeInternal(reader, arena, ctx); } - return GEOSGeom_createCollection_r(ctx, GEOS_MULTILINESTRING, rings.data(), part_count); + return GEOSGeom_createCollection_r(ctx, GEOS_MULTILINESTRING, part_array, part_count); } case sgl::geometry_type::MULTI_POLYGON: { - vector rings; const auto part_count = reader.Read(); + if (part_count == 0) { + return GEOSGeom_createEmptyCollection_r(ctx, GEOS_MULTIPOLYGON); + } + const auto part_array = AllocateArray(arena, part_count); for (uint32_t i = 0; i < part_count; i++) { - rings.push_back(DeserializeInternal(reader, ctx)); + part_array[i] = DeserializeInternal(reader, arena, ctx); } - return GEOSGeom_createCollection_r(ctx, GEOS_MULTIPOLYGON, rings.data(), part_count); + return GEOSGeom_createCollection_r(ctx, GEOS_MULTIPOLYGON, part_array, part_count); } case sgl::geometry_type::GEOMETRY_COLLECTION: { - vector rings; const auto part_count = reader.Read(); + if (part_count == 0) { + return GEOSGeom_createEmptyCollection_r(ctx, GEOS_GEOMETRYCOLLECTION); + } + const auto part_array = AllocateArray(arena, part_count); for (uint32_t i = 0; i < part_count; i++) { - rings.push_back(DeserializeInternal(reader, ctx)); + part_array[i] = DeserializeInternal(reader, arena, ctx); } - return GEOSGeom_createCollection_r(ctx, GEOS_GEOMETRYCOLLECTION, rings.data(), part_count); + return GEOSGeom_createCollection_r(ctx, GEOS_GEOMETRYCOLLECTION, part_array, part_count); } default: throw InvalidInputException("Unsupported geometry type %d", static_cast(type)); } } -static GEOSGeom_t *DeserializeInternal(BinaryReader &reader, GEOSContextHandle_t ctx) { +static GEOSGeom_t *DeserializeInternal(BinaryReader &reader, 
ArenaAllocator &arena, GEOSContextHandle_t ctx) { while (true) { const auto le = reader.Read(); @@ -301,22 +318,28 @@ static GEOSGeom_t *DeserializeInternal(BinaryReader &reader, GEOSContextHandle_t const auto has_m = (flag & 0x02) != 0; if (has_z && has_m) { - return DeserializeTemplated(reader, ctx, type); + return DeserializeTemplated(reader, arena, ctx, type); } if (has_z) { - return DeserializeTemplated(reader, ctx, type); + return DeserializeTemplated(reader, arena, ctx, type); } if (has_m) { - return DeserializeTemplated(reader, ctx, type); + return DeserializeTemplated(reader, arena, ctx, type); } else { - return DeserializeTemplated(reader, ctx, type); + return DeserializeTemplated(reader, arena, ctx, type); } } } -GEOSGeom_t *GeosSerde::Deserialize(GEOSContextHandle_t ctx, const char *buffer, size_t buffer_size) { +GEOSGeom_t *GeosSerde::Deserialize(GEOSContextHandle_t ctx, ArenaAllocator &arena, const char *buffer, + size_t buffer_size) { + // GEOS always does full copies of the data, + // so reset the arena after each deserialization + arena.Reset(); + + // Deserialize the geometry BinaryReader reader(buffer, buffer_size); - return DeserializeInternal(reader, ctx); + return DeserializeInternal(reader, arena, ctx); } } // namespace duckdb diff --git a/src/spatial/modules/geos/geos_serde.hpp b/src/spatial/modules/geos/geos_serde.hpp index 6da77d05..e526e66c 100644 --- a/src/spatial/modules/geos/geos_serde.hpp +++ b/src/spatial/modules/geos/geos_serde.hpp @@ -14,7 +14,8 @@ class ArenaAllocator; struct GeosSerde { static size_t GetRequiredSize(GEOSContextHandle_t ctx, const GEOSGeom_t *geom); static void Serialize(GEOSContextHandle_t ctx, const GEOSGeom_t *geom, char *buffer, size_t buffer_size); - static GEOSGeom_t *Deserialize(GEOSContextHandle_t ctx, const char *buffer, size_t buffer_size); + static GEOSGeom_t *Deserialize(GEOSContextHandle_t ctx, ArenaAllocator &arena, const char *buffer, + size_t buffer_size); }; -} // namespace duckdb \ No newline at 
end of file +} // namespace duckdb From 3868eed9a97ff9eb840dba7050530ff15ba54ffa Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Fri, 21 Nov 2025 13:58:06 +0100 Subject: [PATCH 40/41] tweak spatial join --- .../operators/spatial_join_physical.cpp | 155 +++++++++++++----- 1 file changed, 112 insertions(+), 43 deletions(-) diff --git a/src/spatial/operators/spatial_join_physical.cpp b/src/spatial/operators/spatial_join_physical.cpp index f4ff1053..3630b1e7 100644 --- a/src/spatial/operators/spatial_join_physical.cpp +++ b/src/spatial/operators/spatial_join_physical.cpp @@ -21,6 +21,8 @@ namespace duckdb { // Flat RTree //====================================================================================================================== +// #define DUCKDB_SPATIAL_DEBUG_JOIN + namespace { template @@ -132,11 +134,12 @@ class FlatRTree { return current_position++; } - void Sort(vector &curve) { - Sort(curve, 0, curve.size() - 1); + static void Sort(vector &curve, typed_view &box_array, typed_view &idx_array) { + Sort(curve, box_array, idx_array, 0, curve.size() - 1); } - void Sort(vector &curve, size_t l_idx, size_t r_idx) { + static void Sort(vector &curve, typed_view &box_array, typed_view &idx_array, size_t l_idx, + size_t r_idx) { if (l_idx < r_idx) { const auto pivot = curve[(l_idx + r_idx) >> 1]; auto pivot_l = l_idx - 1; @@ -161,8 +164,55 @@ class FlatRTree { std::swap(idx_array[pivot_l], idx_array[pivot_r]); } - Sort(curve, l_idx, pivot_r); - Sort(curve, pivot_r + 1, r_idx); + Sort(curve, box_array, idx_array, l_idx, pivot_r); + Sort(curve, box_array, idx_array, pivot_r + 1, r_idx); + } + } + + void STRSort(typed_view &box_array, typed_view idx_array) { + // Perform Sort-tile-recursive (STR) packing + + const auto num_leaf_nodes = (item_count + node_size - 1) / node_size; + const auto num_vertical_slices = static_cast(std::ceil(std::sqrt(num_leaf_nodes))); + const auto slice_size = (item_count + num_vertical_slices - 1) / num_vertical_slices; + + vector 
indexes; + for (uint32_t i = 0; i < item_count; i++) { + indexes.push_back(i); + } + + // Sort by x-axis into vertical slices + std::sort(indexes.begin(), indexes.end(), + [&](uint32_t a, uint32_t b) { return box_array[a].Center().x < box_array[b].Center().x; }); + + // Then sort each vertical slice by y-axis + for (uint32_t slice_idx = 0; slice_idx < num_vertical_slices; slice_idx++) { + const auto slice_beg = slice_idx * slice_size; + const auto slice_end = MinValue(slice_beg + slice_size, item_count); + std::sort(indexes.begin() + slice_beg, indexes.begin() + slice_end, + [&](uint32_t a, uint32_t b) { return box_array[a].Center().y < box_array[b].Center().y; }); + } + + // Reorder the box_array and idx_array based on the sorted indexes + // DO this in-place. There cannot be any cycles since all indexes are unique + for (int i = 0; i < item_count; i++) { + if (indexes[i] == -1) + continue; // index `i` has been processed, skip + auto box = box_array[i]; + auto idx = idx_array[i]; + + int x = i, y = indexes[i]; // `x` is the current index, `y` is the "target" index + while (y != i) { + indexes[x] = -1; // mark index as processed + box_array[x] = box_array[y]; + idx_array[x] = idx_array[y]; + x = y; + y = indexes[x]; + } + // Now `x` is the index that satisfies `indices[x] == i`. + box_array[x] = box; + idx_array[x] = idx; + indexes[x] = -1; } } @@ -176,6 +226,7 @@ class FlatRTree { // Generate hilbert curve values // TODO: Parallelize this with tasks when the number of items is large? 
+ constexpr auto max_hilbert = std::numeric_limits::max(); const auto hw = max_hilbert / (tree_box.max.x - tree_box.min.x); const auto hh = max_hilbert / (tree_box.max.y - tree_box.min.y); @@ -191,7 +242,8 @@ class FlatRTree { } // Now, sort the indices based on their curve value - Sort(curve); + Sort(curve, box_array, idx_array); + //STRSort(box_array, idx_array); size_t layer_idx = 0; size_t entry_idx = 0; @@ -723,11 +775,17 @@ class SpatialJoinGlobalOperatorState final : public GlobalOperatorState { // TODO: Move this into proper profiling metrics later // Statistics atomic total_rtree_probes = {0}; + atomic total_rtree_successfull_probes = {0}; atomic total_rtree_candidates = {0}; + atomic max_candidates = {0}; + atomic min_candidates = {std::numeric_limits::max()}; ~SpatialJoinGlobalOperatorState() override { - Printer::PrintF("Spatial Join Stats: RTree Probes: %llu, RTree Candidates: %llu\n", total_rtree_probes.load(), - total_rtree_candidates.load()); + Printer::PrintF("Spatial Join RTree Probes: %llu\n", total_rtree_probes.load()); + Printer::PrintF("Spatial Join RTree Successful Probes: %llu\n", total_rtree_successfull_probes.load()); + Printer::PrintF("Spatial Join RTree Candidates: %llu\n", total_rtree_candidates.load()); + Printer::PrintF("Spatial Join RTree Max Candidates per Probe: %llu\n", max_candidates.load()); + Printer::PrintF("Spatial Join RTree Min Candidates per Probe: %llu\n", min_candidates.load()); } #endif }; @@ -843,14 +901,20 @@ OperatorResultType PhysicalSpatialJoin::ExecuteInternal(ExecutionContext &contex gstate.rtree->InitScan(lstate.scan, bbox); +#ifdef DUCKDB_SPATIAL_DEBUG_JOIN + gstate.total_rtree_probes += 1; +#endif + if (!gstate.rtree->Scan(lstate.scan)) { lstate.input_index++; continue; } #ifdef DUCKDB_SPATIAL_DEBUG_JOIN + gstate.total_rtree_successfull_probes += 1; gstate.total_rtree_candidates += lstate.scan.matches_count; - gstate.total_rtree_probes += 1; + gstate.max_candidates = MaxValue(gstate.max_candidates.load(), 
lstate.scan.matches_count); + gstate.min_candidates = MinValue(gstate.min_candidates.load(), lstate.scan.matches_count); #endif lstate.state = SpatialJoinState::SCAN; @@ -1042,14 +1106,16 @@ class SpatialJoinGlobalSourceState final : public GlobalSourceState { column_ids.push_back(op.build_side_key_types.size() + op.build_side_payload_types.size()); // We dont need to keep the tuples aroun after scanning - state.collection->InitializeScan(scan_state, std::move(column_ids), - TupleDataPinProperties::KEEP_EVERYTHING_PINNED); + state.collection->InitializeScan(scan_state, std::move(column_ids), TupleDataPinProperties::UNPIN_AFTER_DONE); tuples_maximum = state.collection->Count(); } const PhysicalSpatialJoin &op; + + mutex scan_lock; TupleDataParallelScanState scan_state; + // How many tuples we have scanned so far idx_t tuples_maximum = 0; atomic tuples_scanned = {0}; @@ -1057,10 +1123,17 @@ class SpatialJoinGlobalSourceState final : public GlobalSourceState { public: idx_t MaxThreads() override { const auto &state = op.op_state->Cast(); - const auto count = state.collection->Count(); + return state.collection->ChunkCount(); + } + + bool Scan(TupleDataLocalScanState &local_scan, DataChunk &chunk) { + const auto &collection = op.op_state->Cast().collection; - // Rough approximation of the number of threads to use - return count / (STANDARD_VECTOR_SIZE * 10ULL); + lock_guard guard(scan_lock); + const auto not_empty = collection->Scan(scan_state, local_scan, chunk); + tuples_scanned += chunk.size(); + + return not_empty; } }; @@ -1106,45 +1179,41 @@ SourceResultType PhysicalSpatialJoin::GetDataInternal(ExecutionContext &context, auto &gstate = input.global_state.Cast(); auto &lstate = input.local_state.Cast(); - const auto &tuples = gstate.op.op_state->Cast().collection; - - while (tuples->Scan(gstate.scan_state, lstate.scan_state, lstate.scan_chunk)) { - gstate.tuples_scanned += lstate.scan_chunk.size(); + if (!gstate.Scan(lstate.scan_state, lstate.scan_chunk)) { + 
return SourceResultType::FINISHED; + } - const auto matches = FlatVector::GetData(lstate.scan_chunk.data.back()); + const auto matches = FlatVector::GetData(lstate.scan_chunk.data.back()); - idx_t result_count = 0; - for (idx_t i = 0; i < lstate.scan_chunk.size(); i++) { - if (!matches[i]) { - lstate.match_sel.set_index(result_count++, i); - } + idx_t result_count = 0; + for (idx_t i = 0; i < lstate.scan_chunk.size(); i++) { + if (!matches[i]) { + lstate.match_sel.set_index(result_count++, i); } + } - if (result_count > 0) { + if (result_count > 0) { - const auto lhs_col_count = probe_side_output_columns.size(); - const auto rhs_col_count = build_side_output_columns.size(); + const auto lhs_col_count = probe_side_output_columns.size(); + const auto rhs_col_count = build_side_output_columns.size(); - // Null the LHS columns - for (idx_t i = 0; i < lhs_col_count; i++) { - auto &target = chunk.data[i]; - target.SetVectorType(VectorType::CONSTANT_VECTOR); - ConstantVector::SetNull(target, true); - } - - // Set the RHS columns - for (idx_t i = 0; i < rhs_col_count; i++) { - auto &target = chunk.data[lhs_col_count + i]; - // Offset by one here to skip the match column - target.Slice(lstate.scan_chunk.data[i], lstate.match_sel, result_count); - } + // Null the LHS columns + for (idx_t i = 0; i < lhs_col_count; i++) { + auto &target = chunk.data[i]; + target.SetVectorType(VectorType::CONSTANT_VECTOR); + ConstantVector::SetNull(target, true); + } - chunk.SetCardinality(result_count); - return SourceResultType::HAVE_MORE_OUTPUT; + // Set the RHS columns + for (idx_t i = 0; i < rhs_col_count; i++) { + auto &target = chunk.data[lhs_col_count + i]; + // Offset by one here to skip the match column + target.Slice(lstate.scan_chunk.data[i], lstate.match_sel, result_count); } } - return SourceResultType::FINISHED; + chunk.SetCardinality(result_count); + return SourceResultType::HAVE_MORE_OUTPUT; } 
//---------------------------------------------------------------------------------------------------------------------- From bd169da709c1c58360a3b567f3512b14efa8e120 Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Fri, 21 Nov 2025 13:58:33 +0100 Subject: [PATCH 41/41] actually prepare deserialization --- .../geometry/geometry_serialization.cpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/spatial/geometry/geometry_serialization.cpp b/src/spatial/geometry/geometry_serialization.cpp index f869a682..63216425 100644 --- a/src/spatial/geometry/geometry_serialization.cpp +++ b/src/spatial/geometry/geometry_serialization.cpp @@ -166,13 +166,13 @@ void Prepare(sgl::prepared_geometry &type, ArenaAllocato } template -static void DeserializeInternal(sgl::geometry &result, ArenaAllocator &arena, const char *buffer, size_t buffer_size) { +static void DeserializeInternal(GEOM_TYPE &result, ArenaAllocator &arena, const char *buffer, size_t buffer_size) { BinaryReader reader(buffer, buffer_size); uint32_t stack[32]; uint32_t depth = 0; - auto geom = &result; + sgl::geometry *geom = &result; while (true) { const auto le = reader.Read() == 1; @@ -213,8 +213,8 @@ static void DeserializeInternal(sgl::geometry &result, ArenaAllocator &arena, co break; } for (uint32_t i = 0; i < ring_count; i++) { - auto ring_mem = arena.AllocateAligned(sizeof(sgl::geometry)); - const auto ring = new (ring_mem) sgl::geometry(sgl::geometry_type::LINESTRING, has_z, has_m); + auto ring_mem = arena.AllocateAligned(sizeof(GEOM_TYPE)); + const auto ring = new (ring_mem) GEOM_TYPE(sgl::geometry_type::LINESTRING, has_z, has_m); const auto vert_count = reader.Read(); const auto vert_array = reader.Reserve(vert_count * ring->get_vertex_width()); @@ -241,8 +241,8 @@ static void DeserializeInternal(sgl::geometry &result, ArenaAllocator &arena, co stack[depth++] = part_count; // Make a new part - const auto part_mem = arena.AllocateAligned(sizeof(sgl::geometry)); - 
const auto part_ptr = new (part_mem) sgl::geometry(sgl::geometry_type::INVALID, has_z, has_m); + const auto part_mem = arena.AllocateAligned(sizeof(GEOM_TYPE)); + const auto part_ptr = new (part_mem) GEOM_TYPE(sgl::geometry_type::INVALID, has_z, has_m); geom->append_part(part_ptr); geom = part_ptr; @@ -265,8 +265,8 @@ static void DeserializeInternal(sgl::geometry &result, ArenaAllocator &arena, co stack[depth - 1]--; if (stack[depth - 1] > 0) { - const auto part_mem = arena.AllocateAligned(sizeof(sgl::geometry)); - const auto part_ptr = new (part_mem) sgl::geometry(sgl::geometry_type::INVALID, has_z, has_m); + const auto part_mem = arena.AllocateAligned(sizeof(GEOM_TYPE)); + const auto part_ptr = new (part_mem) GEOM_TYPE(sgl::geometry_type::INVALID, has_z, has_m); parent->append_part(part_ptr); @@ -281,12 +281,12 @@ static void DeserializeInternal(sgl::geometry &result, ArenaAllocator &arena, co } void Serde::Deserialize(sgl::geometry &result, ArenaAllocator &arena, const char *buffer, size_t buffer_size) { - DeserializeInternal(result, arena, buffer, buffer_size); + DeserializeInternal(result, arena, buffer, buffer_size); } void Serde::DeserializePrepared(sgl::prepared_geometry &result, ArenaAllocator &arena, const char *buffer, size_t buffer_size) { - DeserializeInternal(result, arena, buffer, buffer_size); + DeserializeInternal(result, arena, buffer, buffer_size); } uint32_t Serde::TryGetBounds(const string_t &blob, Box2D &bbox) {