// REVIEW NOTES (annotation only; the patch text below is unchanged).
//
// This is a git unified diff against include/sparrow/map_array.hpp. As captured here it is
// whitespace-collapsed, and template argument lists appear to have been stripped
// (e.g. `template >`, `static_cast(size)`, `std::optional> flags`), so read it as a record
// of the change rather than as an applicable patch -- TODO confirm against the original commit.
//
// What the patch does, hunk by hunk:
//  1. @@ -337,6 +337,36 @@
//     Adds a Doxygen-documented declaration of the static helper create_proxy_impl directly
//     after check_keys_sorted. Its parameters are flat_keys, flat_items, list_offsets,
//     validity_buffer, null_count, flags, name and metadata.
//  2. @@ -518,32 +548,25 @@ and @@ -559,13 +582,13 @@
//     Turns the body of the former create_proxy(..., VB&& validity_input, ...) into
//     map_array::create_proxy_impl. The helper no longer calls ensure_validity_bitmap,
//     check_keys_sorted or vbitmap.null_count(); it receives validity_buffer, null_count and
//     flags from its caller. It still wraps flat_keys/flat_items in a struct_array named
//     "entries", extracts that array's Arrow structures, builds an ArrowSchema with format "+m",
//     and builds an ArrowArray whose buffers are {validity_buffer, list_offsets storage} and
//     whose single child is the entries array.
//  3. @@ -576,6 +599,41 @@
//     Re-introduces create_proxy(..., VB&& validity_input, ...) as a wrapper. It computes
//     size = list_offsets.size() - 1, builds the validity bitmap, starts flags with
//     ArrowFlag::NULLABLE and inserts ArrowFlag::MAP_KEYS_SORTED when check_keys_sorted returns
//     true, reads null_count from the bitmap, extracts the bitmap storage, and forwards
//     everything to create_proxy_impl.
//  4. @@ -604,42 +662,16 @@
//     In the next create_proxy overload (its parameter list is only partly visible in this
//     view), replaces the duplicated schema/array construction with a call to
//     create_proxy_impl, passing buffer{nullptr, 0} (no validity bitmap) and a null_count of 0.
//     flags there is either {ArrowFlag::MAP_KEYS_SORTED} or std::nullopt; the condition that
//     selects between them starts outside the visible context.
//
// NOTE(review): both wrappers pass `metadata` through std::forward although it is a by-value
//   parameter; presumably this is meant as a plain move -- confirm.
// NOTE(review): `list_offsets.size() - 1` presumably relies on the offsets buffer holding at
//   least one element; verify that callers guarantee this.
diff --git a/include/sparrow/map_array.hpp b/include/sparrow/map_array.hpp index e42c615e1..cdb2dfc7d 100644 --- a/include/sparrow/map_array.hpp +++ b/include/sparrow/map_array.hpp @@ -337,6 +337,36 @@ namespace sparrow [[nodiscard]] SPARROW_API static bool check_keys_sorted(const array& flat_keys, const offset_buffer_type& offsets); + /** + * @brief Core implementation for creating Arrow proxy structures. + * + * Internal helper that creates the ArrowArray and ArrowSchema from the provided + * components. This function contains the common logic shared by the public + * create_proxy overloads. + * + * @tparam METADATA_RANGE Type of metadata container + * @param flat_keys Array containing all map keys + * @param flat_items Array containing all map values + * @param list_offsets Buffer of offsets indicating map boundaries + * @param validity_buffer Optional validity bitmap buffer + * @param null_count Number of null elements + * @param flags Optional ArrowFlag set + * @param name Optional name for the array + * @param metadata Optional metadata for the array + * @return Arrow proxy containing the map array data and schema + */ + template > + [[nodiscard]] static arrow_proxy create_proxy_impl( + array&& flat_keys, + array&& flat_items, + offset_buffer_type&& list_offsets, + buffer&& validity_buffer, + std::int64_t null_count, + std::optional> flags, + std::optional name, + std::optional metadata + ); + /** * @brief Creates Arrow proxy from keys, values, offsets, and validity bitmap. 
* @@ -518,32 +548,25 @@ namespace sparrow ); } - template - arrow_proxy map_array::create_proxy( + template + arrow_proxy map_array::create_proxy_impl( array&& flat_keys, array&& flat_items, offset_buffer_type&& list_offsets, - VB&& validity_input, + buffer&& validity_buffer, + std::int64_t null_count, + std::optional> flags, std::optional name, std::optional metadata ) { const auto size = list_offsets.size() - 1; - validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward(validity_input)); - - std::optional> flags{{ArrowFlag::NULLABLE}}; - bool keys_sorted = check_keys_sorted(flat_keys, list_offsets); - if (keys_sorted) - { - flags.value().insert(ArrowFlag::MAP_KEYS_SORTED); - } std::array struct_children = {std::move(flat_keys), std::move(flat_items)}; struct_array entries(std::move(struct_children), false, std::string("entries")); auto [entries_arr, entries_schema] = extract_arrow_structures(std::move(entries)); - const auto null_count = vbitmap.null_count(); const repeat_view children_ownership{true, 1}; ArrowSchema schema = make_arrow_schema( @@ -559,13 +582,13 @@ namespace sparrow ); std::vector> arr_buffs = { - std::move(vbitmap).extract_storage(), + std::move(validity_buffer), std::move(list_offsets).extract_storage() }; ArrowArray arr = make_arrow_array( static_cast(size), // length - static_cast(null_count), + null_count, 0, // offset std::move(arr_buffs), new ArrowArray*[1]{new ArrowArray(std::move(entries_arr))}, @@ -576,6 +599,41 @@ namespace sparrow return arrow_proxy{std::move(arr), std::move(schema)}; } + template + arrow_proxy map_array::create_proxy( + array&& flat_keys, + array&& flat_items, + offset_buffer_type&& list_offsets, + VB&& validity_input, + std::optional name, + std::optional metadata + ) + { + const auto size = list_offsets.size() - 1; + validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward(validity_input)); + + std::optional> flags{{ArrowFlag::NULLABLE}}; + bool keys_sorted = check_keys_sorted(flat_keys, 
list_offsets); + if (keys_sorted) + { + flags.value().insert(ArrowFlag::MAP_KEYS_SORTED); + } + + const auto null_count = vbitmap.null_count(); + buffer validity_buffer = std::move(vbitmap).extract_storage(); + + return create_proxy_impl( + std::move(flat_keys), + std::move(flat_items), + std::move(list_offsets), + std::move(validity_buffer), + static_cast(null_count), + std::move(flags), + name, + std::forward>(metadata) + ); + } + template arrow_proxy map_array::create_proxy( array&& flat_keys, @@ -604,42 +662,16 @@ namespace sparrow ? std::optional>{{ArrowFlag::MAP_KEYS_SORTED}} : std::nullopt; - const auto size = list_offsets.size() - 1; - - std::array struct_children = {std::move(flat_keys), std::move(flat_items)}; - struct_array entries(std::move(struct_children), false, std::string("entries")); - - auto [entries_arr, entries_schema] = extract_arrow_structures(std::move(entries)); - const repeat_view children_ownership{true, 1}; - - ArrowSchema schema = make_arrow_schema( - std::string_view("+m"), - name, // name - metadata, // metadata - flags, // flags, - new ArrowSchema*[1]{new ArrowSchema(std::move(entries_schema))}, - children_ownership, // children ownership - nullptr, // dictionary - true // dictionary ownership - - ); - - std::vector> arr_buffs = { + return create_proxy_impl( + std::move(flat_keys), + std::move(flat_items), + std::move(list_offsets), buffer{nullptr, 0}, // no validity bitmap - std::move(list_offsets).extract_storage() - }; - - ArrowArray arr = make_arrow_array( - static_cast(size), // length - 0, - 0, // offset - std::move(arr_buffs), - new ArrowArray*[1]{new ArrowArray(std::move(entries_arr))}, - children_ownership, // children ownership - nullptr, // dictionary - true // dictionary ownership + 0, // null_count + std::move(flags), + name, + std::forward>(metadata) ); - return arrow_proxy{std::move(arr), std::move(schema)}; } } }