Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 80 additions & 48 deletions include/sparrow/map_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,36 @@ namespace sparrow
[[nodiscard]] SPARROW_API static bool
check_keys_sorted(const array& flat_keys, const offset_buffer_type& offsets);

/**
* @brief Core implementation for creating Arrow proxy structures.
*
* Internal helper that creates the ArrowArray and ArrowSchema from the provided
* components. This function contains the common logic shared by the public
* create_proxy overloads, which prepare the validity buffer, the null count and
* the flags before delegating here.
*
* The keys and items are moved into a struct child array named "entries", and
* the length of the resulting array is `list_offsets.size() - 1`.
*
* @tparam METADATA_RANGE Type of metadata container
* @param flat_keys Array containing all map keys (moved into the "entries" child)
* @param flat_items Array containing all map values (moved into the "entries" child)
* @param list_offsets Buffer of offsets indicating map boundaries; its storage
*        becomes the second buffer of the ArrowArray
* @param validity_buffer Storage of the validity bitmap, used as the first buffer
*        of the ArrowArray. Callers that have no validity bitmap pass
*        `buffer<std::uint8_t>{nullptr, 0}`
* @param null_count Number of null elements; stored as given, it is not
*        recomputed from validity_buffer
* @param flags ArrowFlag set for the schema, or std::nullopt for no flags
* @param name Optional name for the array
* @param metadata Optional metadata for the array
* @return Arrow proxy containing the map array data and schema
*/
template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
[[nodiscard]] static arrow_proxy create_proxy_impl(
array&& flat_keys,
array&& flat_items,
offset_buffer_type&& list_offsets,
buffer<std::uint8_t>&& validity_buffer,
std::int64_t null_count,
std::optional<std::unordered_set<ArrowFlag>> flags,
std::optional<std::string_view> name,
std::optional<METADATA_RANGE> metadata
);

/**
* @brief Creates Arrow proxy from keys, values, offsets, and validity bitmap.
*
Expand Down Expand Up @@ -518,32 +548,25 @@ namespace sparrow
);
}

template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
arrow_proxy map_array::create_proxy(
template <input_metadata_container METADATA_RANGE>
arrow_proxy map_array::create_proxy_impl(
array&& flat_keys,
array&& flat_items,
offset_buffer_type&& list_offsets,
VB&& validity_input,
buffer<std::uint8_t>&& validity_buffer,
std::int64_t null_count,
std::optional<std::unordered_set<ArrowFlag>> flags,
std::optional<std::string_view> name,
std::optional<METADATA_RANGE> metadata
)
{
const auto size = list_offsets.size() - 1;
validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));

std::optional<std::unordered_set<ArrowFlag>> flags{{ArrowFlag::NULLABLE}};
bool keys_sorted = check_keys_sorted(flat_keys, list_offsets);
if (keys_sorted)
{
flags.value().insert(ArrowFlag::MAP_KEYS_SORTED);
}

std::array<sparrow::array, 2> struct_children = {std::move(flat_keys), std::move(flat_items)};
struct_array entries(std::move(struct_children), false, std::string("entries"));

auto [entries_arr, entries_schema] = extract_arrow_structures(std::move(entries));

const auto null_count = vbitmap.null_count();
const repeat_view<bool> children_ownership{true, 1};

ArrowSchema schema = make_arrow_schema(
Expand All @@ -559,13 +582,13 @@ namespace sparrow
);

std::vector<buffer<std::uint8_t>> arr_buffs = {
std::move(vbitmap).extract_storage(),
std::move(validity_buffer),
std::move(list_offsets).extract_storage()
};

ArrowArray arr = make_arrow_array(
static_cast<std::int64_t>(size), // length
static_cast<std::int64_t>(null_count),
null_count,
0, // offset
std::move(arr_buffs),
new ArrowArray*[1]{new ArrowArray(std::move(entries_arr))},
Expand All @@ -576,6 +599,41 @@ namespace sparrow
return arrow_proxy{std::move(arr), std::move(schema)};
}

/**
 * Creates the Arrow proxy for a map array that carries a validity bitmap.
 *
 * Converts `validity_input` into a validity_bitmap covering
 * `list_offsets.size() - 1` maps, builds the schema flags (always NULLABLE,
 * plus MAP_KEYS_SORTED when check_keys_sorted() returns true) and delegates the
 * construction of the ArrowArray / ArrowSchema to create_proxy_impl().
 *
 * NOTE(review): assumes list_offsets holds at least one offset; with an empty
 * buffer `size() - 1` would wrap around if size() is unsigned - confirm that
 * callers guarantee this.
 *
 * @tparam VB Type of the validity bitmap input
 * @tparam METADATA_RANGE Type of metadata container
 * @param flat_keys Array containing all map keys
 * @param flat_items Array containing all map values
 * @param list_offsets Buffer of offsets indicating map boundaries
 * @param validity_input Validity information, forwarded to ensure_validity_bitmap()
 * @param name Optional name for the array
 * @param metadata Optional metadata for the array
 * @return Arrow proxy containing the map array data and schema
 */
template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
arrow_proxy map_array::create_proxy(
    array&& flat_keys,
    array&& flat_items,
    offset_buffer_type&& list_offsets,
    VB&& validity_input,
    std::optional<std::string_view> name,
    std::optional<METADATA_RANGE> metadata
)
{
    const auto size = list_offsets.size() - 1;
    validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));

    std::optional<std::unordered_set<ArrowFlag>> flags{{ArrowFlag::NULLABLE}};
    // Must run before flat_keys / list_offsets are moved into the implementation.
    if (check_keys_sorted(flat_keys, list_offsets))
    {
        flags->insert(ArrowFlag::MAP_KEYS_SORTED);
    }

    // Read the null count first: extracting the storage consumes the bitmap.
    const auto null_count = vbitmap.null_count();
    buffer<std::uint8_t> validity_buffer = std::move(vbitmap).extract_storage();

    return create_proxy_impl(
        std::move(flat_keys),
        std::move(flat_items),
        std::move(list_offsets),
        std::move(validity_buffer),
        static_cast<std::int64_t>(null_count),
        std::move(flags),
        name,
        // metadata is a by-value parameter, not a forwarding reference: move it.
        std::move(metadata)
    );
}

template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
arrow_proxy map_array::create_proxy(
array&& flat_keys,
Expand Down Expand Up @@ -604,42 +662,16 @@ namespace sparrow
? std::optional<std::unordered_set<ArrowFlag>>{{ArrowFlag::MAP_KEYS_SORTED}}
: std::nullopt;

const auto size = list_offsets.size() - 1;

std::array<sparrow::array, 2> struct_children = {std::move(flat_keys), std::move(flat_items)};
struct_array entries(std::move(struct_children), false, std::string("entries"));

auto [entries_arr, entries_schema] = extract_arrow_structures(std::move(entries));
const repeat_view<bool> children_ownership{true, 1};

ArrowSchema schema = make_arrow_schema(
std::string_view("+m"),
name, // name
metadata, // metadata
flags, // flags,
new ArrowSchema*[1]{new ArrowSchema(std::move(entries_schema))},
children_ownership, // children ownership
nullptr, // dictionary
true // dictionary ownership

);

std::vector<buffer<std::uint8_t>> arr_buffs = {
return create_proxy_impl(
std::move(flat_keys),
std::move(flat_items),
std::move(list_offsets),
buffer<std::uint8_t>{nullptr, 0}, // no validity bitmap
std::move(list_offsets).extract_storage()
};

ArrowArray arr = make_arrow_array(
static_cast<std::int64_t>(size), // length
0,
0, // offset
std::move(arr_buffs),
new ArrowArray*[1]{new ArrowArray(std::move(entries_arr))},
children_ownership, // children ownership
nullptr, // dictionary
true // dictionary ownership
0, // null_count
std::move(flags),
name,
std::forward<std::optional<METADATA_RANGE>>(metadata)
);
return arrow_proxy{std::move(arr), std::move(schema)};
}
}
}
Loading