Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions include/sparrow/buffer/dynamic_bitset/dynamic_bitset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ namespace sparrow
* and contains properly formatted bit data.
*/
template <allocator A = default_allocator>
constexpr dynamic_bitset(block_type* p, size_type n, const A& a = A());
constexpr dynamic_bitset(block_type* p, size_type n, size_type offset, const A& a = A());

/**
* @brief Constructs a bitset using existing memory with null count tracking.
Expand All @@ -168,7 +168,13 @@ namespace sparrow
* @post null_count() == null_count
*/
template <allocator A = default_allocator>
constexpr dynamic_bitset(block_type* p, size_type n, size_type null_count, const A& a = A());
constexpr dynamic_bitset(
block_type* p,
size_type n,
size_type offset,
size_type null_count,
const A& a = A()
);

constexpr ~dynamic_bitset() = default;
constexpr dynamic_bitset(const dynamic_bitset&) = default;
Expand Down Expand Up @@ -197,7 +203,7 @@ namespace sparrow
template <class A>
requires(not std::same_as<A, dynamic_bitset<T>> and allocator<A>)
constexpr dynamic_bitset<T>::dynamic_bitset(const A& a)
: base_type(storage_type(a), 0u)
: base_type(storage_type(a), 0u, 0u)
{
base_type::zero_unused_bits();
}
Expand All @@ -224,16 +230,17 @@ namespace sparrow

template <std::integral T>
template <allocator A>
constexpr dynamic_bitset<T>::dynamic_bitset(block_type* p, size_type n, const A& a)
: base_type(storage_type(p, p != nullptr ? this->compute_block_count(n) : 0, a), n)
constexpr dynamic_bitset<T>::dynamic_bitset(block_type* p, size_type n, size_type offset, const A& a)
: base_type(storage_type(p, p != nullptr ? this->compute_block_count(n) : 0, a), n, offset)
{
base_type::zero_unused_bits();
}

template <std::integral T>
template <allocator A>
constexpr dynamic_bitset<T>::dynamic_bitset(block_type* p, size_type n, size_type null_count, const A& a)
: base_type(storage_type(p, this->compute_block_count(n), a), n, null_count)
constexpr dynamic_bitset<
T>::dynamic_bitset(block_type* p, size_type n, size_type offset, size_type null_count, const A& a)
: base_type(storage_type(p, this->compute_block_count(n), a), n, offset, null_count)
{
base_type::zero_unused_bits();
SPARROW_ASSERT_TRUE(base_type::null_count() == base_type::size() - base_type::count_non_null());
Expand Down
43 changes: 33 additions & 10 deletions include/sparrow/buffer/dynamic_bitset/dynamic_bitset_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,27 +334,40 @@ namespace sparrow
return std::move(m_buffer);
}

/**
* @brief Returns the bit offset from the start of the buffer.
*
* This value indicates how many bits from the beginning of the underlying storage
* are skipped before the bitset's logical start. It is set during construction and
* remains constant for the lifetime of the bitset.
*
* @return The number of bits offset from the start of the buffer.
*/
[[nodiscard]] size_t offset() const noexcept;
Copy link

Copilot AI Sep 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The offset() method declaration is missing implementation. Add the implementation for this method to return m_offset.

Suggested change
[[nodiscard]] size_t offset() const noexcept;
[[nodiscard]] size_t offset() const noexcept
{
return m_offset;
}

Copilot uses AI. Check for mistakes.


protected:

/**
* @brief Constructs a bitset with the given storage and size.
* @param buffer The storage buffer to use
* @param size The number of bits in the bitset
* @param offset The number of bits to offset from the start of the buffer
* @post size() == size
* @post null_count() is computed by counting unset bits
*/
constexpr dynamic_bitset_base(storage_type buffer, size_type size);
constexpr dynamic_bitset_base(storage_type buffer, size_type size, size_type offset);

/**
* @brief Constructs a bitset with the given storage, size, and null count.
* @param buffer The storage buffer to use
* @param size The number of bits in the bitset
* @param null_count The number of unset bits
* @param offset The number of bits to offset from the start of the buffer
* @pre null_count <= size
* @post size() == size
* @post null_count() == null_count
*/
constexpr dynamic_bitset_base(storage_type buffer, size_type size, size_type null_count);
constexpr dynamic_bitset_base(storage_type buffer, size_type size, size_type offset, size_type null_count);

constexpr ~dynamic_bitset_base() = default;

Expand Down Expand Up @@ -540,6 +553,7 @@ namespace sparrow
storage_type m_buffer; ///< The underlying storage for bit data
size_type m_size; ///< The number of bits in the bitset
size_type m_null_count; ///< The number of bits set to false
size_t m_offset = 0; ///< The number of bits to offset from the start of the buffer

friend class bitset_iterator<self_type, true>; ///< Const iterator needs access to internals
friend class bitset_iterator<self_type, false>; ///< Mutable iterator needs access to internals
Expand Down Expand Up @@ -598,7 +612,8 @@ namespace sparrow
{
return true;
}
return !m_null_count || buffer().data()[block_index(pos)] & bit_mask(pos);
const size_t pos_with_offset = pos + m_offset;
return !m_null_count || buffer().data()[block_index(pos_with_offset)] & bit_mask(pos_with_offset);
}

template <typename B>
Expand Down Expand Up @@ -627,15 +642,16 @@ namespace sparrow
}
}
}
block_type& block = buffer().data()[block_index(pos)];
const bool old_value = block & bit_mask(pos);
const size_t pos_with_offset = pos + m_offset;
block_type& block = buffer().data()[block_index(pos_with_offset)];
const bool old_value = block & bit_mask(pos_with_offset);
if (value)
{
block |= bit_mask(pos);
block |= bit_mask(pos_with_offset);
}
else
{
block &= block_type(~bit_mask(pos));
block &= block_type(~bit_mask(pos_with_offset));
}
update_null_count(old_value, value);
}
Expand Down Expand Up @@ -790,19 +806,26 @@ namespace sparrow

template <typename B>
requires std::ranges::random_access_range<std::remove_pointer_t<B>>
constexpr dynamic_bitset_base<B>::dynamic_bitset_base(storage_type buf, size_type size)
constexpr dynamic_bitset_base<B>::dynamic_bitset_base(storage_type buf, size_type size, size_type offset)
: m_buffer(std::move(buf))
, m_size(size)
, m_null_count(m_size - count_non_null())
, m_offset(offset)
{
}

template <typename B>
requires std::ranges::random_access_range<std::remove_pointer_t<B>>
constexpr dynamic_bitset_base<B>::dynamic_bitset_base(storage_type buf, size_type size, size_type null_count)
constexpr dynamic_bitset_base<B>::dynamic_bitset_base(
storage_type buf,
size_type size,
size_type offset,
size_type null_count
)
: m_buffer(std::move(buf))
, m_size(size)
, m_null_count(null_count)
, m_offset(offset)
{
}

Expand Down Expand Up @@ -914,7 +937,7 @@ namespace sparrow
return;
}
size_type old_block_count = buffer().size();
const size_type new_block_count = compute_block_count(n);
const size_type new_block_count = compute_block_count(n + m_offset);
Copy link

Copilot AI Sep 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding offset to the size for block count calculation is incorrect. The block count should be based on the actual storage needed for the bits, not the logical size plus offset. This could cause buffer overruns when the offset pushes beyond allocated memory.

Suggested change
const size_type new_block_count = compute_block_count(n + m_offset);
const size_type new_block_count = compute_block_count(n);

Copilot uses AI. Check for mistakes.

const block_type value = b ? block_type(~block_type(0)) : block_type(0);

if (new_block_count != old_block_count)
Expand Down
17 changes: 11 additions & 6 deletions include/sparrow/buffer/dynamic_bitset/dynamic_bitset_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ namespace sparrow
* assert(view.test(0) == true); // First bit is set
* @endcode
*/
constexpr dynamic_bitset_view(block_type* p, size_type n);
constexpr dynamic_bitset_view(block_type* p, size_type n, size_type offset);

/**
* @brief Constructs a bitset view from external memory with null count tracking.
Expand Down Expand Up @@ -141,7 +141,7 @@ namespace sparrow
* assert(view.count_non_null() == 10);
* @endcode
*/
constexpr dynamic_bitset_view(block_type* p, size_type n, size_type null_count);
constexpr dynamic_bitset_view(block_type* p, size_type n, size_type null_count, size_type offset);

constexpr ~dynamic_bitset_view() = default;

Expand All @@ -153,14 +153,19 @@ namespace sparrow
};

template <std::integral T>
constexpr dynamic_bitset_view<T>::dynamic_bitset_view(block_type* p, size_type n)
: base_type(storage_type(p, p != nullptr ? this->compute_block_count(n) : 0), n)
constexpr dynamic_bitset_view<T>::dynamic_bitset_view(block_type* p, size_type n, size_type offset)
: base_type(storage_type(p, p != nullptr ? this->compute_block_count(n) : 0), n, offset)
{
}

template <std::integral T>
constexpr dynamic_bitset_view<T>::dynamic_bitset_view(block_type* p, size_type n, size_type null_count)
: base_type(storage_type(p, p != nullptr ? this->compute_block_count(n) : 0), n, null_count)
constexpr dynamic_bitset_view<T>::dynamic_bitset_view(
block_type* p,
size_type n,
size_type null_count,
size_type offset
)
: base_type(storage_type(p, p != nullptr ? this->compute_block_count(n) : 0), n, null_count, offset)
{
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ namespace sparrow
using value_type = typename base_type::value_type;
using size_type = typename base_type::size_type;

constexpr explicit non_owning_dynamic_bitset(buffer<T>* buffer, size_type n);
constexpr explicit non_owning_dynamic_bitset(buffer<T>* buffer, size_type n, size_type offset);

constexpr ~non_owning_dynamic_bitset() = default;
constexpr non_owning_dynamic_bitset(const non_owning_dynamic_bitset&) = default;
Expand All @@ -49,8 +49,8 @@ namespace sparrow
};

template <std::integral T>
constexpr non_owning_dynamic_bitset<T>::non_owning_dynamic_bitset(buffer<T>* buffer, size_type n)
: base_type(buffer, n)
constexpr non_owning_dynamic_bitset<T>::non_owning_dynamic_bitset(buffer<T>* buffer, size_type n, size_type offset)
: base_type(buffer, n, offset)
{
SPARROW_ASSERT_TRUE(buffer != nullptr);
}
Expand Down
4 changes: 2 additions & 2 deletions include/sparrow/decimal_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ namespace sparrow
std::move(data_buffer),
precision,
scale,
nullable ? std::make_optional<validity_bitmap>(nullptr, size) : std::nullopt,
nullable ? std::make_optional<validity_bitmap>(nullptr, size, 0u) : std::nullopt,
name,
metadata
);
Expand All @@ -555,7 +555,7 @@ namespace sparrow
std::move(u8_data_buffer),
precision,
scale,
nullable ? std::make_optional<validity_bitmap>(nullptr, size) : std::nullopt,
nullable ? std::make_optional<validity_bitmap>(nullptr, size, 0u) : std::nullopt,
name,
metadata
);
Expand Down
2 changes: 1 addition & 1 deletion include/sparrow/dictionary_encoded_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -661,7 +661,7 @@ namespace sparrow
return create_proxy_impl(
std::forward<keys_buffer_type>(keys),
std::forward<array>(values),
nullable ? std::make_optional<validity_bitmap>(nullptr, size) : std::nullopt,
nullable ? std::make_optional<validity_bitmap>(nullptr, size, 0u) : std::nullopt,
std::move(name),
std::move(metadata)
);
Expand Down
2 changes: 1 addition & 1 deletion include/sparrow/fixed_width_binary_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ namespace sparrow
)
{
u8_buffer<char> data_buffer{};
std::optional<validity_bitmap> bitmap = nullable ? std::make_optional<validity_bitmap>(nullptr, 0)
std::optional<validity_bitmap> bitmap = nullable ? std::make_optional<validity_bitmap>(nullptr, 0u, 0u)
: std::nullopt;
return create_proxy_impl(
std::move(data_buffer),
Expand Down
2 changes: 1 addition & 1 deletion include/sparrow/layout/array_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,7 @@ namespace sparrow
template <class D>
constexpr auto array_crtp_base<D>::bitmap_begin() const -> const_bitmap_iterator
{
return sparrow::next(this->derived_cast().get_bitmap().cbegin(), get_arrow_proxy().offset());
return this->derived_cast().get_bitmap().cbegin();
}

template <class D>
Expand Down
8 changes: 3 additions & 5 deletions include/sparrow/layout/array_bitmap_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ namespace sparrow
arrow_proxy& arrow_proxy = this->get_arrow_proxy();
SPARROW_ASSERT_TRUE(arrow_proxy.buffers().size() > bitmap_buffer_index);
const auto bitmap_size = arrow_proxy.length() + arrow_proxy.offset();
return bitmap_type(arrow_proxy.buffers()[bitmap_buffer_index].data(), bitmap_size);
return bitmap_type(arrow_proxy.buffers()[bitmap_buffer_index].data(), bitmap_size, arrow_proxy.offset());
}

template <class D, bool is_mutable>
Expand All @@ -373,8 +373,7 @@ namespace sparrow
SPARROW_ASSERT_TRUE(this->bitmap_cbegin() <= pos)
SPARROW_ASSERT_TRUE(pos <= this->bitmap_cend())
arrow_proxy& arrow_proxy = this->get_arrow_proxy();
const auto pos_index = static_cast<size_t>(std::distance(this->bitmap_cbegin(), pos))
+ arrow_proxy.offset();
const auto pos_index = static_cast<size_t>(std::distance(this->bitmap_cbegin(), pos));
const auto idx = arrow_proxy.insert_bitmap(pos_index, value, count);
return sparrow::next(this->bitmap_begin(), idx);
}
Expand All @@ -391,8 +390,7 @@ namespace sparrow
SPARROW_ASSERT_TRUE(pos <= this->bitmap_cend());
SPARROW_ASSERT_TRUE(first <= last);
arrow_proxy& arrow_proxy = this->get_arrow_proxy();
const auto pos_index = static_cast<size_t>(std::distance(this->bitmap_cbegin(), pos))
+ arrow_proxy.offset();
const auto pos_index = static_cast<size_t>(std::distance(this->bitmap_cbegin(), pos));
const auto idx = arrow_proxy.insert_bitmap(pos_index, std::ranges::subrange(first, last));
return sparrow::next(this->bitmap_begin(), idx);
}
Expand Down
2 changes: 1 addition & 1 deletion include/sparrow/layout/mutable_array_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ namespace sparrow
template <class D>
constexpr auto mutable_array_base<D>::bitmap_begin() -> bitmap_iterator
{
return sparrow::next(this->derived_cast().get_bitmap().begin(), this->get_arrow_proxy().offset());
return this->derived_cast().get_bitmap().begin();
}

template <class D>
Expand Down
6 changes: 3 additions & 3 deletions include/sparrow/layout/primitive_array_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,7 @@ namespace sparrow
return create_proxy_impl(
std::move(data_buffer),
n,
nullable ? std::make_optional<validity_bitmap>(nullptr, 0) : std::nullopt,
nullable ? std::make_optional<validity_bitmap>(nullptr, 0u, 0u) : std::nullopt,
std::move(name),
std::move(metadata)
);
Expand All @@ -563,7 +563,7 @@ namespace sparrow
std::optional<METADATA_RANGE> metadata
)
{
std::optional<validity_bitmap> bitmap = nullable ? std::make_optional<validity_bitmap>(nullptr, 0)
std::optional<validity_bitmap> bitmap = nullable ? std::make_optional<validity_bitmap>(nullptr, 0u, 0u)
: std::nullopt;
return create_proxy_impl(
std::move(data_buffer),
Expand All @@ -586,7 +586,7 @@ namespace sparrow
{
auto data_buffer = details::primitive_data_access<T>::make_data_buffer(std::forward<R>(range));
auto distance = static_cast<size_t>(std::ranges::distance(range));
std::optional<validity_bitmap> bitmap = nullable ? std::make_optional<validity_bitmap>(nullptr, 0)
std::optional<validity_bitmap> bitmap = nullable ? std::make_optional<validity_bitmap>(nullptr, 0u, 0u)
: std::nullopt;
return create_proxy_impl(
std::move(data_buffer),
Expand Down
Loading
Loading