Skip to content

Commit

Permalink
Merge pull request #48 from finger563/size_t_fix
Browse files Browse the repository at this point in the history
Update of #45 (replace size_t with uint32_t for length encoding and add new option for aligned memory access)
  • Loading branch information
p-ranav authored Jul 19, 2024
2 parents 576c28e + 7d4de8f commit 8e1e13f
Show file tree
Hide file tree
Showing 20 changed files with 632 additions and 53 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ target_include_directories(alpaca INTERFACE


if(ALPACA_BUILD_TESTS)
add_subdirectory(test)
add_subdirectory(test)
endif()

if(ALPACA_BUILD_SAMPLES)
Expand Down
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ The source for the above example can be found [here](https://github.com/p-ranav/
* [Data Structure Versioning](#data-structure-versioning)
* [Integrity Checking with Checksums](#integrity-checking-with-checksums)
* [Macros to Exclude STL Data Structures](#macros-to-exclude-stl-data-structures)
* [Aligned Memory Access](#aligned-memory-access)
* [Python Interoperability](#python-interoperability)
* [Usage](#usage)
* [Format String Specification](#format-string-specification)
Expand Down Expand Up @@ -1161,6 +1162,15 @@ int main() {
auto bytes_written = serialize<options::fixed_length_encoding>(s, bytes);
}
```
### Aligned Memory Access

The Alpaca library, by default, utilizes unaligned memory access, as this is permitted on the x86_64 architecture.
However, certain architectures, such as the Arm Cortex-M3, M4, and M33, require aligned memory access for 32-bit
and 64-bit data types.

For architectures where aligned memory access is necessary, the Alpaca library provides the
```options::force_aligned_access``` option.
When this option is enabled, the library does not perform unaligned accesses: a value that is not suitably aligned in the buffer is copied with ```memcpy``` instead of being accessed through a cast pointer.

## Python Interoperability

Expand Down
2 changes: 1 addition & 1 deletion include/alpaca/detail/field_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ enum class field_type : uint8_t {
list,
deque,
filesystem_path,
bitset
bitset,
};

template <field_type value> constexpr uint8_t to_byte() {
Expand Down
39 changes: 31 additions & 8 deletions include/alpaca/detail/from_bytes.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <alpaca/detail/options.h>
#include <alpaca/detail/variable_length_encoding.h>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <system_error>
Expand All @@ -12,6 +13,21 @@ namespace alpaca {

namespace detail {

/// Reads one value of type `T` out of a byte buffer into `value`.
///
/// No bounds checking and no endianness conversion happen here; callers are
/// expected to have validated the remaining length beforehand and to apply
/// any byte-order fix-up afterwards.
///
/// @tparam O  Option mask; only the `force_aligned_access` bit is consulted.
/// @tparam T  Type of the value to read; `sizeof(T)` bytes are read.
/// @param value          Destination that receives the bytes.
/// @param bytes          Start of the serialized buffer.
/// @param current_index  Offset into `bytes` at which the value begins.
template <options O, typename T>
void get_aligned(T &value, const uint8_t *bytes, size_t current_index) {
  const uint8_t *source = bytes + current_index;
  if constexpr (force_aligned_access<O>()) {
    // memcpy is valid for any source alignment, so the option can be
    // resolved at compile time without a per-read pointer alignment test.
    std::memcpy(&value, source, sizeof(T));
  } else {
    // Default path: load through a cast pointer. This relies on the target
    // tolerating loads from addresses that are not aligned for T.
    value = *reinterpret_cast<const T *>(source);
  }
}


template <options O, typename Container>
typename std::enable_if<!std::is_array_v<Container>, bool>::type
from_bytes_crc32(uint32_t &value, Container &bytes, std::size_t &current_index,
Expand All @@ -21,7 +37,9 @@ from_bytes_crc32(uint32_t &value, Container &bytes, std::size_t &current_index,
if (end_index < num_bytes_to_read) {
return false;
}
value = *(reinterpret_cast<const uint32_t *>(bytes.data() + current_index));

get_aligned<O>(value, &bytes[0], current_index);

update_value_based_on_alpaca_endian_rules<O, uint32_t>(value);
current_index += num_bytes_to_read;
return true;
Expand All @@ -36,7 +54,9 @@ from_bytes_crc32(uint32_t &value, Container &bytes, std::size_t &current_index,
if (end_index < num_bytes_to_read) {
return false;
}
value = *(reinterpret_cast<const uint32_t *>(bytes + current_index));

get_aligned<O>(value, &bytes[0], current_index);

update_value_based_on_alpaca_endian_rules<O, uint32_t>(value);
current_index += num_bytes_to_read;
return true;
Expand Down Expand Up @@ -87,7 +107,8 @@ from_bytes(T &value, Container &bytes, std::size_t &current_index,
/// TODO: report error
return false;
}
value = *(reinterpret_cast<const T *>(bytes.data() + current_index));

get_aligned<O>(value, &bytes[0], current_index);
current_index += num_bytes_to_read;
update_value_based_on_alpaca_endian_rules<O, T>(value);
return true;
Expand Down Expand Up @@ -139,7 +160,8 @@ from_bytes(T &value, Container &bytes, std::size_t &current_index,
/// TODO: report error
return false;
}
value = *(reinterpret_cast<const T *>(bytes + current_index));

get_aligned<O>(value, &bytes[0], current_index);
current_index += num_bytes_to_read;
update_value_based_on_alpaca_endian_rules<O, T>(value);
return true;
Expand Down Expand Up @@ -194,7 +216,8 @@ from_bytes(T &value, Container &bytes, std::size_t &current_index,
char value_bytes[num_bytes_to_read];
bytes.read(&value_bytes[0], num_bytes_to_read);
current_index += num_bytes_to_read;
value = *(reinterpret_cast<const T *>(value_bytes));
get_aligned<O>(value, (uint8_t*) &value_bytes[0], 0);

update_value_based_on_alpaca_endian_rules<O, T>(value);
return true;
}
Expand Down Expand Up @@ -237,7 +260,7 @@ from_bytes(T &value, Container &bytes, std::size_t &current_index,
/// TODO: report error
return false;
}
value = *(reinterpret_cast<const T *>(bytes.data() + current_index));
get_aligned<O>(value, &bytes[0], current_index);
current_index += num_bytes_to_read;
} else {
value = decode_varint<T>(bytes, current_index);
Expand Down Expand Up @@ -286,7 +309,7 @@ from_bytes(T &value, Container &bytes, std::size_t &current_index,
/// TODO: report error
return false;
}
value = *(reinterpret_cast<const T *>(bytes + current_index));
get_aligned<O>(value, &bytes[0], current_index);
current_index += num_bytes_to_read;
} else {
value = decode_varint<T>(bytes, current_index);
Expand Down Expand Up @@ -338,7 +361,7 @@ from_bytes(T &value, Container &bytes, std::size_t &current_index,
char value_bytes[num_bytes_to_read];
bytes.read(&value_bytes[0], num_bytes_to_read);
current_index += num_bytes_to_read;
value = *(reinterpret_cast<const T *>(value_bytes));
get_aligned<O>(value, (uint8_t*) &value_bytes[0], 0);
} else {
value = decode_varint<T>(bytes, current_index);
}
Expand Down
11 changes: 10 additions & 1 deletion include/alpaca/detail/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ enum class options {
big_endian = 1,
fixed_length_encoding = 2,
with_version = 4,
with_checksum = 8
with_checksum = 8,
force_aligned_access = 16,
};

template <typename E> struct enable_bitmask_operators {
Expand All @@ -25,6 +26,9 @@ operator|(E lhs, E rhs) {

namespace detail {

// Fixed-width integer type used in place of std::size_t wherever a container
// size or string length is written to, or read from, the serialized bytes.
// NOTE(review): presumably chosen so the encoded width does not vary with the
// platform's size_t — confirm against the serialization format notes.
using size_t_serialized_type = uint32_t;


template <typename T, T value, T flag> constexpr bool enum_has_flag() {
using underlying = typename std::underlying_type<T>::type;
return (static_cast<underlying>(value) & static_cast<underlying>(flag)) ==
Expand All @@ -47,6 +51,11 @@ template <options O> constexpr bool with_version() {

template <options O> constexpr bool with_checksum() {
return enum_has_flag<options, O, options::with_checksum>();

}

template <options O> constexpr bool force_aligned_access() {
return enum_has_flag<options, O, options::force_aligned_access>();
}

} // namespace detail
Expand Down
9 changes: 6 additions & 3 deletions include/alpaca/detail/to_bytes.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,15 @@ to_bytes(T &bytes, std::size_t &byte_index, const U &original_value) {
// encode as variable-length
template <options O, typename T, typename U>
typename std::enable_if<
std::is_same_v<U, uint32_t> || std::is_same_v<U, uint64_t> ||
std::is_same_v<U, int32_t> || std::is_same_v<U, long> || std::is_same_v<U, int64_t> ||
std::is_same_v<U, std::size_t>,
std::is_same_v<U, uint32_t> || std::is_same_v<U, uint64_t> ||
std::is_same_v<U, int32_t> || std::is_same_v<U, int64_t> ||
std::is_same_v<U, unsigned long> || std::is_same_v<U, long> ||
std::is_same_v<U, unsigned long long> || std::is_same_v<U, long long>,
void>::type
to_bytes(T &bytes, std::size_t &byte_index, const U &original_value) {

static_assert(!std::is_same_v<T, size_t>, "Unable to directly serialize size_t type");

U value = original_value;
update_value_based_on_alpaca_endian_rules<O, U>(value);

Expand Down
6 changes: 3 additions & 3 deletions include/alpaca/detail/types/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ typename std::enable_if<is_array_type<T>::value, void>::type type_info(
std::vector<uint8_t> &typeids,
std::unordered_map<std::string_view, std::size_t> &struct_visitor_map) {
typeids.push_back(to_byte<field_type::array>());
typeids.push_back(std::tuple_size_v<T>);
typeids.push_back((size_t_serialized_type) std::tuple_size_v<T>);
using value_type = typename T::value_type;
type_info<value_type>(typeids, struct_visitor_map);
}
Expand All @@ -41,7 +41,7 @@ void from_bytes_to_array(T &value, Container &bytes, std::size_t &current_index,

using decayed_value_type = typename std::decay<typename T::value_type>::type;

constexpr auto size = std::tuple_size<T>::value;
constexpr auto size = (size_t_serialized_type) std::tuple_size<T>::value;

if (size > end_index - current_index) {
// size is greater than the number of bytes remaining
Expand All @@ -52,7 +52,7 @@ void from_bytes_to_array(T &value, Container &bytes, std::size_t &current_index,
}

// read `size` bytes and save to value
for (std::size_t i = 0; i < size; ++i) {
for (size_t_serialized_type i = 0; i < size; ++i) {
decayed_value_type v{};
from_bytes_router<O>(v, bytes, current_index, end_index, error_code);
value[i] = v;
Expand Down
8 changes: 4 additions & 4 deletions include/alpaca/detail/types/bitset.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ template <options O, std::size_t N, typename Container>
void to_bytes_from_bitset_type(const std::bitset<N> &input, Container &bytes,
std::size_t &byte_index) {
// save bitset size
to_bytes_router<O, std::size_t>(input.size(), bytes, byte_index);
to_bytes_router<O, size_t_serialized_type>((size_t_serialized_type) input.size(), bytes, byte_index);

// serialize the bitset itself into (bits/8 + 1) bytes
int num_bytes = input.size() / 8 + 1;
Expand Down Expand Up @@ -63,8 +63,8 @@ bool from_bytes_to_bitset(std::bitset<N> &value, Container &bytes,
}

// current byte is the size of the vector
std::size_t size = 0;
detail::from_bytes<O, std::size_t>(size, bytes, current_index, end_index,
size_t_serialized_type size = 0;
detail::from_bytes<O, size_t_serialized_type>(size, bytes, current_index, end_index,
error_code);

if (size != N) {
Expand All @@ -90,7 +90,7 @@ bool from_bytes_to_bitset(std::bitset<N> &value, Container &bytes,
// reset the value to 0
value.reset();

for (std::size_t i = 0; i < num_serialized_bytes; ++i) {
for (size_t_serialized_type i = 0; i < num_serialized_bytes; ++i) {
uint8_t byte{};
from_bytes_router<O>(byte, bytes, current_index, end_index, error_code);
if (error_code) {
Expand Down
8 changes: 4 additions & 4 deletions include/alpaca/detail/types/deque.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ template <options O, typename T, typename Container>
void to_bytes_from_deque_type(const T &input, Container &bytes,
std::size_t &byte_index) {
// save deque size
to_bytes_router<O, std::size_t>(input.size(), bytes, byte_index);
to_bytes_router<O, size_t_serialized_type>(input.size(), bytes, byte_index);

// value of each element in deque
for (const auto &v : input) {
Expand Down Expand Up @@ -57,8 +57,8 @@ bool from_bytes_to_deque(std::deque<T> &value, Container &bytes,
}

// current byte is the size of the vector
std::size_t size = 0;
detail::from_bytes<O, std::size_t>(size, bytes, current_index, end_index,
size_t_serialized_type size = 0;
detail::from_bytes<O, size_t_serialized_type>(size, bytes, current_index, end_index,
error_code);

if (size > end_index - current_index) {
Expand All @@ -70,7 +70,7 @@ bool from_bytes_to_deque(std::deque<T> &value, Container &bytes,
}

// read `size` bytes and save to value
for (std::size_t i = 0; i < size; ++i) {
for (size_t_serialized_type i = 0; i < size; ++i) {
T v{};
from_bytes_router<O>(v, bytes, current_index, end_index, error_code);
if (error_code) {
Expand Down
13 changes: 7 additions & 6 deletions include/alpaca/detail/types/filesystem_path.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ template <options O, typename Container>
void to_bytes(Container &bytes, std::size_t &byte_index,
const std::filesystem::path &input) {
// save string length
to_bytes_router<O>(input.native().size(), bytes, byte_index);
to_bytes_router<O>((size_t_serialized_type)input.native().size(), bytes,
byte_index);

for (const auto &c : input.native()) {
to_bytes<O>(bytes, byte_index, c);
Expand All @@ -35,8 +36,8 @@ void to_bytes(Container &bytes, std::size_t &byte_index,

template <options O, typename Container>
bool from_bytes(std::filesystem::path &value, Container &bytes,
std::size_t &current_index, std::size_t &end_index,
std::error_code &error_code) {
std::size_t &current_index, std::size_t &end_index,
std::error_code &error_code) {

if (current_index >= end_index) {
// end of input
Expand All @@ -45,9 +46,9 @@ bool from_bytes(std::filesystem::path &value, Container &bytes,
}

// current byte is the length of the string
std::size_t size = 0;
detail::from_bytes<O, std::size_t>(size, bytes, current_index, end_index,
error_code);
size_t_serialized_type size = 0;
detail::from_bytes<O, size_t_serialized_type>(size, bytes, current_index,
end_index, error_code);

if (size > end_index - current_index) {
// size is greater than the number of bytes remaining
Expand Down
6 changes: 3 additions & 3 deletions include/alpaca/detail/types/list.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ template <options O, typename T, typename Container>
void to_bytes_from_list_type(const T &input, Container &bytes,
std::size_t &byte_index) {
// save list size
to_bytes_router<O, std::size_t>(input.size(), bytes, byte_index);
to_bytes_router<O, size_t_serialized_type>(input.size(), bytes, byte_index);

// value of each element in list
for (const auto &v : input) {
Expand Down Expand Up @@ -57,8 +57,8 @@ bool from_bytes_to_list(std::list<T> &value, Container &bytes,
}

// current byte is the size of the vector
std::size_t size = 0;
detail::from_bytes<O, std::size_t>(size, bytes, current_index, end_index,
size_t_serialized_type size = 0;
detail::from_bytes<O, size_t_serialized_type>(size, bytes, current_index, end_index,
error_code);

if (size > end_index - current_index) {
Expand Down
6 changes: 3 additions & 3 deletions include/alpaca/detail/types/map.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ template <options O, typename T, typename Container>
void to_bytes_from_map_type(const T &input, Container &bytes,
std::size_t &byte_index) {
// save map size
to_bytes_router<O, std::size_t, Container>(input.size(), bytes, byte_index);
to_bytes_router<O, size_t_serialized_type, Container>((size_t_serialized_type) input.size(), bytes, byte_index);

// save key,value pairs in map
for (const auto &[key, value] : input) {
Expand Down Expand Up @@ -84,8 +84,8 @@ template <options O, typename T, typename Container>
void from_bytes_to_map(T &map, Container &bytes, std::size_t &current_index,
std::size_t &end_index, std::error_code &error_code) {
// current byte is the size of the map
std::size_t size = 0;
detail::from_bytes<O, std::size_t>(size, bytes, current_index, end_index,
size_t_serialized_type size = 0;
detail::from_bytes<O, size_t_serialized_type>(size, bytes, current_index, end_index,
error_code);

if (size > end_index - current_index) {
Expand Down
6 changes: 3 additions & 3 deletions include/alpaca/detail/types/set.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ template <options O, typename T, typename Container>
void to_bytes_from_set_type(const T &input, Container &bytes,
std::size_t &byte_index) {
// save set size
to_bytes_router<O, std::size_t, Container>(input.size(), bytes, byte_index);
to_bytes_router<O, size_t_serialized_type, Container>((size_t_serialized_type) input.size(), bytes, byte_index);

// save values in set
for (const auto &value : input) {
Expand Down Expand Up @@ -82,8 +82,8 @@ template <options O, typename T, typename Container>
void from_bytes_to_set(T &set, Container &bytes, std::size_t &current_index,
std::size_t &end_index, std::error_code &error_code) {
// current byte is the size of the set
std::size_t size = 0;
detail::from_bytes<O, std::size_t>(size, bytes, current_index, end_index,
size_t_serialized_type size = 0;
detail::from_bytes<O, size_t_serialized_type>(size, bytes, current_index, end_index,
error_code);

if (size > end_index - current_index) {
Expand Down
Loading

0 comments on commit 8e1e13f

Please sign in to comment.