Skip to content

Commit 2ab76da

Browse files
dngrudinDmitry Grudin
andauthored
Add out of bounds check for encoding/decoding (#42)
* Fixed errors * Add safe encoding/decoding mode with buffer overflow control --------- Co-authored-by: Dmitry Grudin <[email protected]>
1 parent 5400008 commit 2ab76da

30 files changed

+1213
-254
lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ jobs:
5050
- uses: actions/checkout@v2
5151
- name: install build tools (gcc)
5252
run: |
53+
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
5354
sudo apt install g++-11 -y
5455
echo "CC=gcc-11" >> $GITHUB_ENV
5556
echo "CXX=g++-11" >> $GITHUB_ENV

README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ using Class = message<
5252
>;
5353
```
5454
Subsequently, both serialization and deserialization become so easy to do:
55+
- Mode without buffer overflow control
5556
```c++
5657
// serialization
5758
Student twice {123, "twice"}, tom{456, "tom"}, jerry{123456, "jerry"};
@@ -71,6 +72,30 @@ assert(yourClass["students"_f][1] == (Student{123456, "jerry"}));
7172
assert(yourClass == myClass);
7273
assert(begin_diff(bufferEnd2, bufferEnd) == 0);
7374
```
75+
- Mode with buffer overflow control (safe mode)
76+
```c++
77+
// serialization
78+
Student twice {123, "twice"}, tom{456, "tom"}, jerry{123456, "jerry"};
79+
Class myClass {"class 101", {tom, jerry}};
80+
myClass["students"_f].push_back(twice);
81+
82+
array<byte, 64> buffer{};
83+
auto result = message_coder<Class>::encode<safe_mode>(myClass, buffer);
84+
assert (result.has_value());
85+
const auto& bufferEnd = *result;
86+
assert(begin_diff(bufferEnd, buffer) == 45);
87+
88+
// deserialization
89+
auto result2 = message_coder<Class>::decode<safe_mode>(buffer);
90+
assert (result2.has_value());
91+
const auto& [yourClass, bufferEnd2] = *result2;
92+
assert(yourClass["name"_f] == "class 101");
93+
assert(yourClass["students"_f][2]["name"_f] == "twice");
94+
assert(yourClass["students"_f][2]["id"_f] == 123);
95+
assert(yourClass["students"_f][1] == (Student{123456, "jerry"}));
96+
assert(yourClass == myClass);
97+
assert(begin_diff(bufferEnd2, bufferEnd) == 0);
98+
```
7499
More examples can be found in our test cases ([test/message.cpp](https://github.com/PragmaTwice/protopuf/blob/master/test/message.cpp)).
75100

76101
## Supported Field Types

include/protopuf/array.h

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -44,33 +44,53 @@ namespace pp {
4444

4545
array_coder() = delete;
4646

47-
static constexpr bytes encode(const R& con, bytes b) {
47+
template <coder_mode Mode = unsafe_mode>
48+
static constexpr encode_result<Mode> encode(const R& con, bytes b) {
4849
uint<8> n = 0;
4950

5051
for(const auto &i : con) {
5152
n += skipper<C>::encode_skip(i);
5253
}
5354

54-
b = varint_coder<uint<8>>::encode(n, b);
55+
bytes safe_b;
56+
if (!Mode::get_value_from_result(varint_coder<uint<8>>::encode<Mode>(n, b), safe_b)) {
57+
return {};
58+
}
5559

5660
for(const auto& i : con) {
57-
b = C::encode(i, b);
61+
if (!Mode::get_value_from_result(C::template encode<Mode>(i, safe_b), safe_b)) {
62+
return {};
63+
}
5864
}
59-
60-
return b;
65+
return encode_result<Mode>{safe_b};
6166
}
6267

63-
static constexpr decode_result<R> decode(bytes b) {
68+
template <coder_mode Mode = unsafe_mode>
69+
static constexpr decode_result<R, Mode> decode(bytes b) {
70+
decode_value<uint<8>> decode_len;
71+
if (!Mode::get_value_from_result(varint_coder<uint<8>>::decode<Mode>(b), decode_len)) {
72+
return {};
73+
}
74+
6475
uint<8> len = 0;
65-
std::tie(len, b) = varint_coder<uint<8>>::decode(b);
76+
std::tie(len, b) = decode_len;
6677
R con;
6778

68-
auto origin_b = b;
79+
// `len` is read straight from the input. In safe mode, reject a declared length that
// exceeds the remaining bytes before it is used as an allocation size; otherwise a
// forged length could request an enormous reservation instead of yielding an empty result.
// In unsafe mode `check_bytes_span` always returns true, so this is a no-op there.
if (!Mode::check_bytes_span(b, len)) {
    return {};
}

// NOTE(review): the constraint probes the zero-argument `con.reserve()` while the call
// passes `len`, so containers whose `reserve` requires an argument skip this branch.
// `len` is a byte count, i.e. only an upper bound on the element count — confirm intent.
if constexpr (requires { con.reserve(); }) {
    con.reserve(len);
}
82+
83+
const auto origin_b = b;
84+
decode_value<typename C::value_type> decode_v;
6985
while(begin_diff(b, origin_b) < len) {
70-
std::tie(*std::inserter(con, con.end()), b) = C::decode(b);
86+
if (Mode::get_value_from_result(C::template decode<Mode>(b), decode_v)) {
87+
std::tie(*std::inserter(con, con.end()), b) = std::move(decode_v);
88+
} else {
89+
return {};
90+
}
7191
}
7292

73-
return {con, b};
93+
return Mode::template make_result<decode_result<R, Mode>>(std::move(con), b);
7494
}
7595
};
7696

@@ -91,11 +111,21 @@ namespace pp {
91111
return n;
92112
}
93113

94-
static constexpr bytes decode_skip(bytes b) {
114+
template <coder_mode Mode = unsafe_mode>
115+
static constexpr decode_skip_result<Mode> decode_skip(bytes b) {
116+
decode_value<uint<8>> decode_len;
117+
if (!Mode::get_value_from_result(varint_coder<uint<8>>::decode<Mode>(b), decode_len)) {
118+
return {};
119+
}
120+
95121
uint<8> n = 0;
96-
std::tie(n, b) = varint_coder<uint<8>>::decode(b);
122+
std::tie(n, b) = decode_len;
123+
124+
if (!Mode::check_bytes_span(b, n)) {
125+
return {};
126+
}
97127

98-
return b.subspan(n);
128+
return Mode::template make_result<decode_skip_result<Mode>>(b.subspan(n));
99129
}
100130
};
101131

include/protopuf/bool.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
#include "coder.h"
1919
#include "byte.h"
20-
#include "varint.h"
20+
#include "int.h"
2121

2222
namespace pp {
2323

@@ -27,12 +27,14 @@ namespace pp {
2727

2828
bool_coder() = delete;
2929

30-
static constexpr bytes encode(bool i, bytes b) {
31-
return integer_coder<uint<1>>::encode(i, b);
30+
template <coder_mode Mode = unsafe_mode>
31+
static constexpr encode_result<Mode> encode(bool i, bytes b) {
32+
return integer_coder<uint<1>>::encode<Mode>(i, b);
3233
}
3334

34-
static constexpr decode_result<bool> decode(bytes b) {
35-
return integer_coder<uint<1>>::decode(b);
35+
template <coder_mode Mode = unsafe_mode>
36+
static constexpr decode_result<bool, Mode> decode(bytes b) {
37+
return integer_coder<uint<1>>::decode<Mode>(b);
3638
}
3739
};
3840

include/protopuf/byte.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ namespace pp {
3535
/// Returns the byte-distance between `begin(a)` and `begin(b)`.
3636
inline constexpr std::size_t begin_diff(bytes a, bytes b) {
3737
// `std::to_address` is used here for MSVC, ref to https://github.com/microsoft/STL/issues/1435
38-
return std::to_address(a.begin()) - std::to_address(b.begin());
38+
return static_cast<std::size_t>(std::to_address(a.begin()) - std::to_address(b.begin()));
3939
}
4040
}
4141

include/protopuf/coder.h

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,42 +17,60 @@
1717

1818
#include <utility>
1919
#include "byte.h"
20+
#include "coder_mode.h"
2021

2122
namespace pp {
2223

24+
/// @brief A type which `encoder`'s `encode` returns.
25+
/// @param Mode the encoding mode
26+
template<coder_mode Mode>
27+
using encode_result = typename Mode::template result_type<bytes>;
28+
2329
/// @brief A pair type which `decoder`'s `decode` returns.
2430
/// - Left type of pair `T`: the type of decoded object.
2531
/// - Right type of pair `bytes`: the `bytes` which remains not decoded after finishing `decode`.
2632
template<typename T>
27-
using decode_result = std::pair<T, bytes>;
33+
using decode_value = std::pair<T, bytes>;
34+
35+
/// @brief A type which `decoder`'s `decode` returns.
36+
/// @param T the type of decoded object
37+
/// @param Mode the decoding mode
38+
template<typename T, coder_mode Mode>
39+
using decode_result = typename Mode::template result_type<decode_value<T>>;
2840

2941
/// @brief Describes a type with static member function `encode`, which serializes an object to `bytes` (no ownership).
3042
///
43+
/// Encoding can be performed in different modes.
3144
/// Type alias `value_type` describes type of the object to be encoded.
3245
/// Static member function `encode`:
3346
/// @param v the object to be encoded (source object).
3447
/// @param s the bytes which the object `v` is encoded into (target bytes).
35-
/// @returns a bytes from `begin(s) + encoding_length(v)` to `end(s)`, where `encoding_length` is the length of
48+
/// @returns the @ref encode_result which depends on the encoding mode.
49+
/// The result contains a bytes from `begin(s) + encoding_length(v)` to `end(s)`, where `encoding_length` is the length of
3650
/// encoded object (bytes form), representing the left bytes which remains not used yet.
3751
template<typename T>
3852
concept encoder = requires(typename T::value_type v, bytes s) {
3953
typename T::value_type;
40-
{ T::encode(v, s) } -> std::same_as<bytes>;
54+
{ T::template encode<unsafe_mode>(v, s) } -> std::same_as<encode_result<unsafe_mode>>;
55+
{ T::template encode<safe_mode>(v, s) } -> std::same_as<encode_result<safe_mode>>;
4156
};
4257

4358
/// @brief Describes a type with static member function `decode`, which deserializes some `bytes` to an object.
4459
///
60+
/// Decoding can be performed in different modes.
4561
/// Type alias `value_type` describes type of the object to be decoded.
4662
/// Static member function `decode`:
4763
/// @param s the bytes which the object is decoded from (source bytes).
48-
/// @returns the @ref decode_result which is a pair including:
64+
/// @returns the @ref decode_result which depends on the decoding mode.
65+
/// The result contains a pair including:
4966
/// - the decoded object `v`;
5067
/// - the bytes from `begin(s) + decoding_length(v)` to `end(s)`, where `decoding_length` is the length of
5168
/// decoded object (bytes form), representing the left bytes which remains not used yet.
5269
template<typename T>
5370
concept decoder = requires(bytes s) {
5471
typename T::value_type;
55-
{ T::decode(s) } -> std::same_as<decode_result<typename T::value_type>>;
72+
{ T::template decode<unsafe_mode>(s) } -> std::same_as<decode_result<typename T::value_type, unsafe_mode>>;
73+
{ T::template decode<safe_mode>(s) } -> std::same_as<decode_result<typename T::value_type, safe_mode>>;
5674
};
5775

5876
/// @brief Describes a type which is both @ref encoder and @ref decoder.

include/protopuf/coder_mode.h

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
// Copyright 2020-2024 PragmaTwice
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#ifndef PROTOPUF_CODER_MODE_H
16+
#define PROTOPUF_CODER_MODE_H
17+
18+
#include <optional>
19+
#include "byte.h"
20+
21+
namespace pp {
22+
23+
/// @brief Describes a type with static member function `make_result`,
24+
/// which makes an encoding/decoding result that depends on the coding mode.
25+
/// @param v some object that could potentially be contained as a result of encoding.
26+
/// @returns the encoding/decoding result which depends on the coding mode.
27+
template<typename T>
28+
concept coder_result_maker = requires(std::pair<int, int> v) {
29+
typename T::template result_type<decltype(v)>;
30+
31+
{ T::template make_result<typename T::template result_type<decltype(v)>>(0, 0) } ->
32+
std::same_as<typename T::template result_type<decltype(v)>>;
33+
};
34+
35+
/// @brief Describes a type with static member function `get_value_from_result`,
36+
/// which extracts the value from an encoding/decoding result depending on the encoding/decoding mode.
37+
/// @param v some object that could potentially be extracted from the encoding result.
38+
/// @param r the encoding/decoding result.
39+
/// @returns true if the value is extracted, otherwise false.
40+
template<typename T>
41+
concept coder_result_value_getter = requires(std::pair<int, int> v, typename T::template result_type<decltype(v)> r) {
42+
typename T::template result_type<decltype(v)>;
43+
44+
{ T::template get_value_from_result<typename T::template result_type<decltype(v)>>(std::move(r), v) } -> std::same_as<bool>;
45+
};
46+
47+
/// @brief Describes a type with static member function `check_iterator`,
48+
/// which checks if an iterator is valid depending on the encoding/decoding mode.
49+
/// @param itr the iterator to check for validity.
50+
/// @param end the iterator to the element following the last element.
51+
/// @returns true if the iterator is valid, otherwise false.
52+
template<typename T>
53+
concept iterator_checker = requires(bytes::iterator itr, bytes::iterator end) {
54+
{ T::check_iterator(itr, end) } -> std::same_as<bool>;
55+
};
56+
57+
/// @brief Describes a type with static member function `check_bytes_span`,
58+
/// which checks if the span offset is valid depending on the encoding/decoding mode.
59+
/// @param b the byte span.
60+
/// @param offset the offset into the byte span to check for validity.
61+
/// @returns true if the offset is valid, otherwise false.
62+
template<typename T>
63+
concept bytes_span_checker = requires(bytes b, std::size_t offset) {
64+
{ T::check_bytes_span(b, offset) } -> std::same_as<bool>;
65+
};
66+
67+
/// @brief Describes a type for the coder operating mode.
68+
template<typename T>
69+
concept coder_mode = coder_result_maker<T> && coder_result_value_getter<T> && iterator_checker<T> && bytes_span_checker<T>;
70+
71+
/// @brief Unsafe @ref coder_mode to perform coding without buffer overflow checking
72+
struct unsafe_mode {
73+
template<typename T>
74+
using result_type = std::remove_reference_t<T>;
75+
76+
template<typename R, typename... Args>
77+
static constexpr R make_result(Args&&... args) {
78+
return R{std::forward<Args>(args)...};
79+
}
80+
81+
template<typename T>
82+
static constexpr bool get_value_from_result(T&& result, auto& value) {
83+
value = std::forward<T>(result);
84+
return true;
85+
}
86+
87+
static constexpr bool check_iterator(bytes::iterator, bytes::iterator) {
88+
return true;
89+
}
90+
91+
static constexpr bool check_bytes_span(bytes, std::size_t) {
92+
return true;
93+
}
94+
};
95+
96+
/// @brief Safe @ref coder_mode to perform coding with buffer overflow checking (the coding result is wrapped into std::optional)
97+
struct safe_mode {
98+
template<typename T>
99+
using result_type = std::optional<std::remove_reference_t<T>>;
100+
101+
template<typename R, typename... Args>
102+
static constexpr R make_result(Args&&... args) {
103+
return R{std::in_place, std::forward<Args>(args)...};
104+
}
105+
106+
/// @brief Extracts the value held by an optional-wrapped coding result.
/// @param result the coding result (an optional); may be an lvalue or an rvalue.
/// @param value receives the contained value when `result` holds one; left untouched otherwise.
/// @returns true if a value was extracted, false if `result` is empty.
template<typename T>
static constexpr bool get_value_from_result(T&& result, auto& value) {
    if (!result.has_value()) {
        return false;
    }
    // Forward the optional itself (not `*result`, which is always an lvalue here) so that
    // an rvalue result is moved from instead of copied.
    value = *std::forward<T>(result);
    return true;
}
115+
116+
static constexpr bool check_iterator(bytes::iterator iter, bytes::iterator end) {
117+
return iter != end;
118+
}
119+
120+
static constexpr bool check_bytes_span(bytes b, std::size_t offset) {
121+
return b.size() >= offset;
122+
}
123+
};
124+
125+
}
126+
127+
#endif //PROTOPUF_CODER_MODE_H

include/protopuf/enum.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,19 @@ namespace pp {
3232

3333
enum_coder() = delete;
3434

35-
static constexpr bytes encode(T i, bytes b) {
36-
return varint_coder<std::underlying_type_t<T>>::encode(static_cast<std::underlying_type_t<T>>(i), b);
35+
template<coder_mode Mode>
36+
static constexpr encode_result<Mode> encode(T i, bytes b) {
37+
return varint_coder<std::underlying_type_t<T>>::template encode<Mode>(static_cast<std::underlying_type_t<T>>(i), b);
3738
}
3839

39-
static constexpr decode_result<T> decode(bytes b) {
40-
auto [res, bytes] = varint_coder<std::underlying_type_t<T>>::decode(b);
41-
return {static_cast<T>(res), bytes};
40+
template<coder_mode Mode>
41+
static constexpr decode_result<T, Mode> decode(bytes b) {
42+
decode_value<std::underlying_type_t<T>> decode_v;
43+
if (Mode::get_value_from_result(varint_coder<std::underlying_type_t<T>>::template decode<Mode>(b), decode_v)) {
44+
return Mode::template make_result<decode_result<T, Mode>>(static_cast<T>(decode_v.first), decode_v.second);
45+
}
46+
47+
return {};
4248
}
4349
};
4450

0 commit comments

Comments
 (0)