Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 1ccb0bb

Browse files
committed Nov 24, 2022
Initial commit
0 parents  commit 1ccb0bb

File tree

6 files changed

+875
-0
lines changed

6 files changed

+875
-0
lines changed
 

‎BUILD.bazel

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
load("@bazel_skylib//rules:common_settings.bzl", "bool_flag")
2+
load("@bazel_skylib//lib:selects.bzl", "selects")
3+
4+
# Boolean build setting, False by default, that the ":has_libfuzzer"
# config_setting in this file matches on. Presumably flipped by cifuzz's own
# tooling rather than by hand (see the naming note below) -- TODO confirm.
bool_flag(
5+
# Since this flag can be set from the command line, give it a very ugly name
6+
# to discourage its use by users.
7+
name = "__internal_has_libfuzzer",
8+
build_setting_default = False,
9+
)
10+
11+
# Matches when the :__internal_has_libfuzzer build setting is "True".
config_setting(
12+
name = "has_libfuzzer",
13+
flag_values = {":__internal_has_libfuzzer": "True"},
14+
)
15+
16+
# Matches only when every listed condition matches: :has_libfuzzer and the
# Linux OS constraint.
selects.config_setting_group(
17+
name = "on_linux_and_has_libfuzzer",
18+
match_all = [
19+
":has_libfuzzer",
20+
"@platforms//os:linux",
21+
],
22+
)
23+
24+
# The public cifuzz library. The headers are always exported; the dumper source
# is only compiled in when :has_libfuzzer matches, and the linker wrap option is
# only added when :on_linux_and_has_libfuzzer matches.
cc_library(
25+
name = "cifuzz",
26+
srcs = select({
27+
# We have to use dumper.cpp instead of dumper.c here as the non-Xcode
28+
# toolchain on macOS needs -isysroot to be set to the macOS SDK path in
29+
# order to find C headers. However, Bazel only offers BAZEL_CXXOPTS to
30+
# set this flag in a way that affects the builtin include directory list
31+
# compiled by the @local_config_cc auto-configured toolchain. There is
32+
# no BAZEL_COPTS.
33+
":has_libfuzzer": ["src/dumper.cpp"],
34+
"//conditions:default": [],
35+
}),
36+
hdrs = [
37+
"include/cifuzz/cifuzz.h",
38+
"include/fuzzer/FuzzedDataProvider.h",
39+
],
40+
includes = ["include"],
41+
linkopts = select({
42+
# On Linux, we wrap the __sanitizer_set_death_callback method to
43+
# also dump sanitizer findings when they are non-fatal.
44+
# See src/dumper.cpp for details.
45+
":on_linux_and_has_libfuzzer": ["-Wl,--wrap=__sanitizer_set_death_callback"],
46+
"//conditions:default": [],
47+
}),
48+
linkstatic = True,
49+
visibility = ["//visibility:public"],
50+
alwayslink = True,
51+
)

‎LICENSE

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
Apache License
2+
Version 2.0, January 2004
3+
http://www.apache.org/licenses/
4+
5+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6+
7+
1. Definitions.
8+
9+
"License" shall mean the terms and conditions for use, reproduction,
10+
and distribution as defined by Sections 1 through 9 of this document.
11+
12+
"Licensor" shall mean the copyright owner or entity authorized by
13+
the copyright owner that is granting the License.
14+
15+
"Legal Entity" shall mean the union of the acting entity and all
16+
other entities that control, are controlled by, or are under common
17+
control with that entity. For the purposes of this definition,
18+
"control" means (i) the power, direct or indirect, to cause the
19+
direction or management of such entity, whether by contract or
20+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
21+
outstanding shares, or (iii) beneficial ownership of such entity.
22+
23+
"You" (or "Your") shall mean an individual or Legal Entity
24+
exercising permissions granted by this License.
25+
26+
"Source" form shall mean the preferred form for making modifications,
27+
including but not limited to software source code, documentation
28+
source, and configuration files.
29+
30+
"Object" form shall mean any form resulting from mechanical
31+
transformation or translation of a Source form, including but
32+
not limited to compiled object code, generated documentation,
33+
and conversions to other media types.
34+
35+
"Work" shall mean the work of authorship, whether in Source or
36+
Object form, made available under the License, as indicated by a
37+
copyright notice that is included in or attached to the work
38+
(an example is provided in the Appendix below).
39+
40+
"Derivative Works" shall mean any work, whether in Source or Object
41+
form, that is based on (or derived from) the Work and for which the
42+
editorial revisions, annotations, elaborations, or other modifications
43+
represent, as a whole, an original work of authorship. For the purposes
44+
of this License, Derivative Works shall not include works that remain
45+
separable from, or merely link (or bind by name) to the interfaces of,
46+
the Work and Derivative Works thereof.
47+
48+
"Contribution" shall mean any work of authorship, including
49+
the original version of the Work and any modifications or additions
50+
to that Work or Derivative Works thereof, that is intentionally
51+
submitted to Licensor for inclusion in the Work by the copyright owner
52+
or by an individual or Legal Entity authorized to submit on behalf of
53+
the copyright owner. For the purposes of this definition, "submitted"
54+
means any form of electronic, verbal, or written communication sent
55+
to the Licensor or its representatives, including but not limited to
56+
communication on electronic mailing lists, source code control systems,
57+
and issue tracking systems that are managed by, or on behalf of, the
58+
Licensor for the purpose of discussing and improving the Work, but
59+
excluding communication that is conspicuously marked or otherwise
60+
designated in writing by the copyright owner as "Not a Contribution."
61+
62+
"Contributor" shall mean Licensor and any individual or Legal Entity
63+
on behalf of whom a Contribution has been received by Licensor and
64+
subsequently incorporated within the Work.
65+
66+
2. Grant of Copyright License. Subject to the terms and conditions of
67+
this License, each Contributor hereby grants to You a perpetual,
68+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69+
copyright license to reproduce, prepare Derivative Works of,
70+
publicly display, publicly perform, sublicense, and distribute the
71+
Work and such Derivative Works in Source or Object form.
72+
73+
3. Grant of Patent License. Subject to the terms and conditions of
74+
this License, each Contributor hereby grants to You a perpetual,
75+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76+
(except as stated in this section) patent license to make, have made,
77+
use, offer to sell, sell, import, and otherwise transfer the Work,
78+
where such license applies only to those patent claims licensable
79+
by such Contributor that are necessarily infringed by their
80+
Contribution(s) alone or by combination of their Contribution(s)
81+
with the Work to which such Contribution(s) was submitted. If You
82+
institute patent litigation against any entity (including a
83+
cross-claim or counterclaim in a lawsuit) alleging that the Work
84+
or a Contribution incorporated within the Work constitutes direct
85+
or contributory patent infringement, then any patent licenses
86+
granted to You under this License for that Work shall terminate
87+
as of the date such litigation is filed.
88+
89+
4. Redistribution. You may reproduce and distribute copies of the
90+
Work or Derivative Works thereof in any medium, with or without
91+
modifications, and in Source or Object form, provided that You
92+
meet the following conditions:
93+
94+
(a) You must give any other recipients of the Work or
95+
Derivative Works a copy of this License; and
96+
97+
(b) You must cause any modified files to carry prominent notices
98+
stating that You changed the files; and
99+
100+
(c) You must retain, in the Source form of any Derivative Works
101+
that You distribute, all copyright, patent, trademark, and
102+
attribution notices from the Source form of the Work,
103+
excluding those notices that do not pertain to any part of
104+
the Derivative Works; and
105+
106+
(d) If the Work includes a "NOTICE" text file as part of its
107+
distribution, then any Derivative Works that You distribute must
108+
include a readable copy of the attribution notices contained
109+
within such NOTICE file, excluding those notices that do not
110+
pertain to any part of the Derivative Works, in at least one
111+
of the following places: within a NOTICE text file distributed
112+
as part of the Derivative Works; within the Source form or
113+
documentation, if provided along with the Derivative Works; or,
114+
within a display generated by the Derivative Works, if and
115+
wherever such third-party notices normally appear. The contents
116+
of the NOTICE file are for informational purposes only and
117+
do not modify the License. You may add Your own attribution
118+
notices within Derivative Works that You distribute, alongside
119+
or as an addendum to the NOTICE text from the Work, provided
120+
that such additional attribution notices cannot be construed
121+
as modifying the License.
122+
123+
You may add Your own copyright statement to Your modifications and
124+
may provide additional or different license terms and conditions
125+
for use, reproduction, or distribution of Your modifications, or
126+
for any such Derivative Works as a whole, provided Your use,
127+
reproduction, and distribution of the Work otherwise complies with
128+
the conditions stated in this License.
129+
130+
5. Submission of Contributions. Unless You explicitly state otherwise,
131+
any Contribution intentionally submitted for inclusion in the Work
132+
by You to the Licensor shall be under the terms and conditions of
133+
this License, without any additional terms or conditions.
134+
Notwithstanding the above, nothing herein shall supersede or modify
135+
the terms of any separate license agreement you may have executed
136+
with Licensor regarding such Contributions.
137+
138+
6. Trademarks. This License does not grant permission to use the trade
139+
names, trademarks, service marks, or product names of the Licensor,
140+
except as required for reasonable and customary use in describing the
141+
origin of the Work and reproducing the content of the NOTICE file.
142+
143+
7. Disclaimer of Warranty. Unless required by applicable law or
144+
agreed to in writing, Licensor provides the Work (and each
145+
Contributor provides its Contributions) on an "AS IS" BASIS,
146+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147+
implied, including, without limitation, any warranties or conditions
148+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149+
PARTICULAR PURPOSE. You are solely responsible for determining the
150+
appropriateness of using or redistributing the Work and assume any
151+
risks associated with Your exercise of permissions under this License.
152+
153+
8. Limitation of Liability. In no event and under no legal theory,
154+
whether in tort (including negligence), contract, or otherwise,
155+
unless required by applicable law (such as deliberate and grossly
156+
negligent acts) or agreed to in writing, shall any Contributor be
157+
liable to You for damages, including any direct, indirect, special,
158+
incidental, or consequential damages of any character arising as a
159+
result of this License or out of the use or inability to use the
160+
Work (including but not limited to damages for loss of goodwill,
161+
work stoppage, computer failure or malfunction, or any and all
162+
other commercial damages or losses), even if such Contributor
163+
has been advised of the possibility of such damages.
164+
165+
9. Accepting Warranty or Additional Liability. While redistributing
166+
the Work or Derivative Works thereof, You may choose to offer,
167+
and charge a fee for, acceptance of support, warranty, indemnity,
168+
or other liability obligations and/or rights consistent with this
169+
License. However, in accepting such obligations, You may act only
170+
on Your own behalf and on Your sole responsibility, not on behalf
171+
of any other Contributor, and only if You agree to indemnify,
172+
defend, and hold each Contributor harmless for any liability
173+
incurred by, or claims asserted against, such Contributor by reason
174+
of your accepting any such warranty or additional liability.
175+
176+
END OF TERMS AND CONDITIONS
177+
178+
APPENDIX: How to apply the Apache License to your work.
179+
180+
To apply the Apache License to your work, attach the following
181+
boilerplate notice, with the fields enclosed by brackets "[]"
182+
replaced with your own identifying information. (Don't include
183+
the brackets!) The text should be enclosed in the appropriate
184+
comment syntax for the file format. We also recommend that a
185+
file or class name and description of purpose be included on the
186+
same "printed page" as the copyright notice for easier
187+
identification within third-party archives.
188+
189+
Copyright [yyyy] [name of copyright owner]
190+
191+
Licensed under the Apache License, Version 2.0 (the "License");
192+
you may not use this file except in compliance with the License.
193+
You may obtain a copy of the License at
194+
195+
http://www.apache.org/licenses/LICENSE-2.0
196+
197+
Unless required by applicable law or agreed to in writing, software
198+
distributed under the License is distributed on an "AS IS" BASIS,
199+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200+
See the License for the specific language governing permissions and
201+
limitations under the License.

‎WORKSPACE

Whitespace-only changes.

‎include/cifuzz/cifuzz.h

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
#ifndef CIFUZZ_CIFUZZ_H
2+
#define CIFUZZ_CIFUZZ_H
3+
4+
/* Include the headers providing the definitions required to use FUZZ_TEST. */
5+
#ifdef __cplusplus
6+
#include <cstddef>
7+
#include <cstdint>
8+
#else
9+
#include <stddef.h>
10+
#include <stdint.h>
11+
#endif
12+
13+
/* Defines CLION_TEST_PLAY_BUTTON, which FUZZ_TEST expands after defining
 * LLVMFuzzerTestOneInput. When __CLION_IDE__ is defined and the file is
 * compiled as C++, it expands to a static initializer that passes
 * LLVMFuzzerTestOneInput and CIFUZZ_TEST_NAME to the mocked
 * doctest::detail::regTest declared here; in every other configuration it
 * expands to nothing. */
#if defined(__CLION_IDE__) && defined(__cplusplus)
14+
/* This code will only be seen by CLion's static analysis/preprocessing engine
15+
* and thus doesn't have to contain any definitions, declarations are
16+
* sufficient. It mocks enough of the Doctest classes to make CLion's test
17+
* framework support treat it as the full Doctest library. */
18+
namespace doctest {
19+
namespace detail {
20+
struct TestSuite {};
21+
typedef int (*funcType)(const uint8_t *data, std::size_t size);
22+
struct TestCase
23+
{
24+
TestCase(funcType test, const char* file, unsigned line, const TestSuite& test_suite,
25+
const char* type = "", int template_id = -1);
26+
TestCase& operator*(const char* in);
27+
};
28+
int regTest(const TestCase& tc);
29+
}
30+
}
31+
32+
/* This macro has to be defined or CLion will not show a play button, but the
33+
* value doesn't matter. */
34+
#define DOCTEST_TEST_CASE
35+
36+
#define CLION_TEST_PLAY_BUTTON \
37+
/* Silence a CLion warning about a static
38+
* initializer with static storage duration */ \
39+
/* NOLINTBEGIN(cert-err58-cpp) */ \
40+
static const int DOCTEST_ANON_VAR_15771531 = \
41+
doctest::detail::regTest( \
42+
doctest::detail::TestCase( \
43+
&LLVMFuzzerTestOneInput, \
44+
"", \
45+
1, \
46+
doctest::detail::TestSuite() \
47+
/* This string is used as the test name and has
48+
* to be globally unique so that CLion
49+
* generates a unique run configuration per
50+
* test. */ \
51+
) * CIFUZZ_TEST_NAME); \
52+
/* NOLINTEND(cert-err58-cpp) */
53+
#else
54+
#define CLION_TEST_PLAY_BUTTON
55+
#endif
56+
57+
/* CIFUZZ_C_LINKAGE prefixes the functions that FUZZ_TEST and FUZZ_TEST_SETUP
 * define. It expands to extern "C" when compiled as C++ and to nothing when
 * compiled as C. */
#ifdef __cplusplus
58+
#define CIFUZZ_C_LINKAGE extern "C"
59+
#else
60+
#define CIFUZZ_C_LINKAGE
61+
#endif
62+
63+
/* Each of these macros falls back to NULL unless it was already defined before
 * this point (presumably on the compiler command line by the build
 * integration -- TODO confirm). FUZZ_TEST returns their values from
 * cifuzz_test_name(), cifuzz_seed_corpus() and cifuzz_generated_corpus(). */
#ifndef CIFUZZ_TEST_NAME
64+
#define CIFUZZ_TEST_NAME NULL
65+
#endif
66+
#ifndef CIFUZZ_SEED_CORPUS
67+
#define CIFUZZ_SEED_CORPUS NULL
68+
#endif
69+
#ifndef CIFUZZ_GENERATED_CORPUS
70+
#define CIFUZZ_GENERATED_CORPUS NULL
71+
#endif
72+
73+
/* FUZZ_TEST expands to:
 *  - a forward declaration of the static LLVMFuzzerTestOneInputNoReturn,
 *  - LLVMFuzzerTestOneInput, which forwards |data| and |size| to it and always
 *    returns 0,
 *  - the accessors cifuzz_test_name(), cifuzz_seed_corpus() and
 *    cifuzz_generated_corpus(), returning the corresponding CIFUZZ_* macros,
 *  - CLION_TEST_PLAY_BUTTON,
 *  - and finally the return type and name of LLVMFuzzerTestOneInputNoReturn,
 *    so the text following the macro must supply its parameter list and body
 *    (presumably `FUZZ_TEST(const uint8_t *data, size_t size) { ... }`). */
#define FUZZ_TEST \
74+
static void LLVMFuzzerTestOneInputNoReturn(const uint8_t *data, size_t size); \
75+
CIFUZZ_C_LINKAGE int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { \
76+
LLVMFuzzerTestOneInputNoReturn(data, size); \
77+
return 0; \
78+
} \
79+
CIFUZZ_C_LINKAGE const char *cifuzz_test_name(void) { \
80+
return CIFUZZ_TEST_NAME; \
81+
} \
82+
CIFUZZ_C_LINKAGE const char *cifuzz_seed_corpus(void) { \
83+
return CIFUZZ_SEED_CORPUS; \
84+
} \
85+
CIFUZZ_C_LINKAGE const char *cifuzz_generated_corpus(void) { \
86+
return CIFUZZ_GENERATED_CORPUS; \
87+
} \
88+
CLION_TEST_PLAY_BUTTON \
89+
void LLVMFuzzerTestOneInputNoReturn
90+
91+
/* FUZZ_TEST_SETUP expands to a forward declaration of the static
 * LLVMFuzzerInitializeNoReturn, an LLVMFuzzerInitialize that ignores |argc|
 * and |argv|, calls it and returns 0, and finally the return type and name of
 * LLVMFuzzerInitializeNoReturn, so the text following the macro must supply
 * its (empty) parameter list and body (presumably
 * `FUZZ_TEST_SETUP() { ... }`). */
#define FUZZ_TEST_SETUP \
92+
static void LLVMFuzzerInitializeNoReturn(void); \
93+
CIFUZZ_C_LINKAGE int LLVMFuzzerInitialize(int *argc, char ***argv) { \
94+
(void) argc; \
95+
(void) argv; \
96+
LLVMFuzzerInitializeNoReturn(); \
97+
return 0; \
98+
} \
99+
void LLVMFuzzerInitializeNoReturn
100+
101+
#endif /* CIFUZZ_CIFUZZ_H */

‎include/fuzzer/FuzzedDataProvider.h

Lines changed: 409 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,409 @@
1+
// Based on:
2+
// https://github.com/llvm/llvm-project/blob/f3547fd541cac91c5ee281052584b05275ddc915/compiler-rt/include/fuzzer/FuzzedDataProvider.h
3+
4+
// Modified by Fabian Meumertzheim:
5+
// - added preprocessor check for C++11
6+
//
7+
//===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===//
8+
//
9+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
10+
// See https://llvm.org/LICENSE.txt for license information.
11+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
12+
//
13+
//===----------------------------------------------------------------------===//
14+
// A single header library providing a utility class to break up an array of
15+
// bytes. Whenever run on the same input, provides the same output, as long as
16+
// its methods are called in the same order, with the same arguments.
17+
//===----------------------------------------------------------------------===//
18+
19+
#ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
20+
#define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
21+
22+
// MSVC doesn't report C++11 compliance, see:
23+
// https://developercommunity.visualstudio.com/t/msvc-incorrectly-defines-cplusplus/139261
24+
#if __cplusplus < 201103L && !defined(_MSVC_LANG)
25+
#error "FuzzedDataProvider.h requires C++11 or higher"
26+
#endif
27+
28+
#include <algorithm>
#include <array>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <initializer_list>
#include <limits>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
40+
41+
// Wraps a caller-owned byte buffer and hands it out piece by piece as typed
// values. In addition to the comments below, the API is also briefly
// documented at
// https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#fuzzed-data-provider
class FuzzedDataProvider {
 public:
  // |data| is an array of length |size| that the FuzzedDataProvider wraps to
  // provide more granular access. |data| must outlive the FuzzedDataProvider.
  FuzzedDataProvider(const uint8_t *data, size_t size)
      : data_ptr_(data), remaining_bytes_(size) {}
  ~FuzzedDataProvider() = default;

  // See the implementation below (after the class definition) for more verbose
  // comments for each of the methods.

  // Methods returning std::vector of bytes. These are the most popular choice
  // when splitting fuzzing input into pieces, as every piece is put into a
  // separate buffer (i.e. ASan would catch any under-/overflow) and the memory
  // will be released automatically.
  template <typename T> std::vector<T> ConsumeBytes(size_t num_bytes);
  template <typename T>
  std::vector<T> ConsumeBytesWithTerminator(size_t num_bytes, T terminator = 0);
  template <typename T> std::vector<T> ConsumeRemainingBytes();

  // Methods returning strings. Use only when you need a std::string or a null
  // terminated C-string. Otherwise, prefer the methods returning std::vector.
  std::string ConsumeBytesAsString(size_t num_bytes);
  std::string ConsumeRandomLengthString(size_t max_length);
  std::string ConsumeRandomLengthString();
  std::string ConsumeRemainingBytesAsString();

  // Methods returning integer values.
  template <typename T> T ConsumeIntegral();
  template <typename T> T ConsumeIntegralInRange(T min, T max);

  // Methods returning floating point values.
  template <typename T> T ConsumeFloatingPoint();
  template <typename T> T ConsumeFloatingPointInRange(T min, T max);

  // 0 <= return value <= 1.
  template <typename T> T ConsumeProbability();

  bool ConsumeBool();

  // Returns a value chosen from the given enum.
  template <typename T> T ConsumeEnum();

  // Returns a value from the given array.
  template <typename T, size_t size> T PickValueInArray(const T (&array)[size]);
  template <typename T, size_t size>
  T PickValueInArray(const std::array<T, size> &array);
  template <typename T> T PickValueInArray(std::initializer_list<const T> list);

  // Writes data to the given destination and returns number of bytes written.
  size_t ConsumeData(void *destination, size_t num_bytes);

  // Reports the remaining bytes available for fuzzed input. Declared const
  // because it only reads state, so it can also be called through a const
  // reference to the provider.
  size_t remaining_bytes() const { return remaining_bytes_; }

 private:
  // Copying is disabled.
  FuzzedDataProvider(const FuzzedDataProvider &) = delete;
  FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete;

  // Copies |num_bytes| from the front of the input to |destination| and then
  // advances past them.
  void CopyAndAdvance(void *destination, size_t num_bytes);

  // Drops |num_bytes| from the front of the input.
  void Advance(size_t num_bytes);

  // Builds a vector of |size| elements whose first |num_bytes| are taken from
  // the input.
  template <typename T>
  std::vector<T> ConsumeBytes(size_t size, size_t num_bytes);

  template <typename TS, typename TU> TS ConvertUnsignedToSigned(TU value);

  // Start of the not yet consumed input and the number of bytes left in it.
  const uint8_t *data_ptr_;
  size_t remaining_bytes_;
};
114+
115+
// Returns a std::vector containing |num_bytes| of input data. If fewer than
116+
// |num_bytes| of data remain, returns a shorter std::vector containing all
117+
// of the data that's left. Can be used with any byte sized type, such as
118+
// char, unsigned char, uint8_t, etc.
119+
template <typename T>
120+
std::vector<T> FuzzedDataProvider::ConsumeBytes(size_t num_bytes) {
121+
num_bytes = std::min(num_bytes, remaining_bytes_);
122+
return ConsumeBytes<T>(num_bytes, num_bytes);
123+
}
124+
125+
// Similar to |ConsumeBytes|, but also appends the terminator value at the end
126+
// of the resulting vector. Useful, when a mutable null-terminated C-string is
127+
// needed, for example. But that is a rare case. Better avoid it, if possible,
128+
// and prefer using |ConsumeBytes| or |ConsumeBytesAsString| methods.
129+
template <typename T>
130+
std::vector<T> FuzzedDataProvider::ConsumeBytesWithTerminator(size_t num_bytes,
131+
T terminator) {
132+
num_bytes = std::min(num_bytes, remaining_bytes_);
133+
std::vector<T> result = ConsumeBytes<T>(num_bytes + 1, num_bytes);
134+
result.back() = terminator;
135+
return result;
136+
}
137+
138+
// Returns a std::vector containing all remaining bytes of the input data.
139+
template <typename T>
140+
std::vector<T> FuzzedDataProvider::ConsumeRemainingBytes() {
141+
return ConsumeBytes<T>(remaining_bytes_);
142+
}
143+
144+
// Returns a std::string containing |num_bytes| of input data. Using this and
145+
// |.c_str()| on the resulting string is the best way to get an immutable
146+
// null-terminated C string. If fewer than |num_bytes| of data remain, returns
147+
// a shorter std::string containing all of the data that's left.
148+
inline std::string FuzzedDataProvider::ConsumeBytesAsString(size_t num_bytes) {
149+
static_assert(sizeof(std::string::value_type) == sizeof(uint8_t),
150+
"ConsumeBytesAsString cannot convert the data to a string.");
151+
152+
num_bytes = std::min(num_bytes, remaining_bytes_);
153+
std::string result(
154+
reinterpret_cast<const std::string::value_type *>(data_ptr_), num_bytes);
155+
Advance(num_bytes);
156+
return result;
157+
}
158+
159+
// Returns a std::string of length from 0 to |max_length|. When it runs out of
160+
// input data, returns what remains of the input. Designed to be more stable
161+
// with respect to a fuzzer inserting characters than just picking a random
162+
// length and then consuming that many bytes with |ConsumeBytes|.
163+
inline std::string
164+
FuzzedDataProvider::ConsumeRandomLengthString(size_t max_length) {
165+
// Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", and maps "\"
166+
// followed by anything else to the end of the string. As a result of this
167+
// logic, a fuzzer can insert characters into the string, and the string
168+
// will be lengthened to include those new characters, resulting in a more
169+
// stable fuzzer than picking the length of a string independently from
170+
// picking its contents.
171+
std::string result;
172+
173+
// Reserve the anticipated capaticity to prevent several reallocations.
174+
result.reserve(std::min(max_length, remaining_bytes_));
175+
for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) {
176+
char next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
177+
Advance(1);
178+
if (next == '\\' && remaining_bytes_ != 0) {
179+
next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
180+
Advance(1);
181+
if (next != '\\')
182+
break;
183+
}
184+
result += next;
185+
}
186+
187+
result.shrink_to_fit();
188+
return result;
189+
}
190+
191+
// Returns a std::string of length from 0 to |remaining_bytes_|.
192+
inline std::string FuzzedDataProvider::ConsumeRandomLengthString() {
193+
return ConsumeRandomLengthString(remaining_bytes_);
194+
}
195+
196+
// Returns a std::string containing all remaining bytes of the input data.
197+
// Prefer using |ConsumeRemainingBytes| unless you actually need a std::string
198+
// object.
199+
inline std::string FuzzedDataProvider::ConsumeRemainingBytesAsString() {
200+
return ConsumeBytesAsString(remaining_bytes_);
201+
}
202+
203+
// Returns a number in the range [Type's min, Type's max]. The value might
204+
// not be uniformly distributed in the given range. If there's no input data
205+
// left, always returns |min|.
206+
template <typename T> T FuzzedDataProvider::ConsumeIntegral() {
207+
return ConsumeIntegralInRange(std::numeric_limits<T>::min(),
208+
std::numeric_limits<T>::max());
209+
}
210+
211+
// Returns a number in the range [min, max] by consuming bytes from the
212+
// input data. The value might not be uniformly distributed in the given
213+
// range. If there's no input data left, always returns |min|. |min| must
214+
// be less than or equal to |max|.
215+
template <typename T>
216+
T FuzzedDataProvider::ConsumeIntegralInRange(T min, T max) {
217+
static_assert(std::is_integral<T>::value, "An integral type is required.");
218+
static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type.");
219+
220+
if (min > max)
221+
abort();
222+
223+
// Use the biggest type possible to hold the range and the result.
224+
uint64_t range = static_cast<uint64_t>(max) - min;
225+
uint64_t result = 0;
226+
size_t offset = 0;
227+
228+
while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 &&
229+
remaining_bytes_ != 0) {
230+
// Pull bytes off the end of the seed data. Experimentally, this seems to
231+
// allow the fuzzer to more easily explore the input space. This makes
232+
// sense, since it works by modifying inputs that caused new code to run,
233+
// and this data is often used to encode length of data read by
234+
// |ConsumeBytes|. Separating out read lengths makes it easier modify the
235+
// contents of the data that is actually read.
236+
--remaining_bytes_;
237+
result = (result << CHAR_BIT) | data_ptr_[remaining_bytes_];
238+
offset += CHAR_BIT;
239+
}
240+
241+
// Avoid division by 0, in case |range + 1| results in overflow.
242+
if (range != std::numeric_limits<decltype(range)>::max())
243+
result = result % (range + 1);
244+
245+
return static_cast<T>(min + result);
246+
}
247+
248+
// Returns a floating point value in the range [Type's lowest, Type's max] by
249+
// consuming bytes from the input data. If there's no input data left, always
250+
// returns approximately 0.
251+
template <typename T> T FuzzedDataProvider::ConsumeFloatingPoint() {
252+
return ConsumeFloatingPointInRange<T>(std::numeric_limits<T>::lowest(),
253+
std::numeric_limits<T>::max());
254+
}
255+
256+
// Returns a floating point value in the given range by consuming bytes from
257+
// the input data. If there's no input data left, returns |min|. Note that
258+
// |min| must be less than or equal to |max|.
259+
template <typename T>
260+
T FuzzedDataProvider::ConsumeFloatingPointInRange(T min, T max) {
261+
if (min > max)
262+
abort();
263+
264+
T range = .0;
265+
T result = min;
266+
constexpr T zero(.0);
267+
if (max > zero && min < zero && max > min + std::numeric_limits<T>::max()) {
268+
// The diff |max - min| would overflow the given floating point type. Use
269+
// the half of the diff as the range and consume a bool to decide whether
270+
// the result is in the first of the second part of the diff.
271+
range = (max / 2.0) - (min / 2.0);
272+
if (ConsumeBool()) {
273+
result += range;
274+
}
275+
} else {
276+
range = max - min;
277+
}
278+
279+
return result + range * ConsumeProbability<T>();
280+
}
281+
282+
// Returns a floating point number in the range [0.0, 1.0]. If there's no
283+
// input data left, always returns 0.
284+
template <typename T> T FuzzedDataProvider::ConsumeProbability() {
285+
static_assert(std::is_floating_point<T>::value,
286+
"A floating point type is required.");
287+
288+
// Use different integral types for different floating point types in order
289+
// to provide better density of the resulting values.
290+
using IntegralType =
291+
typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), uint32_t,
292+
uint64_t>::type;
293+
294+
T result = static_cast<T>(ConsumeIntegral<IntegralType>());
295+
result /= static_cast<T>(std::numeric_limits<IntegralType>::max());
296+
return result;
297+
}
298+
299+
// Reads one byte and returns a bool, or false when no data remains.
300+
inline bool FuzzedDataProvider::ConsumeBool() {
301+
return 1 & ConsumeIntegral<uint8_t>();
302+
}
303+
304+
// Returns an enum value. The enum must start at 0 and be contiguous. It must
305+
// also contain |kMaxValue| aliased to its largest (inclusive) value. Such as:
306+
// enum class Foo { SomeValue, OtherValue, kMaxValue = OtherValue };
307+
template <typename T> T FuzzedDataProvider::ConsumeEnum() {
308+
static_assert(std::is_enum<T>::value, "|T| must be an enum type.");
309+
return static_cast<T>(
310+
ConsumeIntegralInRange<uint32_t>(0, static_cast<uint32_t>(T::kMaxValue)));
311+
}
312+
313+
// Returns a copy of the value selected from the given fixed-size |array|.
314+
template <typename T, size_t size>
315+
T FuzzedDataProvider::PickValueInArray(const T (&array)[size]) {
316+
static_assert(size > 0, "The array must be non empty.");
317+
return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
318+
}
319+
320+
template <typename T, size_t size>
321+
T FuzzedDataProvider::PickValueInArray(const std::array<T, size> &array) {
322+
static_assert(size > 0, "The array must be non empty.");
323+
return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
324+
}
325+
326+
template <typename T>
327+
T FuzzedDataProvider::PickValueInArray(std::initializer_list<const T> list) {
328+
// TODO(Dor1s): switch to static_assert once C++14 is allowed.
329+
if (!list.size())
330+
abort();
331+
332+
return *(list.begin() + ConsumeIntegralInRange<size_t>(0, list.size() - 1));
333+
}
334+
335+
// Writes |num_bytes| of input data to the given destination pointer. If there
336+
// is not enough data left, writes all remaining bytes. Return value is the
337+
// number of bytes written.
338+
// In general, it's better to avoid using this function, but it may be useful
339+
// in cases when it's necessary to fill a certain buffer or object with
340+
// fuzzing data.
341+
inline size_t FuzzedDataProvider::ConsumeData(void *destination,
342+
size_t num_bytes) {
343+
num_bytes = std::min(num_bytes, remaining_bytes_);
344+
CopyAndAdvance(destination, num_bytes);
345+
return num_bytes;
346+
}
347+
348+
// Private methods.
349+
inline void FuzzedDataProvider::CopyAndAdvance(void *destination,
350+
size_t num_bytes) {
351+
std::memcpy(destination, data_ptr_, num_bytes);
352+
Advance(num_bytes);
353+
}
354+
355+
// Moves the read position forward by |num_bytes| and reduces the count of
// remaining bytes by the same amount. Aborts if fewer than |num_bytes| remain.
inline void FuzzedDataProvider::Advance(size_t num_bytes) {
  const bool within_bounds = num_bytes <= remaining_bytes_;
  if (!within_bounds)
    abort();

  remaining_bytes_ -= num_bytes;
  data_ptr_ += num_bytes;
}
362+
363+
template <typename T>
364+
std::vector<T> FuzzedDataProvider::ConsumeBytes(size_t size, size_t num_bytes) {
365+
static_assert(sizeof(T) == sizeof(uint8_t), "Incompatible data type.");
366+
367+
// The point of using the size-based constructor below is to increase the
368+
// odds of having a vector object with capacity being equal to the length.
369+
// That part is always implementation specific, but at least both libc++ and
370+
// libstdc++ allocate the requested number of bytes in that constructor,
371+
// which seems to be a natural choice for other implementations as well.
372+
// To increase the odds even more, we also call |shrink_to_fit| below.
373+
std::vector<T> result(size);
374+
if (size == 0) {
375+
if (num_bytes != 0)
376+
abort();
377+
return result;
378+
}
379+
380+
CopyAndAdvance(result.data(), num_bytes);
381+
382+
// Even though |shrink_to_fit| is also implementation specific, we expect it
383+
// to provide an additional assurance in case vector's constructor allocated
384+
// a buffer which is larger than the actual amount of data we put inside it.
385+
result.shrink_to_fit();
386+
return result;
387+
}
388+
389+
template <typename TS, typename TU>
390+
TS FuzzedDataProvider::ConvertUnsignedToSigned(TU value) {
391+
static_assert(sizeof(TS) == sizeof(TU), "Incompatible data types.");
392+
static_assert(!std::numeric_limits<TU>::is_signed,
393+
"Source type must be unsigned.");
394+
395+
// TODO(Dor1s): change to `if constexpr` once C++17 becomes mainstream.
396+
if (std::numeric_limits<TS>::is_modulo)
397+
return static_cast<TS>(value);
398+
399+
// Avoid using implementation-defined unsigned to signed conversions.
400+
// To learn more, see https://stackoverflow.com/questions/13150449.
401+
if (value <= std::numeric_limits<TS>::max()) {
402+
return static_cast<TS>(value);
403+
} else {
404+
constexpr auto TS_min = std::numeric_limits<TS>::min();
405+
return TS_min + static_cast<TS>(value - TS_min);
406+
}
407+
}
408+
409+
#endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_

‎src/dumper.cpp

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
#include <stddef.h>
2+
#include <string.h>
3+
4+
#ifdef __APPLE__
5+
#include <dlfcn.h>
6+
#include <pthread.h>
7+
#endif
8+
9+
#ifdef __cplusplus
10+
extern "C" {
11+
#endif
12+
13+
/* Prefix that the summary line is compared against to recognize a UBSan finding. */
static const char UBSAN_SUMMARY_PREFIX[] = "SUMMARY: UndefinedBehaviorSanitizer:";
14+
/* Callback captured by the hooked __sanitizer_set_death_callback; NULL until that hook has run. */
static void (*sanitizer_death_callback)(void) = NULL;
15+
16+
/*
17+
* By linking this file into a fuzz test (and adding a linker flag on Linux),
18+
* non-fatal sanitizer findings will still write an input to disk.
19+
*
20+
* For both macOS and Linux, we hook:
21+
* - __sanitizer_set_death_callback, to which libFuzzer provides a callback that
22+
* can be used to dump the current input;
23+
* - __sanitizer_report_error_summary, which is executed by all sanitizers on a
24+
* finding, regardless of whether it is fatal. Since this function is provided
25+
the summary line, we can invoke the saved death callback only if
26+
* needed.
27+
*/
28+
#ifdef __APPLE__
29+
/*
30+
* On macOS, sanitizers are exclusively linked dynamically, which allows us to
31+
* wrap functions simply by defining them and looking up the original function
32+
* via dlsym(RTLD_NEXT, ...). We can't use the --wrap linker flag since the
33+
* macOS linker doesn't support it.
34+
*/
35+
/*
 * Remembers |callback| so that it can also be run for non-fatal findings, then
 * forwards it to the next definition of this function in library search
 * order, if one exists.
 */
void __sanitizer_set_death_callback(void (*callback)(void)) {
  typedef void (*set_death_callback_fn)(void (*)(void));

  sanitizer_death_callback = callback;
  set_death_callback_fn real_set_death_callback = (set_death_callback_fn)dlsym(
      RTLD_NEXT, "__sanitizer_set_death_callback");
  /*
   * dlsym returns NULL when no later object provides the symbol; calling
   * through that pointer would crash.
   */
  if (real_set_death_callback != NULL) {
    real_set_death_callback(callback);
  }
}
41+
42+
/*
43+
* Ensure that ASan's verify_interceptors check passes: It checks that puts and
44+
* __sanitizer_report_error_summary are defined in the same object. Since we
45+
* hook the latter, we also have to (trivially) hook the former.
46+
* https://github.com/llvm/llvm-project/blob/f8a469fc572778d05b72f34a772082cf3abd3cda/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp#L987-L993
47+
* Older versions of LLVM check for pthread_create instead:
48+
* https://github.com/llvm/llvm-project/blob/abc51fac09593ec048b3b298fa274af823e0a22d/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp#L1061-L1067
49+
*/
50+
/* Pass-through hook: looks up the next `puts` and returns its result for |str|. */
int puts(const char *str) {
  typedef int (*puts_fn)(const char *);
  puts_fn next_puts = (puts_fn)dlsym(RTLD_NEXT, "puts");
  return next_puts(str);
}
54+
55+
/*
 * Pass-through hook: looks up the next `pthread_create` and returns its result
 * for the unmodified arguments.
 */
int pthread_create(pthread_t *thread,
                   const pthread_attr_t *attr,
                   void *(*start_routine)(void *),
                   void *arg) {
  typedef int (*pthread_create_fn)(pthread_t *, const pthread_attr_t *,
                                   void *(*)(void *), void *);
  pthread_create_fn next_pthread_create =
      (pthread_create_fn)dlsym(RTLD_NEXT, "pthread_create");
  return next_pthread_create(thread, attr, start_routine, arg);
}
62+
63+
void __sanitizer_report_error_summary(const char *error_summary) {
64+
void *real_sanitizer_report_error_summary =
65+
dlsym(RTLD_NEXT, "__sanitizer_report_error_summary");
66+
((void (*)(const char *))(real_sanitizer_report_error_summary))(error_summary);
67+
if (strncmp(UBSAN_SUMMARY_PREFIX, error_summary, strlen(UBSAN_SUMMARY_PREFIX)) == 0) {
68+
sanitizer_death_callback();
69+
}
70+
}
71+
#else
72+
/*
73+
* On Linux, the --wrap flag of GNU ld can be used to wrap all calls to a given
74+
* function. We can't use the macOS approach as sanitizer runtimes can be linked
75+
* statically.
76+
*/
77+
void __real___sanitizer_set_death_callback(void (*callback)(void));
78+
79+
void __wrap___sanitizer_set_death_callback(void (*callback)(void)) {
80+
sanitizer_death_callback = callback;
81+
__real___sanitizer_set_death_callback(callback);
82+
}
83+
84+
/* clang mangling applied to __sanitizer::Printf(const char *format, ...) */
85+
void _ZN11__sanitizer6PrintfEPKcz(const char *format, ...);
86+
87+
/*
88+
* If wrapped with --wrap, the __wrap_ version of this function is never called.
89+
* It is not clear why. Instead, we inline the real implementation of this
90+
* function, which consists of a single call to an internal implementation of
91+
* Printf.
92+
*/
93+
void __sanitizer_report_error_summary(const char *error_summary) {
94+
_ZN11__sanitizer6PrintfEPKcz(error_summary);
95+
if (sanitizer_death_callback == NULL) {
96+
return;
97+
}
98+
/*
99+
* Do not emit the input twice for ASan, which is always fatal.
100+
* TODO: This will change if we introduce --recover-asan.
101+
* TODO: Since we do not take the state of --recover-ubsan into account, we
102+
* dump the input twice with --recover-ubsan=false. This is harmless as it
103+
* only pollutes the verbose flags, but should still be fixed.
104+
*/
105+
if (strncmp(UBSAN_SUMMARY_PREFIX, error_summary, strlen(UBSAN_SUMMARY_PREFIX)) == 0) {
106+
sanitizer_death_callback();
107+
}
108+
}
109+
#endif
110+
111+
#ifdef __cplusplus
112+
}
113+
#endif

0 commit comments

Comments
 (0)
Please sign in to comment.