CodeIntelligenceTesting · Nov 24, 2022
diff --git a/‎BUILD.bazel
Lines changed: 51 additions & 0 deletions b/‎BUILD.bazel
Lines changed: 51 additions & 0 deletions
diff --git a/‎LICENSE
Lines changed: 201 additions & 0 deletions b/‎LICENSE
Lines changed: 201 additions & 0 deletions
diff --git a/‎WORKSPACE b/‎WORKSPACE
diff --git a/‎include/cifuzz/cifuzz.h
Lines changed: 101 additions & 0 deletions b/‎include/cifuzz/cifuzz.h
Lines changed: 101 additions & 0 deletions
diff --git a/‎include/fuzzer/FuzzedDataProvider.h
Lines changed: 409 additions & 0 deletions b/‎include/fuzzer/FuzzedDataProvider.h
Lines changed: 409 additions & 0 deletions
diff --git a/‎src/dumper.cpp
Lines changed: 113 additions & 0 deletions b/‎src/dumper.cpp
Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,51 @@
+load("@bazel_skylib//rules:common_settings.bzl", "bool_flag")
+load("@bazel_skylib//lib:selects.bzl", "selects")
+
+bool_flag(
+    # Since this flag can be set from the command line, give it a very ugly name
+    # to discourage its use by users.
+    name = "__internal_has_libfuzzer",
+    build_setting_default = False,
+)
+
+config_setting(
+    name = "has_libfuzzer",
+    flag_values = {":__internal_has_libfuzzer": "True"},
+)
+
+selects.config_setting_group(
+    name = "on_linux_and_has_libfuzzer",
+    match_all = [
+        ":has_libfuzzer",
+        "@platforms//os:linux",
+    ],
+)
+
+cc_library(
+    name = "cifuzz",
+    srcs = select({
+        # We have to use dumper.cpp instead of dumper.c here as the non-Xcode
+        # toolchain on macOS needs -isysroot to be set to the macOS SDK path in
+        # order to find C headers. However, Bazel only offers BAZEL_CXXOPTS to
+        # set this flag in a way that affects the builtin include directory list
+        # compiled by the @local_config_cc auto-configured toolchain. There is
+        # no BAZEL_COPTS.
+        ":has_libfuzzer": ["src/dumper.cpp"],
+        "//conditions:default": [],
+    }),
+    hdrs = [
+        "include/cifuzz/cifuzz.h",
+        "include/fuzzer/FuzzedDataProvider.h",
+    ],
+    includes = ["include"],
+    linkopts = select({
+        # On Linux, we wrap the __sanitizer_set_death_callback method to
+        # also dump sanitizer findings when they are non-fatal.
+        # See src/dumper.cpp for details.
+        ":on_linux_and_has_libfuzzer": ["-Wl,--wrap=__sanitizer_set_death_callback"],
+        "//conditions:default": [],
+    }),
+    linkstatic = True,
+    visibility = ["//visibility:public"],
+    alwayslink = True,
+)
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
@@ -0,0 +1,101 @@
+#ifndef CIFUZZ_CIFUZZ_H
+#define CIFUZZ_CIFUZZ_H
+
+/* Include the headers providing the definitions required to use FUZZ_TEST. */
+#ifdef __cplusplus
+#include <cstddef>
+#include <cstdint>
+#else
+#include <stddef.h>
+#include <stdint.h>
+#endif
+
+#if defined(__CLION_IDE__) && defined(__cplusplus)
+/* This code will only be seen by CLion's static analysis/preprocessing engine
+ * and thus doesn't have to contain any definitions; declarations are
+ * sufficient. It mocks enough of the Doctest classes to make CLion's test
+ * framework support treat it as the full Doctest library. */
+namespace doctest {
+namespace detail {
+struct TestSuite {};
+typedef int (*funcType)(const uint8_t *data, std::size_t size);
+struct TestCase
+{
+  TestCase(funcType test, const char* file, unsigned line, const TestSuite& test_suite,
+           const char* type = "", int template_id = -1);
+  TestCase& operator*(const char* in);
+};
+int regTest(const TestCase& tc);
+}
+}
+
+/* This macro has to be defined or CLion will not show a play button, but the
+ * value doesn't matter. */
+#define DOCTEST_TEST_CASE
+
+#define CLION_TEST_PLAY_BUTTON \
+/* Silence a CLion warning about a static
+ * initializer with static storage duration */  \
+/* NOLINTBEGIN(cert-err58-cpp) */               \
+static const int DOCTEST_ANON_VAR_15771531 =    \
+    doctest::detail::regTest(                   \
+        doctest::detail::TestCase(              \
+            &LLVMFuzzerTestOneInput,            \
+            "",                                 \
+            1,                                  \
+            doctest::detail::TestSuite()        \
+/* This string is used as the test name and has
+ * to be globally unique so that CLion
+ * generates a unique run configuration per
+ * test. */                                     \
+        ) * CIFUZZ_TEST_NAME);                  \
+/* NOLINTEND(cert-err58-cpp) */
+#else
+#define CLION_TEST_PLAY_BUTTON
+#endif
+
+#ifdef __cplusplus
+#define CIFUZZ_C_LINKAGE extern "C"
+#else
+#define CIFUZZ_C_LINKAGE
+#endif
+
+#ifndef CIFUZZ_TEST_NAME
+#define CIFUZZ_TEST_NAME NULL
+#endif
+#ifndef CIFUZZ_SEED_CORPUS
+#define CIFUZZ_SEED_CORPUS NULL
+#endif
+#ifndef CIFUZZ_GENERATED_CORPUS
+#define CIFUZZ_GENERATED_CORPUS NULL
+#endif
+
+#define FUZZ_TEST                                                                \
+static void LLVMFuzzerTestOneInputNoReturn(const uint8_t *data, size_t size);    \
+CIFUZZ_C_LINKAGE int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {  \
+  LLVMFuzzerTestOneInputNoReturn(data, size);                                    \
+  return 0;                                                                      \
+}                                                                                \
+CIFUZZ_C_LINKAGE const char *cifuzz_test_name(void) {                            \
+  return CIFUZZ_TEST_NAME;                                                       \
+}                                                                                \
+CIFUZZ_C_LINKAGE const char *cifuzz_seed_corpus(void) {                          \
+  return CIFUZZ_SEED_CORPUS;                                                     \
+}                                                                                \
+CIFUZZ_C_LINKAGE const char *cifuzz_generated_corpus(void) {                     \
+  return CIFUZZ_GENERATED_CORPUS;                                                \
+}                                                                                \
+CLION_TEST_PLAY_BUTTON                                                           \
+void LLVMFuzzerTestOneInputNoReturn
+
+#define FUZZ_TEST_SETUP                                              \
+static void LLVMFuzzerInitializeNoReturn(void);                      \
+CIFUZZ_C_LINKAGE int LLVMFuzzerInitialize(int *argc, char ***argv) { \
+  (void) argc;                                                       \
+  (void) argv;                                                       \
+  LLVMFuzzerInitializeNoReturn();                                    \
+  return 0;                                                          \
+}                                                                    \
+void LLVMFuzzerInitializeNoReturn
+
+#endif  // CIFUZZ_CIFUZZ_H
@@ -0,0 +1,409 @@
+// Based on:
+// https://github.com/llvm/llvm-project/blob/f3547fd541cac91c5ee281052584b05275ddc915/compiler-rt/include/fuzzer/FuzzedDataProvider.h
+
+// Modified by Fabian Meumertzheim:
+//   - added preprocessor check for C++11
+//
+//===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// A single header library providing a utility class to break up an array of
+// bytes. Whenever run on the same input, provides the same output, as long as
+// its methods are called in the same order, with the same arguments.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
+#define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
+
+// MSVC doesn't report C++11 compliance, see:
+// https://developercommunity.visualstudio.com/t/msvc-incorrectly-defines-cplusplus/139261
+#if __cplusplus < 201103L && !defined(_MSVC_LANG)
+#error "FuzzedDataProvider.h requires C++11 or higher"
+#endif
+
+#include <algorithm>
+#include <array>
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <initializer_list>
+#include <limits>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+// In addition to the comments below, the API is also briefly documented at
+// https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#fuzzed-data-provider
+class FuzzedDataProvider {
+ public:
+  // |data| is an array of length |size| that the FuzzedDataProvider wraps to
+  // provide more granular access. |data| must outlive the FuzzedDataProvider.
+  FuzzedDataProvider(const uint8_t *data, size_t size)
+      : data_ptr_(data), remaining_bytes_(size) {}
+  ~FuzzedDataProvider() = default;
+
+  // See the implementation below (after the class definition) for more verbose
+  // comments for each of the methods.
+
+  // Methods returning std::vector of bytes. These are the most popular choice
+  // when splitting fuzzing input into pieces, as every piece is put into a
+  // separate buffer (i.e. ASan would catch any under-/overflow) and the memory
+  // will be released automatically.
+  template <typename T> std::vector<T> ConsumeBytes(size_t num_bytes);
+  template <typename T>
+  std::vector<T> ConsumeBytesWithTerminator(size_t num_bytes, T terminator = 0);
+  template <typename T> std::vector<T> ConsumeRemainingBytes();
+
+  // Methods returning strings. Use only when you need a std::string or a null
+  // terminated C-string. Otherwise, prefer the methods returning std::vector.
+  std::string ConsumeBytesAsString(size_t num_bytes);
+  std::string ConsumeRandomLengthString(size_t max_length);
+  std::string ConsumeRandomLengthString();
+  std::string ConsumeRemainingBytesAsString();
+
+  // Methods returning integer values.
+  template <typename T> T ConsumeIntegral();
+  template <typename T> T ConsumeIntegralInRange(T min, T max);
+
+  // Methods returning floating point values.
+  template <typename T> T ConsumeFloatingPoint();
+  template <typename T> T ConsumeFloatingPointInRange(T min, T max);
+
+  // 0 <= return value <= 1.
+  template <typename T> T ConsumeProbability();
+
+  bool ConsumeBool();
+
+  // Returns a value chosen from the given enum.
+  template <typename T> T ConsumeEnum();
+
+  // Returns a value from the given array.
+  template <typename T, size_t size> T PickValueInArray(const T (&array)[size]);
+  template <typename T, size_t size>
+  T PickValueInArray(const std::array<T, size> &array);
+  template <typename T> T PickValueInArray(std::initializer_list<const T> list);
+
+  // Writes data to the given destination and returns number of bytes written.
+  size_t ConsumeData(void *destination, size_t num_bytes);
+
+  // Reports the remaining bytes available for fuzzed input.
+  size_t remaining_bytes() { return remaining_bytes_; }
+
+ private:
+  FuzzedDataProvider(const FuzzedDataProvider &) = delete;
+  FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete;
+
+  void CopyAndAdvance(void *destination, size_t num_bytes);
+
+  void Advance(size_t num_bytes);
+
+  template <typename T>
+  std::vector<T> ConsumeBytes(size_t size, size_t num_bytes);
+
+  template <typename TS, typename TU> TS ConvertUnsignedToSigned(TU value);
+
+  const uint8_t *data_ptr_;
+  size_t remaining_bytes_;
+};
+
+// Returns a std::vector containing |num_bytes| of input data. If fewer than
+// |num_bytes| of data remain, returns a shorter std::vector containing all
+// of the data that's left. Can be used with any byte sized type, such as
+// char, unsigned char, uint8_t, etc.
+template <typename T>
+std::vector<T> FuzzedDataProvider::ConsumeBytes(size_t num_bytes) {
+  num_bytes = std::min(num_bytes, remaining_bytes_);
+  return ConsumeBytes<T>(num_bytes, num_bytes);
+}
+
+// Similar to |ConsumeBytes|, but also appends the terminator value at the end
+// of the resulting vector. Useful, when a mutable null-terminated C-string is
+// needed, for example. But that is a rare case. Better avoid it, if possible,
+// and prefer using |ConsumeBytes| or |ConsumeBytesAsString| methods.
+template <typename T>
+std::vector<T> FuzzedDataProvider::ConsumeBytesWithTerminator(size_t num_bytes,
+                                                              T terminator) {
+  num_bytes = std::min(num_bytes, remaining_bytes_);
+  std::vector<T> result = ConsumeBytes<T>(num_bytes + 1, num_bytes);
+  result.back() = terminator;
+  return result;
+}
+
+// Returns a std::vector containing all remaining bytes of the input data.
+template <typename T>
+std::vector<T> FuzzedDataProvider::ConsumeRemainingBytes() {
+  return ConsumeBytes<T>(remaining_bytes_);
+}
+
+// Returns a std::string containing |num_bytes| of input data. Using this and
+// |.c_str()| on the resulting string is the best way to get an immutable
+// null-terminated C string. If fewer than |num_bytes| of data remain, returns
+// a shorter std::string containing all of the data that's left.
+inline std::string FuzzedDataProvider::ConsumeBytesAsString(size_t num_bytes) {
+  static_assert(sizeof(std::string::value_type) == sizeof(uint8_t),
+                "ConsumeBytesAsString cannot convert the data to a string.");
+
+  num_bytes = std::min(num_bytes, remaining_bytes_);
+  std::string result(
+      reinterpret_cast<const std::string::value_type *>(data_ptr_), num_bytes);
+  Advance(num_bytes);
+  return result;
+}
+
+// Returns a std::string of length from 0 to |max_length|. When it runs out of
+// input data, returns what remains of the input. Designed to be more stable
+// with respect to a fuzzer inserting characters than just picking a random
+// length and then consuming that many bytes with |ConsumeBytes|.
+inline std::string
+FuzzedDataProvider::ConsumeRandomLengthString(size_t max_length) {
+  // Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", and maps "\"
+  // followed by anything else to the end of the string. As a result of this
+  // logic, a fuzzer can insert characters into the string, and the string
+  // will be lengthened to include those new characters, resulting in a more
+  // stable fuzzer than picking the length of a string independently from
+  // picking its contents.
+  std::string result;
+
+  // Reserve the anticipated capacity to prevent several reallocations.
+  result.reserve(std::min(max_length, remaining_bytes_));
+  for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) {
+    char next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
+    Advance(1);
+    if (next == '\\' && remaining_bytes_ != 0) {
+      next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
+      Advance(1);
+      if (next != '\\')
+        break;
+    }
+    result += next;
+  }
+
+  result.shrink_to_fit();
+  return result;
+}
+
+// Returns a std::string of length from 0 to |remaining_bytes_|.
+inline std::string FuzzedDataProvider::ConsumeRandomLengthString() {
+  return ConsumeRandomLengthString(remaining_bytes_);
+}
+
+// Returns a std::string containing all remaining bytes of the input data.
+// Prefer using |ConsumeRemainingBytes| unless you actually need a std::string
+// object.
+inline std::string FuzzedDataProvider::ConsumeRemainingBytesAsString() {
+  return ConsumeBytesAsString(remaining_bytes_);
+}
+
+// Returns a number in the range [Type's min, Type's max]. The value might
+// not be uniformly distributed in the given range. If there's no input data
+// left, always returns the type's minimum value.
+template <typename T> T FuzzedDataProvider::ConsumeIntegral() {
+  return ConsumeIntegralInRange(std::numeric_limits<T>::min(),
+                                std::numeric_limits<T>::max());
+}
+
+// Returns a number in the range [min, max] by consuming bytes from the
+// input data. The value might not be uniformly distributed in the given
+// range. If there's no input data left, always returns |min|. |min| must
+// be less than or equal to |max|.
+template <typename T>
+T FuzzedDataProvider::ConsumeIntegralInRange(T min, T max) {
+  static_assert(std::is_integral<T>::value, "An integral type is required.");
+  static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type.");
+
+  if (min > max)
+    abort();
+
+  // Use the biggest type possible to hold the range and the result.
+  uint64_t range = static_cast<uint64_t>(max) - min;
+  uint64_t result = 0;
+  size_t offset = 0;
+
+  while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 &&
+         remaining_bytes_ != 0) {
+    // Pull bytes off the end of the seed data. Experimentally, this seems to
+    // allow the fuzzer to more easily explore the input space. This makes
+    // sense, since it works by modifying inputs that caused new code to run,
+    // and this data is often used to encode length of data read by
+    // |ConsumeBytes|. Separating out read lengths makes it easier to modify
+    // the contents of the data that is actually read.
+    --remaining_bytes_;
+    result = (result << CHAR_BIT) | data_ptr_[remaining_bytes_];
+    offset += CHAR_BIT;
+  }
+
+  // Avoid division by 0, in case |range + 1| results in overflow.
+  if (range != std::numeric_limits<decltype(range)>::max())
+    result = result % (range + 1);
+
+  return static_cast<T>(min + result);
+}
+
+// Returns a floating point value in the range [Type's lowest, Type's max] by
+// consuming bytes from the input data. If there's no input data left, always
+// returns the type's lowest value.
+template <typename T> T FuzzedDataProvider::ConsumeFloatingPoint() {
+  return ConsumeFloatingPointInRange<T>(std::numeric_limits<T>::lowest(),
+                                        std::numeric_limits<T>::max());
+}
+
+// Returns a floating point value in the given range by consuming bytes from
+// the input data. If there's no input data left, returns |min|. Note that
+// |min| must be less than or equal to |max|.
+template <typename T>
+T FuzzedDataProvider::ConsumeFloatingPointInRange(T min, T max) {
+  if (min > max)
+    abort();
+
+  T range = .0;
+  T result = min;
+  constexpr T zero(.0);
+  if (max > zero && min < zero && max > min + std::numeric_limits<T>::max()) {
+    // The diff |max - min| would overflow the given floating point type. Use
+    // the half of the diff as the range and consume a bool to decide whether
+    // the result is in the first or the second part of the diff.
+    range = (max / 2.0) - (min / 2.0);
+    if (ConsumeBool()) {
+      result += range;
+    }
+  } else {
+    range = max - min;
+  }
+
+  return result + range * ConsumeProbability<T>();
+}
+
+// Returns a floating point number in the range [0.0, 1.0]. If there's no
+// input data left, always returns 0.
+template <typename T> T FuzzedDataProvider::ConsumeProbability() {
+  static_assert(std::is_floating_point<T>::value,
+                "A floating point type is required.");
+
+  // Use different integral types for different floating point types in order
+  // to provide better density of the resulting values.
+  using IntegralType =
+      typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), uint32_t,
+                                uint64_t>::type;
+
+  T result = static_cast<T>(ConsumeIntegral<IntegralType>());
+  result /= static_cast<T>(std::numeric_limits<IntegralType>::max());
+  return result;
+}
+
+// Reads one byte and returns a bool, or false when no data remains.
+inline bool FuzzedDataProvider::ConsumeBool() {
+  return 1 & ConsumeIntegral<uint8_t>();
+}
+
+// Returns an enum value. The enum must start at 0 and be contiguous. It must
+// also contain |kMaxValue| aliased to its largest (inclusive) value. Such as:
+// enum class Foo { SomeValue, OtherValue, kMaxValue = OtherValue };
+template <typename T> T FuzzedDataProvider::ConsumeEnum() {
+  static_assert(std::is_enum<T>::value, "|T| must be an enum type.");
+  return static_cast<T>(
+      ConsumeIntegralInRange<uint32_t>(0, static_cast<uint32_t>(T::kMaxValue)));
+}
+
+// Returns a copy of the value selected from the given fixed-size |array|.
+template <typename T, size_t size>
+T FuzzedDataProvider::PickValueInArray(const T (&array)[size]) {
+  static_assert(size > 0, "The array must be non empty.");
+  return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
+}
+
+template <typename T, size_t size>
+T FuzzedDataProvider::PickValueInArray(const std::array<T, size> &array) {
+  static_assert(size > 0, "The array must be non empty.");
+  return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
+}
+
+template <typename T>
+T FuzzedDataProvider::PickValueInArray(std::initializer_list<const T> list) {
+  // TODO(Dor1s): switch to static_assert once C++14 is allowed.
+  if (!list.size())
+    abort();
+
+  return *(list.begin() + ConsumeIntegralInRange<size_t>(0, list.size() - 1));
+}
+
+// Writes |num_bytes| of input data to the given destination pointer. If there
+// is not enough data left, writes all remaining bytes. Return value is the
+// number of bytes written.
+// In general, it's better to avoid using this function, but it may be useful
+// in cases when it's necessary to fill a certain buffer or object with
+// fuzzing data.
+inline size_t FuzzedDataProvider::ConsumeData(void *destination,
+                                              size_t num_bytes) {
+  num_bytes = std::min(num_bytes, remaining_bytes_);
+  CopyAndAdvance(destination, num_bytes);
+  return num_bytes;
+}
+
+// Private methods.
+inline void FuzzedDataProvider::CopyAndAdvance(void *destination,
+                                               size_t num_bytes) {
+  std::memcpy(destination, data_ptr_, num_bytes);
+  Advance(num_bytes);
+}
+
+inline void FuzzedDataProvider::Advance(size_t num_bytes) {
+  if (num_bytes > remaining_bytes_)
+    abort();
+
+  data_ptr_ += num_bytes;
+  remaining_bytes_ -= num_bytes;
+}
+
+template <typename T>
+std::vector<T> FuzzedDataProvider::ConsumeBytes(size_t size, size_t num_bytes) {
+  static_assert(sizeof(T) == sizeof(uint8_t), "Incompatible data type.");
+
+  // The point of using the size-based constructor below is to increase the
+  // odds of having a vector object with capacity being equal to the length.
+  // That part is always implementation specific, but at least both libc++ and
+  // libstdc++ allocate the requested number of bytes in that constructor,
+  // which seems to be a natural choice for other implementations as well.
+  // To increase the odds even more, we also call |shrink_to_fit| below.
+  std::vector<T> result(size);
+  if (size == 0) {
+    if (num_bytes != 0)
+      abort();
+    return result;
+  }
+
+  CopyAndAdvance(result.data(), num_bytes);
+
+  // Even though |shrink_to_fit| is also implementation specific, we expect it
+  // to provide an additional assurance in case vector's constructor allocated
+  // a buffer which is larger than the actual amount of data we put inside it.
+  result.shrink_to_fit();
+  return result;
+}
+
+template <typename TS, typename TU>
+TS FuzzedDataProvider::ConvertUnsignedToSigned(TU value) {
+  static_assert(sizeof(TS) == sizeof(TU), "Incompatible data types.");
+  static_assert(!std::numeric_limits<TU>::is_signed,
+                "Source type must be unsigned.");
+
+  // TODO(Dor1s): change to `if constexpr` once C++17 becomes mainstream.
+  if (std::numeric_limits<TS>::is_modulo)
+    return static_cast<TS>(value);
+
+  // Avoid using implementation-defined unsigned to signed conversions.
+  // To learn more, see https://stackoverflow.com/questions/13150449.
+  if (value <= std::numeric_limits<TS>::max()) {
+    return static_cast<TS>(value);
+  } else {
+    constexpr auto TS_min = std::numeric_limits<TS>::min();
+    return TS_min + static_cast<TS>(value - TS_min);
+  }
+}
+
+#endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
@@ -0,0 +1,113 @@
+#include <stddef.h>
+#include <string.h>
+
+#ifdef __APPLE__
+#include <dlfcn.h>
+#include <pthread.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Prefix compared against the summary line to recognize UBSan findings. */
+static const char UBSAN_SUMMARY_PREFIX[] = "SUMMARY: UndefinedBehaviorSanitizer:";
+/*
+ * Callback most recently passed to the hooked __sanitizer_set_death_callback;
+ * stays NULL until that hook has been called.
+ */
+static void (*sanitizer_death_callback)(void) = NULL;
+
+/*
+ * By linking this file into a fuzz test (and adding a linker flag on Linux),
+ * non-fatal sanitizer findings will still write an input to disk.
+ *
+ * For both macOS and Linux, we hook:
+ * - __sanitizer_set_death_callback, to which libFuzzer provides a callback that
+ *   can be used to dump the current input;
+ * - __sanitizer_report_error_summary, which is executed by all sanitizers on a
+ *   finding, regardless of whether it is fatal. Since this function is provided
+ *   the summary line, we can invoke the death callback saved by the first hook
+ *   only if needed.
+ */
+#ifdef __APPLE__
+/*
+ * On macOS, sanitizers are exclusively linked dynamically, which allows us to
+ * wrap functions simply by defining them and looking up the original function
+ * via dlsym(RTLD_NEXT, ...). We can't use the --wrap linker flag since the
+ * macOS linker doesn't support it.
+ */
+void __sanitizer_set_death_callback(void (*callback)(void)) {
+  /* Signature used to call the definition that follows this one. */
+  typedef void (*set_death_callback_fn)(void (*)());
+
+  /* Keep our own copy so the summary hook can invoke the callback later. */
+  sanitizer_death_callback = callback;
+
+  /* Hand the callback on to the next definition in lookup order. */
+  set_death_callback_fn next_set_death_callback =
+    (set_death_callback_fn)(dlsym(RTLD_NEXT, "__sanitizer_set_death_callback"));
+  next_set_death_callback(callback);
+}
+
+/*
+ * Ensure that ASan's verify_interceptors check passes: It checks that puts and
+ * __sanitizer_report_error_summary are defined in the same object. Since we
+ * hook the latter, we also have to (trivially) hook the former.
+ * https://github.com/llvm/llvm-project/blob/f8a469fc572778d05b72f34a772082cf3abd3cda/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp#L987-L993
+ * Older versions of LLVM check for pthread_create instead:
+ * https://github.com/llvm/llvm-project/blob/abc51fac09593ec048b3b298fa274af823e0a22d/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp#L1061-L1067
+ */
+int puts(const char *str) {
+  /* Trivial pass-through: look up the next definition of puts and call it. */
+  typedef int (*puts_fn)(const char *);
+  puts_fn next_puts = (puts_fn)(dlsym(RTLD_NEXT, "puts"));
+  return next_puts(str);
+}
+
+int pthread_create(pthread_t *thread,
+                   const pthread_attr_t *attr,
+                   void *(*start_routine)(void *),
+                   void *arg) {
+  /* Trivial pass-through to the next definition of pthread_create. */
+  typedef int (*pthread_create_fn)(pthread_t *, const pthread_attr_t *,
+                                   void *(*)(void *), void *);
+  pthread_create_fn next_pthread_create =
+    (pthread_create_fn)(dlsym(RTLD_NEXT, "pthread_create"));
+  return next_pthread_create(thread, attr, start_routine, arg);
+}
+
+/*
+ * Hook for the sanitizer summary line: forwards |error_summary| to the next
+ * definition of this function and, if the line starts with the UBSan summary
+ * prefix, invokes the saved death callback.
+ */
+void __sanitizer_report_error_summary(const char *error_summary) {
+  void *real_sanitizer_report_error_summary =
+    dlsym(RTLD_NEXT, "__sanitizer_report_error_summary");
+  ((void (*)(const char *))(real_sanitizer_report_error_summary))(error_summary);
+  /*
+   * A finding may be reported before any death callback has been registered.
+   * There is nothing to invoke in that case, and calling through the NULL
+   * pointer would crash.
+   */
+  if (sanitizer_death_callback == NULL) {
+    return;
+  }
+  if (strncmp(UBSAN_SUMMARY_PREFIX, error_summary, strlen(UBSAN_SUMMARY_PREFIX)) == 0) {
+    sanitizer_death_callback();
+  }
+}
+#else
+/*
+ * On Linux, the --wrap flag of GNU ld can be used to wrap all calls to a given
+ * function. We can't use the macOS approach as sanitizer runtimes can be linked
+ * statically.
+ */
+/*
+ * Name under which the --wrap linker flag exposes the original
+ * __sanitizer_set_death_callback.
+ */
+void __real___sanitizer_set_death_callback(void (*callback)(void));
+
+/*
+ * Replacement that --wrap substitutes for __sanitizer_set_death_callback:
+ * stores |callback| in sanitizer_death_callback, then forwards it to the
+ * original function.
+ */
+void __wrap___sanitizer_set_death_callback(void (*callback)(void)) {
+  sanitizer_death_callback = callback;
+  __real___sanitizer_set_death_callback(callback);
+}
+
+/*
+ * clang mangling applied to __sanitizer::Printf(const char *format, ...),
+ * declared under its mangled name so it can be called from this extern "C"
+ * code.
+ */
+void _ZN11__sanitizer6PrintfEPKcz(const char *format, ...);
+
+/*
+ * If wrapped with --wrap, the __wrap_ version of this function is never called.
+ * It is not clear why. Instead, we inline the real implementation of this
+ * function, which consists of a single call to an internal implementation of
+ * Printf.
+ */
+void __sanitizer_report_error_summary(const char *error_summary) {
+  /*
+   * The summary is runtime text, not a format string: pass it as an argument
+   * so that any '%' it contains is printed verbatim instead of being parsed
+   * as a conversion specifier. "%s\n" mirrors the sanitizer runtime's own
+   * definition of this function.
+   */
+  _ZN11__sanitizer6PrintfEPKcz("%s\n", error_summary);
+  /* Nothing to invoke until a death callback has been registered. */
+  if (sanitizer_death_callback == NULL) {
+    return;
+  }
+  /*
+   * Do not emit the input twice for ASan, which is always fatal.
+   * TODO: This will change if we introduce --recover-asan.
+   * TODO: Since we do not take the state of --recover-ubsan into account, we
+   *  dump the input twice with --recover-ubsan=false. This is harmless as it
+   *  only pollutes the verbose flags, but should still be fixed.
+   */
+  if (strncmp(UBSAN_SUMMARY_PREFIX, error_summary, strlen(UBSAN_SUMMARY_PREFIX)) == 0) {
+    sanitizer_death_callback();
+  }
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif