diff --git a/dev/fuzzing/build.sh b/dev/fuzzing/build.sh
new file mode 100755
index 000000000..1794e5c49
--- /dev/null
+++ b/dev/fuzzing/build.sh
@@ -0,0 +1,10 @@
+# Install XlsxWriter from source, build the fuzz harnesses and pack the seed corpus.
+cd "$SRC"/XlsxWriter
+pip3 install .
+
+# Build fuzzers in $OUT
+for fuzzer in $(find dev/fuzzing -name '*_fuzzer.py');do
+  compile_python_fuzzer "$fuzzer"
+done
+# Quote the variables so that paths containing spaces are not word-split.
+zip -q "$OUT"/xlsx_fuzzer_seed_corpus.zip "$SRC"/corpus/*
diff --git a/dev/fuzzing/corpus/test_corpus b/dev/fuzzing/corpus/test_corpus
new file mode 100644
index 000000000..21380b074
--- /dev/null
+++ b/dev/fuzzing/corpus/test_corpus
@@ -0,0 +1 @@
+=IF(A1 > B1, A1 * 1.5, B1 * 2)
\ No newline at end of file
diff --git a/dev/fuzzing/fuzz_helpers.py b/dev/fuzzing/fuzz_helpers.py
new file mode 100644
index 000000000..860d4cb86
--- /dev/null
+++ b/dev/fuzzing/fuzz_helpers.py
@@ -0,0 +1,63 @@
+"""Helpers that extend atheris.FuzzedDataProvider for the fuzz harnesses."""
+import io
+import tempfile
+import atheris
+import contextlib
+from typing import List, Set, Dict, Tuple, Any, Iterator, Union
+
+
+class EnhancedFuzzedDataProvider(atheris.FuzzedDataProvider):
+    """FuzzedDataProvider with variable-length and file-backed consumers."""
+
+    def ConsumeRandomBytes(self) -> bytes:
+        """Consume a byte string whose length is itself drawn from the input."""
+        return self.ConsumeBytes(self.ConsumeIntInRange(0, self.remaining_bytes()))
+
+    def ConsumeRandomString(self) -> str:
+        """Consume a surrogate-free string whose length is drawn from the input."""
+        return self.ConsumeUnicodeNoSurrogates(self.ConsumeIntInRange(0, self.remaining_bytes()))
+
+    def ConsumeRemainingString(self) -> str:
+        """Consume everything that is left as a surrogate-free string."""
+        return self.ConsumeUnicodeNoSurrogates(self.remaining_bytes())
+
+    def ConsumeRemainingBytes(self) -> bytes:
+        """Consume everything that is left as bytes."""
+        return self.ConsumeBytes(self.remaining_bytes())
+
+    def _ConsumeFileData(self, all_data: bool, as_bytes: bool) -> Union[bytes, str]:
+        """Pick the payload for a fuzzed file: all remaining input or a random slice."""
+        if all_data:
+            return self.ConsumeRemainingBytes() if as_bytes else self.ConsumeRemainingString()
+        return self.ConsumeRandomBytes() if as_bytes else self.ConsumeRandomString()
+
+    @contextlib.contextmanager
+    def ConsumeMemoryFile(self, all_data: bool = False, as_bytes: bool = True) -> Iterator[Union[io.BytesIO, io.StringIO]]:
+        """Yield an in-memory file holding fuzz data and close it on exit.
+
+        all_data selects all remaining input instead of a random-length slice;
+        as_bytes selects io.BytesIO (True) or io.StringIO (False).
+        """
+        file_data = self._ConsumeFileData(all_data, as_bytes)
+        file = io.BytesIO(file_data) if as_bytes else io.StringIO(file_data)
+        try:
+            yield file
+        finally:
+            # Close the stream even when the caller's with-block raises.
+            file.close()
+
+    @contextlib.contextmanager
+    def ConsumeTemporaryFile(self, suffix: str, all_data: bool = False, as_bytes: bool = True) -> Iterator[str]:
+        """Yield the path of a named temporary file holding fuzz data.
+
+        all_data and as_bytes select the payload as in ConsumeMemoryFile; the
+        file name ends with suffix.
+        """
+        file_data = self._ConsumeFileData(all_data, as_bytes)
+        mode = 'w+b' if as_bytes else 'w+'
+        # The with-statement closes the file even when the caller's block raises.
+        with tempfile.NamedTemporaryFile(mode=mode, suffix=suffix) as tfile:
+            tfile.write(file_data)
+            tfile.seek(0)
+            tfile.flush()
+            yield tfile.name
diff --git a/dev/fuzzing/xlsx_fuzzer.py b/dev/fuzzing/xlsx_fuzzer.py
new file mode 100644
index 000000000..49a5e9e7b
--- /dev/null
+++ b/dev/fuzzing/xlsx_fuzzer.py
@@ -0,0 +1,70 @@
+"""Atheris fuzz harness for XlsxWriter's worksheet write methods."""
+from enum import Enum
+
+import atheris
+import sys
+from io import BytesIO
+
+from fuzz_helpers import EnhancedFuzzedDataProvider
+import struct
+
+with atheris.instrument_imports(include=['xlsxwriter']):
+    import xlsxwriter
+    import xlsxwriter.worksheet
+    from xlsxwriter.exceptions import XlsxWriterException
+
+
+class FuncChoice(Enum):
+    """Worksheet write method that a single fuzz input exercises."""
+    WRITE_STRING = 0
+    WRITE_NUMBER = 1
+    WRITE_FORMULA = 2
+
+
+choices = [FuncChoice.WRITE_STRING, FuncChoice.WRITE_NUMBER, FuncChoice.WRITE_FORMULA]
+
+
+def TestOneInput(data):
+    """Write fuzzed values into the cells of an in-memory workbook.
+
+    Returns -1 when the library rejects the input with an expected exception.
+    """
+    fdp = EnhancedFuzzedDataProvider(data)
+
+    try:
+        out = BytesIO()
+        with xlsxwriter.Workbook(out) as wb:
+            ws = wb.add_worksheet()
+
+            text = fdp.ConsumeRandomString()
+            func_choice = fdp.PickValueInList(choices)
+            # write_number() needs a real number rather than the string;
+            # ConsumeRegularFloat() never yields NaN or Inf.
+            number = fdp.ConsumeRegularFloat()
+
+            for row in range(fdp.ConsumeIntInRange(0, 10)):
+                # The column count is drawn again for every row.
+                for col in range(fdp.ConsumeIntInRange(0, 10)):
+                    if func_choice is FuncChoice.WRITE_STRING:
+                        ws.write_string(row, col, text)
+                    elif func_choice is FuncChoice.WRITE_NUMBER:
+                        ws.write_number(row, col, number)
+                    else:
+                        ws.write_formula(row, col, text)
+    except (XlsxWriterException, struct.error):
+        return -1
+    except TypeError as e:
+        if 'must be real number' in str(e):
+            return -1
+        # Any other TypeError is a genuine finding; keep its traceback.
+        raise
+
+
+def main():
+    """Hand TestOneInput to Atheris and start fuzzing."""
+    atheris.Setup(sys.argv, TestOneInput)
+    atheris.Fuzz()
+
+
+if __name__ == "__main__":
+    main()