From 267cb28bc43dd865e711c2e61f31a838da04a5ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Wilczy=C5=84ski?= Date: Fri, 6 Oct 2023 20:28:44 +0200 Subject: [PATCH] test: Add a benchmark for deeply nested objects --- performance/nested_benchmark.py | 152 ++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 performance/nested_benchmark.py diff --git a/performance/nested_benchmark.py b/performance/nested_benchmark.py new file mode 100644 index 0000000..ce7f322 --- /dev/null +++ b/performance/nested_benchmark.py @@ -0,0 +1,152 @@ +"""Benchmark for Marshmallow serialization of a moderately complex object. + +""" +import argparse +import cProfile +import gc +import time +import timeit +from marshmallow import Schema, fields, ValidationError +from deepfriedmarshmallow import JitSchema + + +# Custom validator +def must_not_be_blank(data): + if not data: + raise ValidationError("Data not provided.") + + +def create_quotes_schema(jit): + SchemaBase = JitSchema if jit else Schema + + class AuthorSchema(SchemaBase): + class Meta: + jit_options = { + "no_callable_fields": True, + } + + id = fields.Int() + first = fields.Str() + last = fields.Str() + book_count = fields.Float() + age = fields.Float() + address = fields.Str() + deceased = fields.Boolean() + + def full_name(self, obj): + return obj.first + " " + obj.last + + def format_name(self, author): + return "{0}, {1}".format(author.last, author.first) + + class QuoteSchema(SchemaBase): + class Meta: + jit_options = { + "no_callable_fields": True, + "expected_marshal_type": "object", + } + + id = fields.Int() + authors = fields.Nested(AuthorSchema, many=True) + content = fields.Str(required=True) + posted_at = fields.Int() + book_name = fields.Str() + page_number = fields.Float() + line_number = fields.Float() + col_number = fields.Float() + is_verified = fields.Boolean() + + return QuoteSchema(many=True) + + +class Author: + def __init__(self, id, first, last, book_count, age, address, deceased): + self.id = id + self.first = first + self.last = last + self.book_count = book_count + self.age = age + self.address = address + self.deceased = deceased + + +class Quote: + def __init__(self, id, authors, content, posted_at, book_name, page_number, line_number, col_number, is_verified): + self.id = id + self.authors = authors + self.content = content + self.posted_at = posted_at + self.book_name = book_name + self.page_number = page_number + self.line_number = line_number + self.col_number = col_number + self.is_verified = is_verified + + +def run_timeit(quotes, iterations, repeat, jit=False, load=False, profile=False): + quotes_schema = create_quotes_schema(jit) + if profile: + profile = cProfile.Profile() + profile.enable() + dumped_quotes = quotes_schema.dump(quotes) + gc.collect() + + if load: + + def marshmallow_func(): + quotes_schema.load(dumped_quotes, many=True) + + else: + + def marshmallow_func(): + quotes_schema.dump(quotes) + + best = min(timeit.repeat(marshmallow_func, "gc.enable()", number=iterations, repeat=repeat)) + if profile: + profile.disable() + file_name = "optimized.pprof" if jit else "original.pprof" + profile.dump_stats(file_name) + + usec = best * 1e6 / iterations + return usec + + +def main(): + parser = argparse.ArgumentParser(description="Runs a benchmark of Marshmallow.") + parser.add_argument("--iterations", type=int, default=1000, help="Number of iterations to run per test.") + parser.add_argument( + "--repeat", + type=int, + default=5, + help="Number of times to repeat the performance test. The minimum will be used.", + ) + parser.add_argument("--object-count", type=int, default=20, help="Number of objects to dump.") + parser.add_argument("--authors-count", type=int, default=10, help="Number of author objects in each quote object.") + parser.add_argument( + "--profile", action="store_true", help="Whether or not to profile Marshmallow while running the benchmark." + ) + args = parser.parse_args() + + authors = [] + for i in range(args.authors_count): + authors.append(Author(i, "Foo", "Bar", 42, 66, "123 Fake St", False)) + + quotes = [] + for i in range(args.object_count): + quotes.append(Quote(i, authors, "Hello World", time.time(), "The World", 34, 3, 70, False)) + + print("Benchmark Result:") + original_dump_time = run_timeit(quotes, args.iterations, args.repeat, load=False, jit=False, profile=args.profile) + print("\tOriginal Dump Time: {0:.2f} usec/dump".format(original_dump_time)) + original_load_time = run_timeit(quotes, args.iterations, args.repeat, load=True, jit=False, profile=args.profile) + print("\tOriginal Load Time: {0:.2f} usec/load".format(original_load_time)) + optimized_dump_time = run_timeit(quotes, args.iterations, args.repeat, load=False, jit=True, profile=args.profile) + print("\tOptimized Dump Time: {0:.2f} usec/dump".format(optimized_dump_time)) + optimized_load_time = run_timeit(quotes, args.iterations, args.repeat, load=True, jit=True, profile=args.profile) + print("\tOptimized Load Time: {0:.2f} usec/load".format(optimized_load_time)) + print("\tSpeed up for dump: {0:.2f}x".format(original_dump_time / optimized_dump_time)) + print("\tSpeed up for load: {0:.2f}x".format(original_load_time / optimized_load_time)) + + +if __name__ == "__main__": + main()