From c9862955747112016fce8bd274298f0d8b477cd9 Mon Sep 17 00:00:00 2001
From: David Lopes
Date: Thu, 6 Jun 2024 16:19:37 -0500
Subject: [PATCH] Fix encoding to dict for events (#65)

* Make events behave similarly to the EEC while in debug mode

* #50 - Fix event batch size calculation

We can't use len(dict) because that returns the number of items in the
dict, not its size in bytes. We need to use len(f"{dict}".encode()),
which is of course more expensive, but it is the only way to get the
size of the data in bytes.

A better approach might be to split the list in half iteratively until
each sub-list fits under the size limit, because otherwise we need to
calculate the size for every single item in the list, which is too
expensive (see the sketches after the patch).

* Rework batch logic

* Fix #50

* Fix #52

Add tests for splitting metrics and events

* Fix debug client events when sending a single event

* Encode to JSON and add test

* Fix test

---------

Co-authored-by: David Lopes
---
 dynatrace_extension/__about__.py         |  2 +-
 dynatrace_extension/sdk/communication.py |  4 +++-
 tests/sdk/test_communication.py          | 23 ++++++++++++++++++++---
 3 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/dynatrace_extension/__about__.py b/dynatrace_extension/__about__.py
index b02d040..8458ace 100644
--- a/dynatrace_extension/__about__.py
+++ b/dynatrace_extension/__about__.py
@@ -2,4 +2,4 @@
 #
 # SPDX-License-Identifier: MIT
 
-__version__ = "1.1.21"
+__version__ = "1.1.22"
diff --git a/dynatrace_extension/sdk/communication.py b/dynatrace_extension/sdk/communication.py
index 920089c..78c621e 100644
--- a/dynatrace_extension/sdk/communication.py
+++ b/dynatrace_extension/sdk/communication.py
@@ -446,7 +446,9 @@ def divide_into_batches(items: Sequence[dict | str], max_size_bytes: int, join_w
     if join_with is not None:
         items = join_with.join(items)
-    encoded = f"{items}".encode(errors="replace")
+        encoded = f"{items}".encode(errors="replace")
+    else:
+        encoded = json.dumps(items).encode(errors="replace")
 
     size = len(encoded)
     if size <= max_size_bytes:
         yield encoded
diff --git a/tests/sdk/test_communication.py b/tests/sdk/test_communication.py
index 9f06e76..13d307b 100644
--- a/tests/sdk/test_communication.py
+++ b/tests/sdk/test_communication.py
@@ -1,3 +1,4 @@
+import json
 import unittest
 from unittest.mock import MagicMock, mock_open, patch
 
@@ -53,13 +54,13 @@ def test_small_log_chunk(self):
 
     def test_large_metric_chunk(self):
 
-        metrics = ['my.metric,dim="dim" 10'] * 500 * 100  # 1_300_000 bytes, but becomes 1_149_999 with the newlines
+        metrics = ['my.metric,dim="dim" 10'] * 500 * 100  # it needs to be divided into 2 lists, each with 650_000 bytes
 
         chunks = list(divide_into_batches(metrics, MAX_METRIC_REQUEST_SIZE, "\n"))
 
         self.assertEqual(len(chunks), 2)
-        self.assertEqual(len(chunks[0]), 574999)
-        self.assertEqual(len(chunks[1]), 575000)
+        self.assertEqual(len(chunks[0]), 650000)
+        self.assertEqual(len(chunks[1]), 650002)
 
     def test_small_metric_chunk(self):
         metrics = ['my.metric,dim="dim" 10'] * 100
@@ -73,3 +74,19 @@ def test_no_metrics(self):
         chunks = list(divide_into_batches(metrics, MAX_METRIC_REQUEST_SIZE, "\n"))
 
         self.assertEqual(len(chunks), 0)
+
+    def test_large_log_chunk_valid_json(self):
+
+        events = []
+        for i in range(5000):
+            attributes = {}
+            for j in range(150):
+                attributes[f"attribute{j}"] = j
+            events.append(attributes)
+
+        # it needs to be divided into 4 lists, each with 3_665_000 bytes
+        chunks = list(divide_into_batches(events, MAX_LOG_REQUEST_SIZE))
+        self.assertEqual(len(chunks), 4)
+
+        for chunk in chunks:
+            json.loads(chunk)
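
A note on the size calculation discussed in the commit message: len() on a
dict counts its keys, while the request limits are enforced in bytes, so the
check has to run on the serialized payload. A standalone illustration (the
event literal is made up for the example):

    import json

    event = {"attribute0": 0, "attribute1": 1}

    # len() of a dict is its number of keys, not its size
    print(len(event))                       # 2
    # the batch limit applies to the encoded payload
    print(len(json.dumps(event).encode()))  # 34 bytes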
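
The iterative-halving idea from the commit message could look like the sketch
below. split_into_batches is a hypothetical helper written for this note and
assumes JSON list payloads; the SDK's actual divide_into_batches, of which the
diff above shows only the first few lines, may be implemented differently:

    import json
    from collections.abc import Iterator, Sequence

    def split_into_batches(events: Sequence[dict], max_size_bytes: int) -> Iterator[bytes]:
        # hypothetical sketch of the halving idea, not the SDK implementation
        if not events:
            return
        encoded = json.dumps(list(events)).encode(errors="replace")
        if len(encoded) <= max_size_bytes:
            yield encoded
            return
        if len(events) == 1:
            # a single event above the limit cannot be split any further
            yield encoded
            return
        middle = len(events) // 2
        yield from split_into_batches(events[:middle], max_size_bytes)
        yield from split_into_batches(events[middle:], max_size_bytes)

Halving means each event is re-serialized at most O(log n) times in total,
rather than measuring every single item on its own.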
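
As for why the else branch in communication.py switches to json.dumps:
interpolating a dict into an f-string renders Python's repr, with single
quotes, which is not valid JSON; json.dumps produces valid JSON. That is what
the new test_large_log_chunk_valid_json verifies by calling json.loads on
every chunk. A minimal comparison (again with a made-up event):

    import json

    event = {"attribute0": 0}

    print(f"{event}")         # {'attribute0': 0} -> Python repr, not valid JSON
    print(json.dumps(event))  # {"attribute0": 0} -> valid JSON

    json.loads(json.dumps(event))   # parses cleanly
    # json.loads(f"{event}") would raise json.JSONDecodeError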