From c9862955747112016fce8bd274298f0d8b477cd9 Mon Sep 17 00:00:00 2001
From: David Lopes
Date: Thu, 6 Jun 2024 16:19:37 -0500
Subject: [PATCH] Fix encoding to dict for events (#65)

* Make events behave similarly to the EEC while in debug mode

* #50 - Fix event batch size calculation

We can't use len(dict) because that returns the number of items in the
dict, not its size in bytes. We need to use len(f"{dict}".encode()),
which is of course more expensive, but it is the only way to get the
size of the data in bytes.

A better approach might be to split the list in half iteratively until
each sub-list fits under the size limit, because otherwise we need to
calculate the size for every single item in the list, which is too
expensive (see the sketches after the patch).

* Rework batch logic

* Fix #50

* Fix #52

Add tests for splitting metrics and events

* Fix debug client events when sending a single event

* Encode to JSON and add test

* Fix test

---------

Co-authored-by: David Lopes
---
 dynatrace_extension/__about__.py         |  2 +-
 dynatrace_extension/sdk/communication.py |  4 +++-
 tests/sdk/test_communication.py          | 23 ++++++++++++++++++++---
 3 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/dynatrace_extension/__about__.py b/dynatrace_extension/__about__.py
index b02d040..8458ace 100644
--- a/dynatrace_extension/__about__.py
+++ b/dynatrace_extension/__about__.py
@@ -2,4 +2,4 @@
 #
 # SPDX-License-Identifier: MIT
 
-__version__ = "1.1.21"
+__version__ = "1.1.22"
diff --git a/dynatrace_extension/sdk/communication.py b/dynatrace_extension/sdk/communication.py
index 920089c..78c621e 100644
--- a/dynatrace_extension/sdk/communication.py
+++ b/dynatrace_extension/sdk/communication.py
@@ -446,7 +446,9 @@ def divide_into_batches(items: Sequence[dict | str], max_size_bytes: int, join_w
     if join_with is not None:
         items = join_with.join(items)
-    encoded = f"{items}".encode(errors="replace")
+        encoded = f"{items}".encode(errors="replace")
+    else:
+        encoded = json.dumps(items).encode(errors="replace")
 
     size = len(encoded)
     if size <= max_size_bytes:
         yield encoded
diff --git a/tests/sdk/test_communication.py b/tests/sdk/test_communication.py
index 9f06e76..13d307b 100644
--- a/tests/sdk/test_communication.py
+++ b/tests/sdk/test_communication.py
@@ -1,3 +1,4 @@
+import json
 import unittest
 from unittest.mock import MagicMock, mock_open, patch
 
@@ -53,13 +54,13 @@ def test_small_log_chunk(self):
 
     def test_large_metric_chunk(self):
 
-        metrics = ['my.metric,dim="dim" 10'] * 500 * 100  # 1_300_000 bytes, but becomes 1_149_999 with the newlines
+        metrics = ['my.metric,dim="dim" 10'] * 500 * 100  # it needs to be divided into 2 lists, each with 650_000 bytes
 
         chunks = list(divide_into_batches(metrics, MAX_METRIC_REQUEST_SIZE, "\n"))
 
         self.assertEqual(len(chunks), 2)
-        self.assertEqual(len(chunks[0]), 574999)
-        self.assertEqual(len(chunks[1]), 575000)
+        self.assertEqual(len(chunks[0]), 650000)
+        self.assertEqual(len(chunks[1]), 650002)
 
     def test_small_metric_chunk(self):
         metrics = ['my.metric,dim="dim" 10'] * 100
@@ -73,3 +74,19 @@ def test_no_metrics(self):
         chunks = list(divide_into_batches(metrics, MAX_METRIC_REQUEST_SIZE, "\n"))
 
         self.assertEqual(len(chunks), 0)
+
+    def test_large_log_chunk_valid_json(self):
+
+        events = []
+        for i in range(5000):
+            attributes = {}
+            for j in range(150):
+                attributes[f"attribute{j}"] = j
+            events.append(attributes)
+
+        # it needs to be divided into 4 lists, each with 3_665_000 bytes
+        chunks = list(divide_into_batches(events, MAX_LOG_REQUEST_SIZE))
+        self.assertEqual(len(chunks), 4)
+
+        for chunk in chunks:
+            json.loads(chunk)
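
A note on the size calculation discussed in the commit message: len() on a
dict counts its keys, while the request limits are enforced in bytes, so the
check has to run on the serialized payload. A standalone illustration (the
event literal is made up for the example):

    import json

    event = {"attribute0": 0, "attribute1": 1}

    # len() of a dict is its number of keys, not its size
    print(len(event))                       # 2
    # the batch limit applies to the encoded payload
    print(len(json.dumps(event).encode()))  # 34 bytes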
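
The iterative-halving idea from the commit message could look like the sketch
below. split_into_batches is a hypothetical helper written for this note and
assumes JSON list payloads; the SDK's actual divide_into_batches, of which the
diff above shows only the first few lines, may be implemented differently:

    import json
    from collections.abc import Iterator, Sequence

    def split_into_batches(events: Sequence[dict], max_size_bytes: int) -> Iterator[bytes]:
        # hypothetical sketch of the halving idea, not the SDK implementation
        if not events:
            return
        encoded = json.dumps(list(events)).encode(errors="replace")
        if len(encoded) <= max_size_bytes:
            yield encoded
            return
        if len(events) == 1:
            # a single event above the limit cannot be split any further
            yield encoded
            return
        middle = len(events) // 2
        yield from split_into_batches(events[:middle], max_size_bytes)
        yield from split_into_batches(events[middle:], max_size_bytes)

Halving means each event is re-serialized at most O(log n) times in total,
rather than measuring every single item on its own.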
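
As for why the else branch in communication.py switches to json.dumps:
interpolating a dict into an f-string renders Python's repr, with single
quotes, which is not valid JSON; json.dumps produces valid JSON. That is what
the new test_large_log_chunk_valid_json verifies by calling json.loads on
every chunk. A minimal comparison (again with a made-up event):

    import json

    event = {"attribute0": 0}

    print(f"{event}")         # {'attribute0': 0} -> Python repr, not valid JSON
    print(json.dumps(event))  # {"attribute0": 0} -> valid JSON

    json.loads(json.dumps(event))   # parses cleanly
    # json.loads(f"{event}") would raise json.JSONDecodeError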