From eddb78995544e35e6248017c6f2e5bd02e47bed8 Mon Sep 17 00:00:00 2001
From: rusticluftig
Date: Thu, 28 Mar 2024 21:32:58 -0700
Subject: [PATCH] Remove the unnecessary model_config to reduce the serialized
 message size

---
 common/data.py                                 |  9 ++++++---
 storage/miner/sqlite_miner_storage.py          | 10 ++++++----
 tests/common/test_protocol.py                  | 17 +++++++++++++++--
 .../storage/miner/test_sqlite_miner_storage.py | 12 +++++++-----
 4 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/common/data.py b/common/data.py
index 0bec5ea6..69496e43 100644
--- a/common/data.py
+++ b/common/data.py
@@ -29,7 +29,8 @@ class TimeBucket(StrictBaseModel):
     """Represents a specific time bucket in the linear flow of time."""
 
     # Makes the object "Immutable" once created.
-    model_config = ConfigDict(frozen=True)
+    class Config:
+        frozen = True
 
     id: PositiveInt = Field(
         description="Monotonically increasing value idenitifying the given time bucket"
@@ -105,7 +106,8 @@ class DataEntity(StrictBaseModel):
     """A logical unit of data that has been scraped. E.g. a Reddit post"""
 
     # Makes the object "Immutable" once created.
-    model_config = ConfigDict(frozen=True)
+    class Config:
+        frozen = True
 
     # Path from which the entity was generated.
     uri: str
@@ -136,7 +138,8 @@ class DataEntityBucketId(StrictBaseModel):
     """Uniquely identifies a bucket to group DataEntities by time bucket, source, and label."""
 
     # Makes the object "Immutable" once created.
-    model_config = ConfigDict(frozen=True)
+    class Config:
+        frozen = True
 
     time_bucket: TimeBucket
     source: DataSource = Field()
diff --git a/storage/miner/sqlite_miner_storage.py b/storage/miner/sqlite_miner_storage.py
index 947d15c1..3afa3145 100644
--- a/storage/miner/sqlite_miner_storage.py
+++ b/storage/miner/sqlite_miner_storage.py
@@ -225,6 +225,11 @@ def list_data_entities_in_data_entity_bucket(
                     # If we would go over the max DataEntityBucket size instead return early.
                     return data_entities
                 else:
+                    # Add the optional Label field if not null.
+                    label = None
+                    if row["label"] != "NULL":
+                        label = DataLabel(value=row["label"])
+
                     # Construct the new DataEntity with all non null columns.
                     data_entity = DataEntity(
                         uri=row["uri"],
@@ -232,12 +237,9 @@ def list_data_entities_in_data_entity_bucket(
                         source=DataSource(row["source"]),
                         content=row["content"],
                         content_size_bytes=row["contentSizeBytes"],
+                        label=label,
                     )
 
-                    # Add the optional Label field if not null.
-                    if row["label"] != "NULL":
-                        data_entity.label = DataLabel(value=row["label"])
-
                     data_entities.append(data_entity)
 
                     running_size += row["contentSizeBytes"]
diff --git a/tests/common/test_protocol.py b/tests/common/test_protocol.py
index 511b633f..58e0c017 100644
--- a/tests/common/test_protocol.py
+++ b/tests/common/test_protocol.py
@@ -26,7 +26,7 @@ def serialize_like_dendrite(synapse: bt.Synapse) -> str:
 
 def serialize_like_axon(synapse: bt.Synapse) -> str:
     """Serializes a synapse like an Axon would."""
-    return serialize_like_dendrite(synapse)
+    return synapse.json()
 
 
 def deserialize(json_str: str, cls: Type) -> bt.Synapse:
@@ -133,7 +133,20 @@ def test_synapse_serialization(self):
         # Also check that the headers can be constructed.
         request.to_headers()
 
-        # TODO: Add a test for the response.
+        response = request.copy()
+        response.data_entities = [
+            DataEntity(
+                uri=f"http://uri/{i}",
+                content=b"Hello, world!",
+                datetime=dt.datetime.utcnow(),
+                label=DataLabel(value="r/bittensor_"),
+                source=DataSource.REDDIT,
+                content_size_bytes=13,
+            )
+            for i in range(350_000)
+        ]
+        response_json = serialize_like_axon(response)
+        print(len(response_json))
 
 
 if __name__ == "__main__":
diff --git a/tests/storage/miner/test_sqlite_miner_storage.py b/tests/storage/miner/test_sqlite_miner_storage.py
index 63884518..a124021e 100644
--- a/tests/storage/miner/test_sqlite_miner_storage.py
+++ b/tests/storage/miner/test_sqlite_miner_storage.py
@@ -94,13 +94,15 @@ def test_store_identical_entities(self):
         self.test_storage.store_data_entities([entity1, entity2])
 
         # Update the contents
-        entity1.content = bytes(50)
-        entity1.content_size_bytes = 50
-        entity2.content = bytes(100)
-        entity2.content_size_bytes = 100
+        entity1_copy = entity1.copy(
+            update={"content": bytes(50), "content_size_bytes": 50}
+        )
+        entity2_copy = entity2.copy(
+            update={"content": bytes(100), "content_size_bytes": 100}
+        )
 
         # Store the entities again.
-        self.test_storage.store_data_entities([entity1, entity2])
+        self.test_storage.store_data_entities([entity1_copy, entity2_copy])
 
         # Confirm that only one set of entities were stored and the content matches the latest.
         with contextlib.closing(self.test_storage._create_connection()) as connection:
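
---

As a quick sanity check that model_config no longer shows up in each serialized
entity (which is what the commit subject implies was inflating the payload), a
sketch like the one below could be run against the patched models. It assumes
pydantic v1-style .json() serialization, matching the tests above; the printed
size will vary with the entity contents.

    import datetime as dt

    from common.data import DataEntity, DataLabel, DataSource

    # Build one entity the same way the new test in test_protocol.py does.
    entity = DataEntity(
        uri="http://uri/0",
        content=b"Hello, world!",
        datetime=dt.datetime.utcnow(),
        label=DataLabel(value="r/bittensor_"),
        source=DataSource.REDDIT,
        content_size_bytes=13,
    )

    serialized = entity.json()

    # With the inner Config class, the config is no longer emitted as a
    # field on every instance.
    assert "model_config" not in serialized
    print(len(serialized))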