From 6e608d96ef5503b6f923c6cc3a6aa58b7660070e Mon Sep 17 00:00:00 2001 From: "Stephen C. Pope" Date: Fri, 12 Jan 2024 11:39:40 -0700 Subject: [PATCH] [Core-240] Catalog Client: Add Blob.delete_many method (#12398) GitOrigin-RevId: b5c9b08ba13ae906ca03473b7611909c9678a6cc --- README.md | 1 + descarteslabs/core/catalog/blob.py | 67 ++++++++++++++++++++++- descarteslabs/core/catalog/blob_delete.py | 31 +++++++++++ descarteslabs/core/catalog/product.py | 2 + 4 files changed, 98 insertions(+), 3 deletions(-) create mode 100644 descarteslabs/core/catalog/blob_delete.py diff --git a/README.md b/README.md index 09a2facd..f809da9e 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ Changelog But why would you even want to go there? - *Breaking Change*: Derived bands, never supported in the AWS environment and catalog products, have been removed. +- The new `Blob.delete_many` method may be used to delete large numbers of blobs efficiently. ### Compute diff --git a/descarteslabs/core/catalog/blob.py b/descarteslabs/core/catalog/blob.py index cdb0d66e..a077be30 100644 --- a/descarteslabs/core/catalog/blob.py +++ b/descarteslabs/core/catalog/blob.py @@ -31,6 +31,7 @@ TypedAttribute, parse_iso_datetime, ) +from .blob_delete import BlobDelete from .blob_download import BlobDownload from .catalog_base import CatalogClient, CatalogObject, check_deleted from .search import AggregateDateField, GeoSearch, SummarySearchMixin @@ -780,6 +781,8 @@ def get_data( name=None, client=None, range=None, + stream=False, + chunk_size=None, ): """Downloads storage blob data. @@ -808,11 +811,17 @@ def get_data( (e.g. ``((0, 99), (200-299))``). A list or tuple of one integer implies no upper bound; in this case the integer can be negative, indicating the count back from the end of the blob. + stream : bool, optional + If True, return a generator that will yield the data in chunks. Defaults to False. + chunk_size : int, optional + If stream is True, the size of chunks over which to stream. 
Default is whatever + chunks are received on the wire. Returns ------- - bytes - The data retrieved from the Blob. + bytes or generator + The data retrieved from the Blob. If stream is True, returned as an iterator + (generator) which will yield the data in chunks. Raises ------ @@ -827,7 +836,59 @@ def get_data( raise TypeError("Must specify exactly one of id or name parameters") if not id: id = f"{storage_type}/{cls.namespace_id(namespace)}/{name}" - return cls(id=id, client=client)._do_download(range=range) + + dest = None + if stream: + + def generator(response): + try: + yield from response.iter_content(chunk_size) + finally: + response.close() + + dest = generator + + return cls(id=id, client=client)._do_download(dest=dest, range=range) + + @classmethod + def delete_many(cls, ids, client=None): + """Delete many blobs from the Descartes Labs catalog. + + Only those blobs that exist and are owned by the user will be deleted. + No errors will be raised for blobs that do not exist or are not owned by + the user. If you need to know, compare the supplied list of ids with the + returned list of deleted ids. + + All blobs to be deleted must belong to the same purchase. + + Parameters + ---------- + ids : list(str) + A list of blob ids to delete. + client : CatalogClient, optional + A `CatalogClient` instance to use for requests to the Descartes Labs catalog. + The :py:meth:`~descarteslabs.catalog.CatalogClient.get_default_client` will + be used if not set. + + Returns + ------- + list(str) + A list of the ids of the blobs that were successfully deleted. + + Raises + ------ + ~descarteslabs.exceptions.ClientError or ~descarteslabs.exceptions.ServerError + :ref:`Spurious exception <network_exceptions>` that can occur during a + network request. 
+ """ + if client is None: + client = CatalogClient.get_default_client() + + blob_delete = BlobDelete(ids=ids, client=client) + + blob_delete.save() + + return blob_delete.ids def _do_download(self, dest=None, range=None): download = BlobDownload.get(id=self.id, client=self._client) diff --git a/descarteslabs/core/catalog/blob_delete.py b/descarteslabs/core/catalog/blob_delete.py new file mode 100644 index 00000000..fde1f97c --- /dev/null +++ b/descarteslabs/core/catalog/blob_delete.py @@ -0,0 +1,31 @@ +# Copyright 2018-2023 Descartes Labs. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .attributes import ListAttribute, TypedAttribute +from .catalog_base import CatalogObjectBase + + +class BlobDelete(CatalogObjectBase): + """Internal class used to perform bulk deleting of blobs.""" + + _doc_type = "storage_delete" + _url = "/storage/delete" + _no_inherit = True + + ids = ListAttribute( + TypedAttribute(str), + mutable=False, + serializable=True, + doc="""list[str]: List of blob IDs to delete.""", + ) diff --git a/descarteslabs/core/catalog/product.py b/descarteslabs/core/catalog/product.py index c75cfd8f..8fae6c9d 100644 --- a/descarteslabs/core/catalog/product.py +++ b/descarteslabs/core/catalog/product.py @@ -341,6 +341,8 @@ def delete_related_objects(self): return DeletionTaskStatus( id=self.id, _client=self._client, **response["data"]["attributes"] ) + else: + return None @check_deleted def get_delete_status(self):