Skip to content

Commit

Permalink
[Core-240] Catalog Client: Add Blob.delete_many method (#12398)
Browse files Browse the repository at this point in the history
GitOrigin-RevId: b5c9b08ba13ae906ca03473b7611909c9678a6cc
  • Loading branch information
stephencpope authored and Descartes Labs Build committed Jan 12, 2024
1 parent b55ba5e commit 6e608d9
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 3 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Changelog
But why would you even want to go there?
- *Breaking Change*: Derived bands, never supported in the AWS environment and catalog products, have been
removed.
- The new `Blob.delete_many` method may be used to delete large numbers of blobs efficiently.

### Compute

Expand Down
67 changes: 64 additions & 3 deletions descarteslabs/core/catalog/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
TypedAttribute,
parse_iso_datetime,
)
from .blob_delete import BlobDelete
from .blob_download import BlobDownload
from .catalog_base import CatalogClient, CatalogObject, check_deleted
from .search import AggregateDateField, GeoSearch, SummarySearchMixin
Expand Down Expand Up @@ -780,6 +781,8 @@ def get_data(
name=None,
client=None,
range=None,
stream=False,
chunk_size=None,
):
"""Downloads storage blob data.
Expand Down Expand Up @@ -808,11 +811,17 @@ def get_data(
(e.g. ``((0, 99), (200, 299))``). A list or tuple of one integer implies
no upper bound; in this case the integer can be negative, indicating the
count back from the end of the blob.
stream : bool, optional
If True, return a generator that will yield the data in chunks. Defaults to False.
chunk_size : int, optional
If stream is True, the size of chunks over which to stream. Default is whatever
chunks are received on the wire.
Returns
-------
bytes
The data retrieved from the Blob.
bytes or generator
The data retrieved from the Blob. If stream is True, returned as an iterator
(generator) which will yield the data in chunks.
Raises
------
Expand All @@ -827,7 +836,59 @@ def get_data(
raise TypeError("Must specify exactly one of id or name parameters")
if not id:
id = f"{storage_type}/{cls.namespace_id(namespace)}/{name}"
return cls(id=id, client=client)._do_download(range=range)

dest = None
if stream:

def generator(response):
try:
yield from response.iter_content(chunk_size)
finally:
response.close()

dest = generator

return cls(id=id, client=client)._do_download(dest=dest, range=range)

@classmethod
def delete_many(cls, ids, client=None):
    """Delete a batch of blobs from the Descartes Labs catalog in one request.

    Only blobs that exist and are owned by the user are deleted. Blobs that
    do not exist, or that the user does not own, are skipped without raising
    an error. To find out which blobs were skipped, compare the supplied
    list of ids with the returned list of deleted ids.

    All blobs to be deleted must belong to the same purchase.

    Parameters
    ----------
    ids : list(str)
        A list of blob ids to delete.
    client : CatalogClient, optional
        A `CatalogClient` instance to use for requests to the Descartes Labs
        catalog. The
        :py:meth:`~descarteslabs.catalog.CatalogClient.get_default_client`
        will be used if not set.

    Returns
    -------
    list(str)
        A list of the ids of the blobs that were successfully deleted.

    Raises
    ------
    ~descarteslabs.exceptions.ClientError or ~descarteslabs.exceptions.ServerError
        :ref:`Spurious exception <network_exceptions>` that can occur during a
        network request.
    """
    # Fall back to the default client only when the caller supplied none.
    catalog_client = (
        CatalogClient.get_default_client() if client is None else client
    )

    # Saving the request object submits the deletion; afterwards its ``ids``
    # attribute is what gets handed back to the caller.
    request = BlobDelete(ids=ids, client=catalog_client)
    request.save()
    return request.ids

def _do_download(self, dest=None, range=None):
download = BlobDownload.get(id=self.id, client=self._client)
Expand Down
31 changes: 31 additions & 0 deletions descarteslabs/core/catalog/blob_delete.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright 2018-2023 Descartes Labs.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .attributes import ListAttribute, TypedAttribute
from .catalog_base import CatalogObjectBase


class BlobDelete(CatalogObjectBase):
    """Internal catalog object representing a bulk blob deletion request.

    This is not intended for direct use; it carries the list of blob ids
    for a bulk delete.
    """

    # NOTE(review): presumably the document type and service endpoint used by
    # the catalog base class when this object is saved -- confirm against
    # CatalogObjectBase.
    _doc_type = "storage_delete"
    _url = "/storage/delete"
    _no_inherit = True

    ids = ListAttribute(
        TypedAttribute(str),
        doc="""list[str]: List of blob IDs to delete.""",
        serializable=True,
        mutable=False,
    )
2 changes: 2 additions & 0 deletions descarteslabs/core/catalog/product.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,8 @@ def delete_related_objects(self):
return DeletionTaskStatus(
id=self.id, _client=self._client, **response["data"]["attributes"]
)
else:
return None

@check_deleted
def get_delete_status(self):
Expand Down

0 comments on commit 6e608d9

Please sign in to comment.