Skip to content
This repository was archived by the owner on Jul 18, 2024. It is now read-only.

Commit c590f5b

Browse files
committed
Improve upload performance for numerous files
- Do not perform remote file point query on overwrite - Further improvements may include moving to a one-time list cache
1 parent 2a798b2 commit c590f5b

File tree

3 files changed

+23
-9
lines changed

3 files changed

+23
-9
lines changed

blobxfer/operations/download.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -820,7 +820,7 @@ def _run(self):
820820
'was specified')
821821
else:
822822
logger.debug(
823-
('{0} files {1:.4f} MiB filesize and/or lmt_ge '
823+
('{0} files {1:.4f} MiB filesize, lmt_ge, or no overwrite '
824824
'skipped').format(
825825
skipped_files, skipped_size / blobxfer.util.MEGABYTE))
826826
logger.debug(

blobxfer/operations/upload.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -886,6 +886,9 @@ def _check_for_existing_remote(self, sa, cont, name):
886886
:rtype: blobxfer.models.azure.StorageEntity
887887
:return: remote storage entity
888888
"""
889+
ase = None
890+
if self._spec.options.overwrite or not sa.can_read_object:
891+
return ase
889892
if self._spec.options.mode == blobxfer.models.azure.StorageModes.File:
890893
fp = blobxfer.operations.azure.file.get_file_properties(
891894
sa.file_client, cont, name)
@@ -918,8 +921,6 @@ def _check_for_existing_remote(self, sa, cont, name):
918921
self._spec.options.store_file_properties.content_type or
919922
blobxfer.util.get_mime_type(ase.name)
920923
)
921-
else:
922-
ase = None
923924
return ase
924925

925926
def _generate_destination_for_source(self, local_path):
@@ -958,10 +959,7 @@ def _generate_destination_for_source(self, local_path):
958959
VectoredIoDistributionMode.Stripe):
959960
ase = None
960961
else:
961-
if sa.can_read_object:
962-
ase = self._check_for_existing_remote(sa, cont, name)
963-
else:
964-
ase = None
962+
ase = self._check_for_existing_remote(sa, cont, name)
965963
if ase is None:
966964
# encryption metadata will be populated later, if required
967965
ase = blobxfer.models.azure.StorageEntity(cont, ed=None)
@@ -1212,7 +1210,7 @@ def _run(self):
12121210
'was specified')
12131211
else:
12141212
logger.debug(
1215-
('{0} files {1:.4f} MiB filesize and/or lmt_ge '
1213+
('{0} files {1:.4f} MiB filesize, lmt_ge, or no overwrite '
12161214
'skipped').format(
12171215
skipped_files, skipped_size / blobxfer.util.MEGABYTE))
12181216
logger.debug(

tests/test_blobxfer_operations_upload.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -928,15 +928,22 @@ def test_check_upload_conditions(gmfm):
928928
def test_check_for_existing_remote(gbp, gfp):
929929
u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock())
930930
u._general_options.dry_run = False
931+
u._spec.options.overwrite = True
931932

932933
sa = mock.MagicMock()
933934
sa.name = 'name'
934935
sa.endpoint = 'ep'
936+
sa.can_read_object = True
935937

936938
u._spec.options.mode = azmodels.StorageModes.File
937939
gfp.return_value = None
938940
assert u._check_for_existing_remote(sa, 'cont', 'name') is None
939941

942+
u._spec.options.overwrite = False
943+
944+
gfp.return_value = None
945+
assert u._check_for_existing_remote(sa, 'cont', 'name') is None
946+
940947
with mock.patch(
941948
'blobxfer.models.crypto.EncryptionMetadata.'
942949
'encryption_metadata_exists', return_value=False):
@@ -989,6 +996,7 @@ def test_check_for_existing_remote(gbp, gfp):
989996
def test_generate_destination_for_source():
990997
u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock())
991998
u._general_options.dry_run = False
999+
u._spec.options.overwrite = False
9921000
u._check_for_existing_remote = mock.MagicMock()
9931001

9941002
lp = mock.MagicMock()
@@ -1041,7 +1049,7 @@ def test_generate_destination_for_source():
10411049
a, b = next(u._generate_destination_for_source(lp))
10421050
assert a == sa
10431051
assert b is not None
1044-
assert u._check_for_existing_remote.call_count == 1 # should not change
1052+
assert u._check_for_existing_remote.call_count == 2
10451053

10461054

10471055
def test_vectorize_and_bind():
@@ -1080,6 +1088,7 @@ def test_vectorize_and_bind():
10801088
# no vectorization
10811089
u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock())
10821090
u._general_options.dry_run = False
1091+
u._spec.options.overwrite = False
10831092
u._spec.options.vectored_io.distribution_mode = \
10841093
models.VectoredIoDistributionMode.Disabled
10851094
u._check_upload_conditions = mock.MagicMock()
@@ -1102,6 +1111,7 @@ def test_vectorize_and_bind():
11021111
# stripe vectorization 1 slice
11031112
u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock())
11041113
u._general_options.dry_run = False
1114+
u._spec.options.overwrite = False
11051115
u._check_upload_conditions = mock.MagicMock()
11061116
u._check_upload_conditions.return_value = ops.UploadAction.Upload
11071117
u._spec.options.vectored_io.distribution_mode = \
@@ -1164,6 +1174,7 @@ def test_vectorize_and_bind():
11641174
# replication single target
11651175
u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock())
11661176
u._general_options.dry_run = False
1177+
u._spec.options.overwrite = False
11671178
u._spec.options.vectored_io.distribution_mode = \
11681179
models.VectoredIoDistributionMode.Replica
11691180
u._check_upload_conditions = mock.MagicMock()
@@ -1216,6 +1227,7 @@ def test_run(lfmo, urm, tmpdir):
12161227
u._general_options.concurrency.md5_processes = 1
12171228
u._general_options.concurrency.crypto_processes = 1
12181229
u._general_options.resume_file = 'resume'
1230+
u._spec.options.overwrite = False
12191231
u._spec.options.store_file_properties.md5 = True
12201232
u._spec.skip_on.md5_match = True
12211233
u._spec.options.rsa_public_key = 'abc'
@@ -1313,6 +1325,7 @@ def test_run(lfmo, urm, tmpdir):
13131325
# regular execution
13141326
u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock())
13151327
u._general_options.dry_run = False
1328+
u._spec.options.overwrite = False
13161329
u._general_options.concurrency.disk_threads = 1
13171330
u._general_options.concurrency.transfer_threads = 1
13181331
u._general_options.concurrency.md5_processes = 1
@@ -1352,6 +1365,7 @@ def test_run(lfmo, urm, tmpdir):
13521365
u._general_options.concurrency.md5_processes = 1
13531366
u._general_options.concurrency.crypto_processes = 0
13541367
u._general_options.resume_file = 'resume'
1368+
u._spec.options.overwrite = False
13551369
u._spec.options.store_file_properties.md5 = True
13561370
u._spec.skip_on.md5_match = True
13571371
u._spec.options.rsa_public_key = None
@@ -1392,6 +1406,7 @@ def test_run(lfmo, urm, tmpdir):
13921406
u._general_options.concurrency.md5_processes = 1
13931407
u._general_options.concurrency.crypto_processes = 0
13941408
u._general_options.resume_file = 'resume'
1409+
u._spec.options.overwrite = False
13951410
u._spec.options.store_file_properties.md5 = True
13961411
u._spec.skip_on.md5_match = True
13971412
u._spec.options.rsa_public_key = None
@@ -1455,6 +1470,7 @@ def test_run(lfmo, urm, tmpdir):
14551470
def test_start():
14561471
u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock())
14571472
u._general_options.dry_run = False
1473+
u._spec.options.overwrite = False
14581474
u._spec.options.delete_only = False
14591475
u._wait_for_transfer_threads = mock.MagicMock()
14601476
u._wait_for_disk_threads = mock.MagicMock()

0 commit comments

Comments
 (0)