27 changes: 26 additions & 1 deletion tiledb/sm/filesystem/azure.cc
@@ -53,6 +53,16 @@ static std::shared_ptr<::Azure::Core::Http::HttpTransport> create_transport(
using namespace tiledb::common;
using tiledb::common::filesystem::directory_entry;

+namespace {
+/**
+ * The maximum number of committed blocks a block blob can have.
+ *
+ * This value was obtained from
+ * https://learn.microsoft.com/en-us/azure/storage/blobs/scalability-targets
+ */
+constexpr int max_committed_block_num = 50000;
+}  // namespace
+
namespace tiledb::sm {

/** Converts an Azure nullable value to an STL optional. */
@@ -390,8 +400,23 @@ Status Azure::flush_blob(const URI& uri) {
.GetBlockBlobClient(blob_path)
.CommitBlockList(std::vector(block_ids.begin(), block_ids.end()));
} catch (const ::Azure::Storage::StorageException& e) {
+std::string msg_extra;
+// Unlike S3, where each part has its own number and uploading a part with
+// an out-of-bounds number fails, Azure blocks do not have a sequence
+// number. We could fail ourselves as soon as we hit the limit, but if
+// Azure ever raises it, we would have to update our own code. Therefore,
+// we delay the check until the upload has finished.
+// We could also add an explanation if we hit the limit of uncommitted
+// blocks (100,000 at the time of writing), but that would require more
+// refactoring, and it is much harder to hit with reasonably sized blocks.
+if (block_ids.size() > max_committed_block_num) {
+  msg_extra +=
+      " This error might be resolved by increasing the value of the "
+      "'vfs.azure.block_list_block_size' config option";
+}
return LOG_STATUS(Status_AzureError(
-    "Flush blob failed on: " + uri.to_string() + "; " + e.Message));
+    "Flush blob failed on: " + uri.to_string() + "; " + e.Message +
+    msg_extra));
}

return Status::Ok();
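For context, a minimal sketch of the arithmetic behind the suggested remedy, assuming 'vfs.azure.block_list_block_size' is the size of each uploaded block: an object written in blocks of that size needs ceil(object_size / block_size) committed blocks, which must not exceed the 50,000-block limit cited above, so larger objects require a larger block size. `min_block_size` below is a hypothetical helper, not TileDB code.

```cpp
#include <cstdint>
#include <iostream>

// Mirrors the constant added in this PR; illustrative only.
constexpr std::uint64_t max_committed_block_num = 50000;

// Smallest block size (in bytes) that keeps an object of `object_size`
// bytes within the committed-block limit:
// ceil(object_size / max_committed_block_num).
std::uint64_t min_block_size(std::uint64_t object_size) {
  return (object_size + max_committed_block_num - 1) / max_committed_block_num;
}

int main() {
  // A 1 TiB blob needs blocks of at least ceil(2^40 / 50,000) = 21,990,233
  // bytes (roughly 21 MiB), so 'vfs.azure.block_list_block_size' would have
  // to be at least that large before writing it.
  std::cout << min_block_size(std::uint64_t{1} << 40) << " bytes\n";
}
```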
19 changes: 16 additions & 3 deletions tiledb/sm/filesystem/s3.cc
@@ -82,6 +82,14 @@ using tiledb::common::filesystem::directory_entry;

namespace {

+/**
+ * The maximum number of parts a multipart upload can have.
+ *
+ * This value was obtained from
+ * https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
+ */
+constexpr int max_multipart_part_num = 10000;
+
/*
* Functions to convert strings to AWS enums.
*
@@ -2029,9 +2037,14 @@ Status S3::get_make_upload_part_req(
if (!success) {
UniqueReadLock unique_rl(&multipart_upload_rwlock_);
auto state = &multipart_upload_states_.at(uri_path);
-Status st = Status_S3Error(
-    std::string("Failed to upload part of S3 object '") + uri.c_str() +
-    outcome_error_message(upload_part_outcome));
+auto msg = std::string("Failed to upload part of S3 object '") +
+    uri.c_str() + "' " + outcome_error_message(upload_part_outcome);
+if (ctx.upload_part_num > max_multipart_part_num) {
+  msg +=
+      " This error might be resolved by increasing the value of the "
+      "'vfs.s3.multipart_part_size' config option";
+}
+Status st = Status_S3Error(msg);

Member: I would phrase it as: "Maximum part limit might have been hit, try increasing the value of the 'vfs.s3.multipart_part_size' config option".

Member (Author): I think that the fact that a part limit has been reached is effectively conveyed in the "Part number must be an integer between 1 and 10000, inclusive" message returned by S3.

// Lock multipart state
std::unique_lock<std::mutex> state_lck(state->mtx);
unique_rl.unlock();
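The same arithmetic applies on the S3 side, assuming 'vfs.s3.multipart_part_size' is the size of each uploaded part: S3 rejects part numbers above 10,000 (the "Part number must be an integer between 1 and 10000, inclusive" error mentioned in the thread above), so an upload of object_size bytes needs parts of at least ceil(object_size / 10,000) bytes. `min_part_size` below is a hypothetical helper, not TileDB code.

```cpp
#include <cstdint>
#include <iostream>

// Mirrors the constant added in this PR; illustrative only.
constexpr std::uint64_t max_multipart_part_num = 10000;

// Smallest part size (in bytes) that keeps an upload of `object_size`
// bytes within the 10,000-part limit:
// ceil(object_size / max_multipart_part_num).
std::uint64_t min_part_size(std::uint64_t object_size) {
  return (object_size + max_multipart_part_num - 1) / max_multipart_part_num;
}

int main() {
  // With 5 MiB parts (the S3 minimum for all but the last part), an upload
  // tops out at 10,000 * 5 MiB, about 48.8 GiB; a 100 GiB object needs
  // parts of at least ceil(100 GiB / 10,000) bytes, a little over 10 MiB.
  std::cout << min_part_size(std::uint64_t{100} << 30) << " bytes\n";
}
```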