Skip to content

Commit

Permalink
Add some more datastructure tests
Browse files Browse the repository at this point in the history
  • Loading branch information
durner committed Jul 31, 2023
1 parent d1aecf5 commit ebd170f
Show file tree
Hide file tree
Showing 17 changed files with 164 additions and 41 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ In this repository, we present AnyBlob.
AnyBlob is a universal download manager that allows retrieving objects from and uploading objects to different cloud object stores.
Our download manager uses less CPU resources than cloud-vendor provided libraries while retaining maximum throughput performance.
AnyBlob leverages IO\_uring for superior performance per core.
For experimental results, please visit our research paper at [].
For experimental results, please visit our research paper at [PVLDB 16](https://www.vldb.org/pvldb/vol16/p2734-durner.pdf).

## Building AnyBlob

Expand Down Expand Up @@ -42,9 +42,10 @@ For coverage testing you can simply `make coverage` and open the coverage report

## Cite this work

If you are using AnyBlob in our scientific work, please cite:
If you are using AnyBlob in your scientific work, please cite:

```
Exploiting Cloud Object Storage for High-Performance Analytics
Dominik Durner, Viktor Leis, and Thomas Neumann
PVLDB 16, 11 (2023), 49th International Conference on Very Large Data Bases
```
10 changes: 5 additions & 5 deletions include/cloud/aws.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,21 +111,21 @@ class AWS : public Provider {
/// Builds the http request for downloading a blob or listing the directory
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> getRequest(const std::string& filePath, const std::pair<uint64_t, uint64_t>& range) const override;
/// Builds the http request for putting objects without the object data itself
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> putRequestGeneric(const std::string& filePath, const std::string_view object, uint16_t part, const std::string_view uploadId) const override;
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> putRequestGeneric(const std::string& filePath, std::string_view object, uint16_t part, std::string_view uploadId) const override;
/// Builds the http request for putting objects without the object data itself
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> putRequest(const std::string& filePath, const std::string_view object) const override {
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> putRequest(const std::string& filePath, std::string_view object) const override {
return putRequestGeneric(filePath, object, 0, "");
}
// Builds the http request for deleting an object
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> deleteRequest(const std::string& filePath) const override {
    // Forward to the generic delete builder, passing an empty upload id.
    auto request = deleteRequestGeneric(filePath, "");
    return request;
}
/// Builds the http request for deleting objects
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> deleteRequestGeneric(const std::string& filePath, const std::string_view uploadId) const override;
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> deleteRequestGeneric(const std::string& filePath, std::string_view uploadId) const override;
/// Builds the http request for creating multipart put objects
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> createMultiPartRequest(const std::string& filePath) const override;
/// Builds the http request for completing multipart put objects
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> completeMultiPartRequest(const std::string& filePath, const std::string_view uploadId, const std::vector<std::string>& etags) const override;
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> completeMultiPartRequest(const std::string& filePath, std::string_view uploadId, const std::vector<std::string>& etags) const override;

/// Get the address of the server
[[nodiscard]] std::string getAddress() const override;
Expand All @@ -135,7 +135,7 @@ class AWS : public Provider {
/// Builds the info http request
[[nodiscard]] static std::unique_ptr<utils::DataVector<uint8_t>> downloadInstanceInfo(const std::string& info = "instance-type");
/// Get the IAM address
[[nodiscard]] static constexpr const char* getIAMAddress() { return "169.254.169.254"; }
[[nodiscard]] static constexpr std::string_view getIAMAddress() { return "169.254.169.254"; }
/// Get the port of the IAM server
[[nodiscard]] static constexpr uint32_t getIAMPort() {
    // Port number reported for the IAM server.
    constexpr uint32_t iamPort = 80;
    return iamPort;
}

Expand Down
4 changes: 2 additions & 2 deletions include/cloud/azure.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class Azure : public Provider {
/// Builds the http request for downloading a blob or listing the directory
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> getRequest(const std::string& filePath, const std::pair<uint64_t, uint64_t>& range) const override;
/// Builds the http request for putting objects without the object data itself
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> putRequest(const std::string& filePath, const std::string_view object) const override;
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> putRequest(const std::string& filePath, std::string_view object) const override;
// Builds the http request for deleting an object
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> deleteRequest(const std::string& filePath) const override;

Expand All @@ -89,7 +89,7 @@ class Azure : public Provider {
/// Builds the info http request
[[nodiscard]] static std::unique_ptr<utils::DataVector<uint8_t>> downloadInstanceInfo();
/// Get the IAM address
[[nodiscard]] static constexpr const char* getIAMAddress() {
[[nodiscard]] static constexpr std::string_view getIAMAddress() {
return "169.254.169.254";
}
/// Get the port of the IAM server
Expand Down
10 changes: 5 additions & 5 deletions include/cloud/gcp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,21 +79,21 @@ class GCP : public Provider {
/// Builds the http request for downloading a blob or listing the directory
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> getRequest(const std::string& filePath, const std::pair<uint64_t, uint64_t>& range) const override;
/// Builds the http request for putting objects without the object data itself
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> putRequestGeneric(const std::string& filePath, const std::string_view object, uint16_t part, const std::string_view uploadId) const override;
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> putRequestGeneric(const std::string& filePath, std::string_view object, uint16_t part, std::string_view uploadId) const override;
/// Builds the http request for putting objects without the object data itself
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> putRequest(const std::string& filePath, const std::string_view object) const override {
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> putRequest(const std::string& filePath, std::string_view object) const override {
return putRequestGeneric(filePath, object, 0, "");
}
// Builds the http request for deleting an object
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> deleteRequest(const std::string& filePath) const override {
    // Forward to the generic delete builder, passing an empty upload id.
    auto request = deleteRequestGeneric(filePath, "");
    return request;
}
/// Builds the http request for deleting objects
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> deleteRequestGeneric(const std::string& filePath, const std::string_view uploadId) const override;
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> deleteRequestGeneric(const std::string& filePath, std::string_view uploadId) const override;
/// Builds the http request for creating multipart put objects
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> createMultiPartRequest(const std::string& filePath) const override;
/// Builds the http request for completing multipart put objects
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> completeMultiPartRequest(const std::string& filePath, const std::string_view uploadId, const std::vector<std::string>& etags) const override;
[[nodiscard]] std::unique_ptr<utils::DataVector<uint8_t>> completeMultiPartRequest(const std::string& filePath, std::string_view uploadId, const std::vector<std::string>& etags) const override;

/// Get the address of the server
[[nodiscard]] std::string getAddress() const override;
Expand All @@ -103,7 +103,7 @@ class GCP : public Provider {
/// Builds the info http request
[[nodiscard]] static std::unique_ptr<utils::DataVector<uint8_t>> downloadInstanceInfo(const std::string& info = "machine-type");
/// Get the IAM address
[[nodiscard]] static constexpr const char* getIAMAddress() {
[[nodiscard]] static constexpr std::string_view getIAMAddress() {
return "169.254.169.254";
}
/// Get the port of the IAM server
Expand Down
14 changes: 7 additions & 7 deletions include/cloud/provider.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class Provider {
/// Builds the http request for downloading a blob or listing a directory
[[nodiscard]] virtual std::unique_ptr<utils::DataVector<uint8_t>> getRequest(const std::string& filePath, const std::pair<uint64_t, uint64_t>& range) const = 0;
/// Builds the http request for putting an object without the actual data (header only according to the data and length provided)
[[nodiscard]] virtual std::unique_ptr<utils::DataVector<uint8_t>> putRequest(const std::string& filePath, const std::string_view object) const = 0;
[[nodiscard]] virtual std::unique_ptr<utils::DataVector<uint8_t>> putRequest(const std::string& filePath, std::string_view object) const = 0;
/// Builds the http request for deleting an object
[[nodiscard]] virtual std::unique_ptr<utils::DataVector<uint8_t>> deleteRequest(const std::string& filePath) const = 0;
/// Get the address of the server
Expand All @@ -84,13 +84,13 @@ class Provider {
/// Is multipart upload supported, if size > 0?
[[nodiscard]] virtual uint64_t multipartUploadSize() const {
    // Default implementation reports a size of 0.
    constexpr uint64_t defaultSize = 0;
    return defaultSize;
}
/// Builds the http request for putting multipart objects without the object data itself
[[nodiscard]] virtual std::unique_ptr<utils::DataVector<uint8_t>> putRequestGeneric(const std::string& /*filePath*/, const std::string_view /*object*/, uint16_t /*part*/, const std::string_view /*uploadId*/) const;
[[nodiscard]] virtual std::unique_ptr<utils::DataVector<uint8_t>> putRequestGeneric(const std::string& /*filePath*/, std::string_view /*object*/, uint16_t /*part*/, std::string_view /*uploadId*/) const;
/// Builds the http request for deleting multipart aborted objects
[[nodiscard]] virtual std::unique_ptr<utils::DataVector<uint8_t>> deleteRequestGeneric(const std::string& /*filePath*/, const std::string_view /*uploadId*/) const;
[[nodiscard]] virtual std::unique_ptr<utils::DataVector<uint8_t>> deleteRequestGeneric(const std::string& /*filePath*/, std::string_view /*uploadId*/) const;
/// Builds the http request for creating multipart put objects
[[nodiscard]] virtual std::unique_ptr<utils::DataVector<uint8_t>> createMultiPartRequest(const std::string& /*filePath*/) const;
/// Builds the http request for completing multipart put objects
[[nodiscard]] virtual std::unique_ptr<utils::DataVector<uint8_t>> completeMultiPartRequest(const std::string& /*filePath*/, const std::string_view /*uploadId*/, const std::vector<std::string>& /*etags*/) const;
[[nodiscard]] virtual std::unique_ptr<utils::DataVector<uint8_t>> completeMultiPartRequest(const std::string& /*filePath*/, std::string_view /*uploadId*/, const std::vector<std::string>& /*etags*/) const;

/// Initialize secret
/// The default implementation is empty and ignores the send-receiver; being virtual, derived classes may override it.
virtual void initSecret(network::TaskedSendReceiver& /*sendReceiver*/) {}
Expand All @@ -101,17 +101,17 @@ class Provider {
/// Gets the cloud provider type
[[nodiscard]] CloudService getType() {
    // Return the value held in _type by value.
    const CloudService type = _type;
    return type;
}
/// Is it a remote file?
[[nodiscard]] static bool isRemoteFile(const std::string_view fileName) noexcept;
[[nodiscard]] static bool isRemoteFile(std::string_view fileName) noexcept;
/// Get the path of the parent dir without the remote info
[[nodiscard]] static std::string getRemoteParentDirectory(std::string fileName) noexcept;
/// Get a region and bucket name
[[nodiscard]] static Provider::RemoteInfo getRemoteInfo(const std::string& fileName);
/// Get the key from a keyFile
[[nodiscard]] static std::string getKey(const std::string& keyFile);
/// Get the etag from the upload header
[[nodiscard]] static std::string getETag(const std::string_view header);
[[nodiscard]] static std::string getETag(std::string_view header);
/// Get the upload id from the multipart request body
[[nodiscard]] static std::string getUploadId(const std::string_view body);
[[nodiscard]] static std::string getUploadId(std::string_view body);

/// Create a provider (keyId is access email for GCP/Azure)
[[nodiscard]] static std::unique_ptr<Provider> makeProvider(const std::string& filepath, bool https = true, const std::string& keyId = "", const std::string& keyFile = "", network::TaskedSendReceiver* sendReceiver = nullptr);
Expand Down
2 changes: 1 addition & 1 deletion include/network/http_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class HTTPHelper {

private:
/// Detect the protocol
[[nodiscard]] static Info detect(const std::string_view s);
[[nodiscard]] static Info detect(std::string_view s);

public:
/// Retrieve the content without http meta info, note that this changes data
Expand Down
4 changes: 2 additions & 2 deletions include/network/original_message.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ struct OriginalMessage {
uint64_t putLength;

/// The constructor
OriginalMessage(std::unique_ptr<utils::DataVector<uint8_t>> message, std::string hostname, uint32_t port, uint8_t* receiveBuffer = nullptr, uint64_t bufferSize = 0, uint64_t traceId = 0) : message(std::move(message)), result(receiveBuffer, bufferSize), hostname(hostname), port(port), traceId(traceId), putData(nullptr), putLength() {}
OriginalMessage(std::unique_ptr<utils::DataVector<uint8_t>> message, std::string_view hostname, uint32_t port, uint8_t* receiveBuffer = nullptr, uint64_t bufferSize = 0, uint64_t traceId = 0) : message(std::move(message)), result(receiveBuffer, bufferSize), hostname(hostname), port(port), traceId(traceId), putData(nullptr), putLength() {}

/// The destructor
virtual ~OriginalMessage() = default;
Expand Down Expand Up @@ -72,7 +72,7 @@ struct OriginalCallbackMessage : public OriginalMessage {
Callback callback;

/// The constructor
OriginalCallbackMessage(Callback&& callback, std::unique_ptr<utils::DataVector<uint8_t>> message, std::string hostname, uint32_t port, uint8_t* receiveBuffer = nullptr, uint64_t bufferSize = 0, uint64_t traceId = 0) : OriginalMessage(std::move(message), hostname, port, receiveBuffer, bufferSize, traceId), callback(std::forward<Callback>(callback)) {}
OriginalCallbackMessage(Callback&& callback, std::unique_ptr<utils::DataVector<uint8_t>> message, std::string_view hostname, uint32_t port, uint8_t* receiveBuffer = nullptr, uint64_t bufferSize = 0, uint64_t traceId = 0) : OriginalMessage(std::move(message), hostname, port, receiveBuffer, bufferSize, traceId), callback(std::forward<Callback>(callback)) {}

/// The destructor
virtual ~OriginalCallbackMessage() override = default;
Expand Down
6 changes: 3 additions & 3 deletions src/cloud/aws.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ unique_ptr<utils::DataVector<uint8_t>> AWS::getRequest(const string& filePath, c
return make_unique<utils::DataVector<uint8_t>>(reinterpret_cast<uint8_t*>(httpHeader.data()), reinterpret_cast<uint8_t*>(httpHeader.data() + httpHeader.size()));
}
//---------------------------------------------------------------------------
unique_ptr<utils::DataVector<uint8_t>> AWS::putRequestGeneric(const string& filePath, const string_view object, uint16_t part, const string_view uploadId) const
unique_ptr<utils::DataVector<uint8_t>> AWS::putRequestGeneric(const string& filePath, string_view object, uint16_t part, string_view uploadId) const
// Builds the http request for putting objects without the object data itself
{
if (!validKeys())
Expand Down Expand Up @@ -270,7 +270,7 @@ unique_ptr<utils::DataVector<uint8_t>> AWS::putRequestGeneric(const string& file
return make_unique<utils::DataVector<uint8_t>>(reinterpret_cast<uint8_t*>(httpHeader.data()), reinterpret_cast<uint8_t*>(httpHeader.data() + httpHeader.size()));
}
//---------------------------------------------------------------------------
unique_ptr<utils::DataVector<uint8_t>> AWS::deleteRequestGeneric(const string& filePath, const string_view uploadId) const
unique_ptr<utils::DataVector<uint8_t>> AWS::deleteRequestGeneric(const string& filePath, string_view uploadId) const
// Builds the http request for deleting an object
{
if (!validKeys())
Expand Down Expand Up @@ -345,7 +345,7 @@ unique_ptr<utils::DataVector<uint8_t>> AWS::createMultiPartRequest(const string&
return make_unique<utils::DataVector<uint8_t>>(reinterpret_cast<uint8_t*>(httpHeader.data()), reinterpret_cast<uint8_t*>(httpHeader.data() + httpHeader.size()));
}
//---------------------------------------------------------------------------
unique_ptr<utils::DataVector<uint8_t>> AWS::completeMultiPartRequest(const string& filePath, const string_view uploadId, const std::vector<std::string>& etags) const
unique_ptr<utils::DataVector<uint8_t>> AWS::completeMultiPartRequest(const string& filePath, string_view uploadId, const std::vector<std::string>& etags) const
// Builds the http request for completing multipart upload objects
{
if (!validKeys())
Expand Down
2 changes: 1 addition & 1 deletion src/cloud/azure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ unique_ptr<utils::DataVector<uint8_t>> Azure::getRequest(const string& filePath,
return make_unique<utils::DataVector<uint8_t>>(reinterpret_cast<uint8_t*>(httpHeader.data()), reinterpret_cast<uint8_t*>(httpHeader.data() + httpHeader.size()));
}
//---------------------------------------------------------------------------
unique_ptr<utils::DataVector<uint8_t>> Azure::putRequest(const string& filePath, const string_view object) const
unique_ptr<utils::DataVector<uint8_t>> Azure::putRequest(const string& filePath, string_view object) const
// Builds the http request for putting objects without the object data itself
{
AzureSigner::Request request;
Expand Down
6 changes: 3 additions & 3 deletions src/cloud/gcp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ unique_ptr<utils::DataVector<uint8_t>> GCP::getRequest(const string& filePath, c
return make_unique<utils::DataVector<uint8_t>>(reinterpret_cast<uint8_t*>(httpHeader.data()), reinterpret_cast<uint8_t*>(httpHeader.data() + httpHeader.size()));
}
//---------------------------------------------------------------------------
unique_ptr<utils::DataVector<uint8_t>> GCP::putRequestGeneric(const string& filePath, const string_view object, uint16_t part, const string_view uploadId) const
unique_ptr<utils::DataVector<uint8_t>> GCP::putRequestGeneric(const string& filePath, string_view object, uint16_t part, string_view uploadId) const
// Builds the http request for putting objects without the object data itself
{
GCPSigner::Request request;
Expand Down Expand Up @@ -139,7 +139,7 @@ unique_ptr<utils::DataVector<uint8_t>> GCP::putRequestGeneric(const string& file
return make_unique<utils::DataVector<uint8_t>>(reinterpret_cast<uint8_t*>(httpHeader.data()), reinterpret_cast<uint8_t*>(httpHeader.data() + httpHeader.size()));
}
//---------------------------------------------------------------------------
unique_ptr<utils::DataVector<uint8_t>> GCP::deleteRequestGeneric(const string& filePath, const string_view uploadId) const
unique_ptr<utils::DataVector<uint8_t>> GCP::deleteRequestGeneric(const string& filePath, string_view uploadId) const
// Builds the http request for deleting objects
{
GCPSigner::Request request;
Expand Down Expand Up @@ -196,7 +196,7 @@ unique_ptr<utils::DataVector<uint8_t>> GCP::createMultiPartRequest(const string&
return make_unique<utils::DataVector<uint8_t>>(reinterpret_cast<uint8_t*>(httpHeader.data()), reinterpret_cast<uint8_t*>(httpHeader.data() + httpHeader.size()));
}
//---------------------------------------------------------------------------
unique_ptr<utils::DataVector<uint8_t>> GCP::completeMultiPartRequest(const string& filePath, const string_view uploadId, const std::vector<std::string>& etags) const
unique_ptr<utils::DataVector<uint8_t>> GCP::completeMultiPartRequest(const string& filePath, string_view uploadId, const std::vector<std::string>& etags) const
// Builds the http request for completing multipart upload objects
{
string content = "<CompleteMultipartUpload>\n";
Expand Down
Loading

0 comments on commit ebd170f

Please sign in to comment.