Skip to content

Commit

Permalink
ADD: Add DBN encoding to C++
Browse files Browse the repository at this point in the history
  • Loading branch information
threecgreen committed Jul 8, 2024
1 parent 77671fd commit 7eb0c14
Show file tree
Hide file tree
Showing 28 changed files with 891 additions and 160 deletions.
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,28 @@

## 0.20.0 - TBD

This release improves historical symbology support with the new `TsSymbolMap` class that
This release adds support for encoding DBN within the C++ client.
It also improves historical symbology support with the new `TsSymbolMap` class that
handles mapping historical records to a text symbol. To support this class, several types
for date fields were changed from strings or ints to `date::year_month_day`.

### Enhancements
- Added `TsSymbolMap` to support historical symbology where mappings change between days
- Added `DbnEncoder` class for encoding DBN data
- Added blocking API similar to `LiveBlocking` to `DbnFileStore` with new `GetMetadata`
and `NextRecord` methods
- Added `PitSymbolMap` constructor from `Metadata` and a `date::year_month_day`
- Added `Metadata::CreateSymbolMap` and `Metadata::CreateSymbolMapForDate` methods for
creating symbology maps from historical metadata
- Added blocking API similar to `LiveBlocking` to `DbnFileStore`
- Added `SymbologyResolution::CreateSymbolMap` method for creating a symbology map from
a symbology resolution response
- Added `InFileStream` and `OutFileStream` helper classes for reading binary input and
writing binary output, respectively

### Breaking changes
- Added new dependency on [Howard Hinnant's date library](https://howardhinnant.github.io/date/date.html)
- Added `ILogReceiver*` parameter to all `DbnDecoder` constructors and one `DbnFileStore` constructor
- Removed type `StrMappingInterval`. `MappingInterval` is now also used in `SymbologyResolution`.
- Changed type of `start_date` and `end_date` in `MappingInterval` to `date::year_month_day`
- Added `stype_in` and `stype_out` fields to `SymbologyResolution` to support creating
Expand Down
7 changes: 5 additions & 2 deletions cmake/SourcesAndHeaders.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ set(headers
include/databento/datetime.hpp
include/databento/dbn.hpp
include/databento/dbn_decoder.hpp
include/databento/dbn_encoder.hpp
include/databento/dbn_file_store.hpp
include/databento/detail/file_stream.hpp
include/databento/detail/http_client.hpp
include/databento/detail/json_helpers.hpp
include/databento/detail/scoped_fd.hpp
Expand All @@ -16,6 +16,7 @@ set(headers
include/databento/detail/zstd_stream.hpp
include/databento/enums.hpp
include/databento/exceptions.hpp
include/databento/file_stream.hpp
include/databento/fixed_price.hpp
include/databento/flag_set.hpp
include/databento/historical.hpp
Expand All @@ -39,9 +40,10 @@ set(sources
src/compat.cpp
src/datetime.cpp
src/dbn.cpp
src/dbn_constants.hpp
src/dbn_decoder.cpp
src/dbn_encoder.cpp
src/dbn_file_store.cpp
src/detail/file_stream.cpp
src/detail/http_client.cpp
src/detail/json_helpers.cpp
src/detail/scoped_fd.cpp
Expand All @@ -50,6 +52,7 @@ set(sources
src/detail/zstd_stream.cpp
src/enums.cpp
src/exceptions.cpp
src/file_stream.cpp
src/fixed_price.cpp
src/flag_set.cpp
src/historical.cpp
Expand Down
15 changes: 8 additions & 7 deletions include/databento/dbn_decoder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,24 @@
#include <string>

#include "databento/dbn.hpp"
#include "databento/detail/file_stream.hpp"
#include "databento/detail/shared_channel.hpp"
#include "databento/enums.hpp" // Upgrade Policy
#include "databento/file_stream.hpp"
#include "databento/ireadable.hpp"
#include "databento/log.hpp"
#include "databento/record.hpp" // Record, RecordHeader

namespace databento {
// DBN decoder. Set upgrade_policy to control how DBN version 1 data should be
// handled. Defaults to upgrading DBNv1 data to version 2 (the current version).
class DbnDecoder {
public:
explicit DbnDecoder(detail::SharedChannel channel);
explicit DbnDecoder(detail::FileStream file_stream);
explicit DbnDecoder(std::unique_ptr<IReadable> input);
DbnDecoder(std::unique_ptr<IReadable> input,
DbnDecoder(ILogReceiver* log_receiver, detail::SharedChannel channel);
DbnDecoder(ILogReceiver* log_receiver, InFileStream file_stream);
DbnDecoder(ILogReceiver* log_receiver, std::unique_ptr<IReadable> input);
DbnDecoder(ILogReceiver* log_receiver, std::unique_ptr<IReadable> input,
VersionUpgradePolicy upgrade_policy);

// Decode metadata from the given buffer.
static Metadata DecodeMetadata(const std::vector<std::uint8_t>& buffer);
static std::pair<std::uint8_t, std::size_t> DecodeMetadataVersionAndSize(
const std::uint8_t* buffer, std::size_t size);
static Metadata DecodeMetadataFields(std::uint8_t version,
Expand Down Expand Up @@ -60,8 +59,10 @@ class DbnDecoder {
std::vector<std::uint8_t>::const_iterator buffer_end_it);
bool DetectCompression();
std::size_t FillBuffer();
std::size_t GetReadBufferSize() const;
RecordHeader* BufferRecordHeader();

ILogReceiver* log_receiver_;
std::uint8_t version_{};
VersionUpgradePolicy upgrade_policy_;
bool ts_out_{};
Expand Down
39 changes: 39 additions & 0 deletions include/databento/dbn_encoder.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#pragma once

#include <cstdint> // uint32_t

#include "databento/dbn.hpp" // Metadata
#include "databento/iwritable.hpp"
#include "databento/record.hpp"
#include "databento/with_ts_out.hpp"

namespace databento {
// Encodes DBN metadata and records to an `IWritable` output.
//
// NOTE(review): `output` is stored as a raw pointer, so it presumably is not
// owned and must outlive the encoder; confirm against dbn_encoder.cpp.
class DbnEncoder {
 public:
  // Creates an encoder bound to `output`.
  // NOTE(review): presumably writes `metadata` to `output` on construction;
  // confirm against dbn_encoder.cpp.
  explicit DbnEncoder(const Metadata& metadata, IWritable* output);

  // Static variants that write directly to the given `output` without needing
  // an encoder instance.
  static void EncodeMetadata(const Metadata& metadata, IWritable* output);
  static void EncodeRecord(const Record& record, IWritable* output);

  // Encodes a concrete record struct by wrapping its `hd` header in a `Record`
  // and forwarding to the `Record` overload.
  template <typename R>
  void EncodeRecord(const R& record) {
    static_assert(
        has_header_v<R>,
        "must be a DBN record struct with an `hd` RecordHeader field");
    EncodeRecord(Record{&record.hd});
  }
  // Encodes a record wrapped in `WithTsOut` by forwarding the header of the
  // inner `rec`. Taken by const reference (rather than by value) so the record
  // is not copied on every call.
  template <typename R>
  void EncodeRecord(const WithTsOut<R>& record) {
    static_assert(
        has_header_v<R>,
        "must be a DBN record struct with an `hd` RecordHeader field");
    EncodeRecord(Record{&record.rec.hd});
  }
  // Encodes the record referenced by `record` to this encoder's output.
  void EncodeRecord(const Record& record);

 private:
  // Computes a length from `metadata`.
  // NOTE(review): presumably the encoded metadata length in bytes; confirm.
  static std::uint32_t CalcLength(const Metadata& metadata);

  IWritable* output_;
};
} // namespace databento
24 changes: 20 additions & 4 deletions include/databento/dbn_file_store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,39 @@

#include <string>

#include "databento/dbn.hpp" // DecodeMetadata
#include "databento/dbn_decoder.hpp" // DbnDecoder
#include "databento/enums.hpp" // VersionUpgradePolicy
#include "databento/timeseries.hpp" // MetadataCallback, RecordCallback
#include "databento/log.hpp"
#include "databento/record.hpp"
#include "databento/timeseries.hpp" // MetadataCallback, RecordCallback

namespace databento {
// A reader for DBN files.
// A reader for DBN files. This class provides both a callback API similar to
// TimeseriesGetRange in historical data and LiveThreaded for live data as well
// as a blocking API similar to that of LiveBlocking. Only one API should be
// used on a given instance.
class DbnFileStore {
public:
explicit DbnFileStore(const std::string& file_path);
DbnFileStore(const std::string& file_path,
DbnFileStore(ILogReceiver* log_receiver, const std::string& file_path,
VersionUpgradePolicy upgrade_policy);

// Callback API: calling Replay consumes the input.
void Replay(const MetadataCallback& metadata_callback,
const RecordCallback& record_callback);
void Replay(const RecordCallback& record_callback);

// Blocking API
const Metadata& GetMetadata();
// Returns the next record or `nullptr` if there are no remaining records.
const Record* NextRecord();

private:
DbnDecoder parser_;
void MaybeDecodeMetadata();

DbnDecoder decoder_;
Metadata metadata_{};
bool has_decoded_metadata_{false};
};
} // namespace databento
34 changes: 29 additions & 5 deletions include/databento/detail/zstd_stream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,22 @@
#include <vector>

#include "databento/ireadable.hpp"
#include "databento/iwritable.hpp"
#include "databento/log.hpp"

namespace databento {
namespace detail {
class ZstdStream : public IReadable {
class ZstdDecodeStream : public IReadable {
public:
explicit ZstdStream(std::unique_ptr<IReadable> input);
ZstdStream(std::unique_ptr<IReadable> input,
std::vector<std::uint8_t>&& in_buffer);
explicit ZstdDecodeStream(std::unique_ptr<IReadable> input);
ZstdDecodeStream(std::unique_ptr<IReadable> input,
std::vector<std::uint8_t>&& in_buffer);

// Read exactly `length` bytes into `buffer`.
void ReadExact(std::uint8_t* buffer, std::size_t length) override;
// Read at most `length` bytes. Returns the number of bytes read. Will only
// return 0 if the end of the stream is reached.
size_t ReadSome(std::uint8_t* buffer, std::size_t max_length) override;
std::size_t ReadSome(std::uint8_t* buffer, std::size_t max_length) override;

private:
std::unique_ptr<IReadable> input_;
Expand All @@ -30,5 +32,27 @@ class ZstdStream : public IReadable {
std::vector<std::uint8_t> in_buffer_;
ZSTD_inBuffer z_in_buffer_;
};

// An IWritable that holds a zstd compression stream (ZSTD_CStream) together
// with a pointer to a downstream IWritable.
// NOTE(review): presumably compresses the bytes passed to WriteAll and forwards
// the compressed bytes to `output`; confirm in zstd_stream.cpp.
// Instances can be neither copied nor moved (all four operations are deleted).
class ZstdCompressStream : public IWritable {
public:
// Creates a stream that targets `output`.
// NOTE(review): presumably uses a default log receiver; confirm.
explicit ZstdCompressStream(IWritable* output);
// Creates a stream that targets `output` and stores `log_receiver`.
ZstdCompressStream(ILogReceiver* log_receiver, IWritable* output);
ZstdCompressStream(const ZstdCompressStream&) = delete;
ZstdCompressStream& operator=(const ZstdCompressStream&) = delete;
ZstdCompressStream(ZstdCompressStream&&) = delete;
ZstdCompressStream& operator=(ZstdCompressStream&&) = delete;
// NOTE(review): user-declared destructor; presumably finishes the zstd frame
// and flushes remaining data to the output. Confirm in zstd_stream.cpp.
~ZstdCompressStream() override;

// Implements IWritable::WriteAll for `length` bytes starting at `buffer`.
void WriteAll(const std::uint8_t* buffer, std::size_t length) override;

private:
// Raw pointers: presumably non-owning, so both must outlive this object
// (TODO confirm).
ILogReceiver* log_receiver_;
IWritable* output_;
// zstd compression stream handle; the second template argument is the
// function-pointer deleter invoked when the handle is released.
std::unique_ptr<ZSTD_CStream, std::size_t (*)(ZSTD_CStream*)> z_cstream_;
// Input-side staging: a byte buffer, the zstd view over input, and a size.
// NOTE(review): `in_size_` has no in-class initializer; verify that every
// constructor initializes it.
std::vector<std::uint8_t> in_buffer_;
ZSTD_inBuffer z_in_buffer_;
std::size_t in_size_;
// Output-side byte buffer.
std::vector<std::uint8_t> out_buffer_;
};
} // namespace detail
} // namespace databento
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@

#include <cstddef> // size_t
#include <cstdint> // uint8_t
#include <fstream> // ifstream
#include <fstream> // ifstream, ofstream
#include <string>

#include "databento/ireadable.hpp"
#include "databento/iwritable.hpp"

namespace databento {
namespace detail {
class FileStream : public IReadable {
class InFileStream : public IReadable {
public:
explicit FileStream(const std::string& file_path);
explicit InFileStream(const std::string& file_path);

// Read exactly `length` bytes into `buffer`.
void ReadExact(std::uint8_t* buffer, std::size_t length) override;
Expand All @@ -22,5 +22,14 @@ class FileStream : public IReadable {
private:
std::ifstream stream_;
};
} // namespace detail

// An IWritable backed by a std::ofstream, for writing binary output to a file.
class OutFileStream : public IWritable {
public:
// Creates a stream for the file at `file_path`.
// NOTE(review): presumably opens the file in binary mode and reports failure
// to open; confirm in file_stream.cpp.
explicit OutFileStream(const std::string& file_path);

// Implements IWritable::WriteAll for `length` bytes starting at `buffer`.
void WriteAll(const std::uint8_t* buffer, std::size_t length) override;

private:
std::ofstream stream_;
};
} // namespace databento
1 change: 1 addition & 0 deletions include/databento/historical.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ class Historical {
DbnFileStore TimeseriesGetRangeToFile(const HttplibParams& params,
const std::string& file_path);

ILogReceiver* log_receiver_;
const std::string key_;
const std::string gateway_;
detail::HttpClient client_;
Expand Down
15 changes: 15 additions & 0 deletions include/databento/iwritable.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#pragma once

#include <cstddef> // size_t
#include <cstdint> // uint8_t

namespace databento {
// An abstract class for writable objects to allow for runtime polymorphism
// around DBN encoding. Implementations provide a single operation, WriteAll.
class IWritable {
public:
// Virtual so that implementations can be destroyed through an IWritable
// pointer or reference.
virtual ~IWritable() = default;

// Writes `length` bytes starting at `buffer`.
// NOTE(review): there is no return value, so a partial write cannot be
// reported to the caller; presumably implementations either write everything
// or signal failure some other way (e.g. by throwing). Confirm with the
// implementations.
virtual void WriteAll(const std::uint8_t* buffer, std::size_t length) = 0;
};
} // namespace databento
21 changes: 21 additions & 0 deletions src/dbn_constants.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#pragma once

#include <cstddef>
#include <cstdint>
#include <limits>

// Internal DBN-related constants.
namespace databento {
// Sizes of the leading parts of the encoded metadata.
constexpr std::size_t kMagicSize{4};
constexpr std::size_t kMetadataPreludeSize{8};

// Magic number that identifies a Zstandard frame.
constexpr std::uint32_t kZstdMagicNumber{0xFD2FB528};

// Text prefix used by DBN.
constexpr const char* kDbnPrefix{"DBN"};

// Metadata layout lengths.
constexpr std::size_t kFixedMetadataLen{100};
constexpr std::size_t kDatasetCstrLen{16};
constexpr std::size_t kMetadataReservedLen{53};
constexpr std::size_t kMetadataReservedLenV1{47};

// Buffer capacity: 8 KiB.
constexpr std::size_t kBufferCapacity{std::size_t{8} * 1024};

// "Null" sentinels: the maximum value of each field's integer type.
constexpr std::uint16_t kNullSchema{
    std::numeric_limits<std::uint16_t>::max()};
constexpr std::uint8_t kNullSType{std::numeric_limits<std::uint8_t>::max()};
constexpr std::uint64_t kNullRecordCount{
    std::numeric_limits<std::uint64_t>::max()};
}  // namespace databento
Loading

0 comments on commit 7eb0c14

Please sign in to comment.