NOTE(review): This patch was reconstructed from a copy in which all line breaks had been
collapsed and every angle-bracket argument list had been stripped. Line structure was
restored so that the added/context line counts agree with each hunk header. Template
arguments (e.g. the element type of the test's int value segment, the pointee type of
attribute_vector()) and the exact indentation of context lines are inferred from the
surrounding code, not copied -- confirm against the upstream commit before applying.
The stub bodies ("Implementation goes here") are intentional placeholders of the skeleton.

diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt
index 4f05180..b0f0faf 100644
--- a/src/lib/CMakeLists.txt
+++ b/src/lib/CMakeLists.txt
@@ -8,6 +8,8 @@ set(
     storage/abstract_segment.hpp
     storage/chunk.cpp
     storage/chunk.hpp
+    storage/dictionary_segment.cpp
+    storage/dictionary_segment.hpp
     storage/storage_manager.cpp
     storage/storage_manager.hpp
     storage/table.cpp
diff --git a/src/lib/storage/dictionary_segment.cpp b/src/lib/storage/dictionary_segment.cpp
new file mode 100644
index 0000000..2a88e83
--- /dev/null
+++ b/src/lib/storage/dictionary_segment.cpp
@@ -0,0 +1,97 @@
+#include "dictionary_segment.hpp"
+
+#include "utils/assert.hpp"
+
+namespace opossum {
+
+template <typename T>
+DictionarySegment<T>::DictionarySegment(const std::shared_ptr<AbstractSegment>& abstract_segment) {
+  // Implementation goes here
+}
+
+template <typename T>
+AllTypeVariant DictionarySegment<T>::operator[](const ChunkOffset chunk_offset) const {
+  // Implementation goes here
+  Fail("Implementation is missing.");
+}
+
+template <typename T>
+T DictionarySegment<T>::get(const ChunkOffset chunk_offset) const {
+  // Implementation goes here
+  Fail("Implementation is missing.");
+}
+
+template <typename T>
+std::optional<T> DictionarySegment<T>::get_typed_value(const ChunkOffset chunk_offset) const {
+  // Implementation goes here
+  Fail("Implementation is missing.");
+}
+
+template <typename T>
+const std::vector<T>& DictionarySegment<T>::dictionary() const {
+  // Implementation goes here
+  Fail("Implementation is missing.");
+}
+
+template <typename T>
+std::shared_ptr<const AbstractAttributeVector> DictionarySegment<T>::attribute_vector() const {
+  // Implementation goes here
+  Fail("Implementation is missing.");
+}
+
+template <typename T>
+ValueID DictionarySegment<T>::null_value_id() const {
+  // Implementation goes here
+  Fail("Implementation is missing.");
+}
+
+template <typename T>
+const T DictionarySegment<T>::value_of_value_id(const ValueID value_id) const {
+  // Implementation goes here
+  Fail("Implementation is missing.");
+}
+
+template <typename T>
+ValueID DictionarySegment<T>::lower_bound(const T value) const {
+  // Implementation goes here
+  Fail("Implementation is missing.");
+}
+
+template <typename T>
+ValueID DictionarySegment<T>::lower_bound(const AllTypeVariant& value) const {
+  // Implementation goes here
+  Fail("Implementation is missing.");
+}
+
+template <typename T>
+ValueID DictionarySegment<T>::upper_bound(const T value) const {
+  // Implementation goes here
+  Fail("Implementation is missing.");
+}
+
+template <typename T>
+ValueID DictionarySegment<T>::upper_bound(const AllTypeVariant& value) const {
+  // Implementation goes here
+  Fail("Implementation is missing.");
+}
+
+template <typename T>
+ChunkOffset DictionarySegment<T>::unique_values_count() const {
+  // Implementation goes here
+  Fail("Implementation is missing.");
+}
+
+template <typename T>
+ChunkOffset DictionarySegment<T>::size() const {
+  // Implementation goes here
+  return ChunkOffset{};
+}
+
+template <typename T>
+size_t DictionarySegment<T>::estimate_memory_usage() const {
+  return size_t{};
+}
+
+EXPLICITLY_INSTANTIATE_DATA_TYPES(DictionarySegment);
+
+}  // namespace opossum
diff --git a/src/lib/storage/dictionary_segment.hpp b/src/lib/storage/dictionary_segment.hpp
new file mode 100644
index 0000000..a87116a
--- /dev/null
+++ b/src/lib/storage/dictionary_segment.hpp
@@ -0,0 +1,69 @@
+#pragma once
+
+#include "abstract_segment.hpp"
+
+namespace opossum {
+
+class AbstractAttributeVector;
+
+// Dictionary is a specific segment type that stores all its values in a vector
+template <typename T>
+class DictionarySegment : public AbstractSegment {
+ public:
+  /**
+   * Creates a Dictionary segment from a given value segment.
+   */
+  explicit DictionarySegment(const std::shared_ptr<AbstractSegment>& abstract_segment);
+
+  // Returns the value at a certain position. If you want to write efficient operators, back off!
+  AllTypeVariant operator[](const ChunkOffset chunk_offset) const override;
+
+  // Returns the value at a certain position. Throws an error if value is NULL.
+  T get(const ChunkOffset chunk_offset) const;
+
+  // Returns the value at a certain position. Returns std::nullopt if the value is NULL.
+  std::optional<T> get_typed_value(const ChunkOffset chunk_offset) const;
+
+  // Returns an underlying dictionary.
+  const std::vector<T>& dictionary() const;
+
+  // Returns an underlying data structure.
+  std::shared_ptr<const AbstractAttributeVector> attribute_vector() const;
+
+  // Returns the ValueID used to represent a NULL value.
+  ValueID null_value_id() const;
+
+  // Returns the value represented by a given ValueID.
+  const T value_of_value_id(const ValueID value_id) const;
+
+  // Returns the first value ID that refers to a value >= the search value. Returns INVALID_VALUE_ID if all values are
+  // smaller than the search value.
+  ValueID lower_bound(const T value) const;
+
+  // Same as lower_bound(T), but accepts an AllTypeVariant.
+  ValueID lower_bound(const AllTypeVariant& value) const;
+
+  // Returns the first value ID that refers to a value > the search value. Returns INVALID_VALUE_ID if all values are
+  // smaller than or equal to the search value.
+  ValueID upper_bound(const T value) const;
+
+  // Same as upper_bound(T), but accepts an AllTypeVariant.
+  ValueID upper_bound(const AllTypeVariant& value) const;
+
+  // Returns the number of unique_values (dictionary entries).
+  ChunkOffset unique_values_count() const;
+
+  // Returns the number of entries.
+  ChunkOffset size() const override;
+
+  // Returns the calculated memory usage.
+  size_t estimate_memory_usage() const final;
+
+ protected:
+  std::vector<T> _dictionary;
+  std::shared_ptr<AbstractAttributeVector> _attribute_vector;
+};
+
+EXPLICITLY_DECLARE_DATA_TYPES(DictionarySegment);
+
+}  // namespace opossum
diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt
index d6071d2..79f4bb3 100644
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@@ -11,6 +11,7 @@ set(
     ${SHARED_SOURCES}
     lib/all_type_variant_test.cpp
     storage/chunk_test.cpp
+    storage/dictionary_segment_test.cpp
     storage/storage_manager_test.cpp
     storage/table_test.cpp
     storage/value_segment_test.cpp
diff --git a/src/test/storage/dictionary_segment_test.cpp b/src/test/storage/dictionary_segment_test.cpp
new file mode 100644
index 0000000..2bcf607
--- /dev/null
+++ b/src/test/storage/dictionary_segment_test.cpp
@@ -0,0 +1,73 @@
+#include "base_test.hpp"
+
+#include "resolve_type.hpp"
+#include "storage/abstract_attribute_vector.hpp"
+#include "storage/abstract_segment.hpp"
+#include "storage/dictionary_segment.hpp"
+
+namespace opossum {
+
+class StorageDictionarySegmentTest : public BaseTest {
+ protected:
+  std::shared_ptr<ValueSegment<int32_t>> value_segment_int{std::make_shared<ValueSegment<int32_t>>()};
+  std::shared_ptr<ValueSegment<std::string>> value_segment_str{std::make_shared<ValueSegment<std::string>>(true)};
+};
+
+TEST_F(StorageDictionarySegmentTest, CompressSegmentString) {
+  value_segment_str->append("Bill");
+  value_segment_str->append("Steve");
+  value_segment_str->append("Alexander");
+  value_segment_str->append("Steve");
+  value_segment_str->append("Hasso");
+  value_segment_str->append("Bill");
+  value_segment_str->append(NULL_VALUE);
+
+  const auto dict_segment = std::make_shared<DictionarySegment<std::string>>(value_segment_str);
+
+  // Test attribute_vector size.
+  EXPECT_EQ(dict_segment->size(), 7);
+
+  // Test dictionary size (uniqueness).
+  EXPECT_EQ(dict_segment->unique_values_count(), 4);
+
+  // Test sorting.
+  const auto& dict = dict_segment->dictionary();
+  EXPECT_EQ(dict[0], "Alexander");
+  EXPECT_EQ(dict[1], "Bill");
+  EXPECT_EQ(dict[2], "Hasso");
+  EXPECT_EQ(dict[3], "Steve");
+
+  // Test NULL value handling.
+  EXPECT_EQ(dict_segment->attribute_vector()->get(6), dict_segment->null_value_id());
+  EXPECT_EQ(dict_segment->get_typed_value(6), std::nullopt);
+  EXPECT_THROW(dict_segment->get(6), std::logic_error);
+}
+
+TEST_F(StorageDictionarySegmentTest, LowerUpperBound) {
+  for (auto value = int16_t{0}; value <= 10; value += 2) {
+    value_segment_int->append(value);
+  }
+
+  std::shared_ptr<AbstractSegment> segment;
+  resolve_data_type("int", [&](auto type) {
+    using Type = typename decltype(type)::type;
+    segment = std::make_shared<DictionarySegment<Type>>(value_segment_int);
+  });
+  auto dict_segment = std::dynamic_pointer_cast<DictionarySegment<int32_t>>(segment);
+
+  EXPECT_EQ(dict_segment->lower_bound(4), ValueID{2});
+  EXPECT_EQ(dict_segment->upper_bound(4), ValueID{3});
+
+  EXPECT_EQ(dict_segment->lower_bound(AllTypeVariant{4}), ValueID{2});
+  EXPECT_EQ(dict_segment->upper_bound(AllTypeVariant{4}), ValueID{3});
+
+  EXPECT_EQ(dict_segment->lower_bound(5), ValueID{3});
+  EXPECT_EQ(dict_segment->upper_bound(5), ValueID{3});
+
+  EXPECT_EQ(dict_segment->lower_bound(15), INVALID_VALUE_ID);
+  EXPECT_EQ(dict_segment->upper_bound(15), INVALID_VALUE_ID);
+}
+
+// TODO(student): You should add some more tests here (full coverage would be appreciated) and possibly in other files.
+
+}  // namespace opossum
diff --git a/src/test/storage/table_test.cpp b/src/test/storage/table_test.cpp
index d79d635..342ed26 100644
--- a/src/test/storage/table_test.cpp
+++ b/src/test/storage/table_test.cpp
@@ -96,4 +96,15 @@ TEST_F(StorageTableTest, SegmentsNullable) {
   EXPECT_TRUE(value_segment_2->is_nullable());
 }
 
+TEST_F(StorageTableTest, AppendWithEncodedSegments) {
+  table.append({1, "foo"});
+  EXPECT_EQ(table.row_count(), 1);
+
+  table.compress_chunk(ChunkID{0});
+  table.append({2, "bar"});
+
+  EXPECT_EQ(table.row_count(), 2);
+  EXPECT_EQ(table.chunk_count(), 2);
+}
+
 }  // namespace opossum