Skip to content

Commit

Permalink
Sprint 2 (#3)
Browse files Browse the repository at this point in the history
  • Loading branch information
dey4ss committed May 8, 2023
1 parent 8c2d112 commit 2d5d2d1
Show file tree
Hide file tree
Showing 6 changed files with 253 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ set(
storage/abstract_segment.hpp
storage/chunk.cpp
storage/chunk.hpp
storage/dictionary_segment.cpp
storage/dictionary_segment.hpp
storage/storage_manager.cpp
storage/storage_manager.hpp
storage/table.cpp
Expand Down
97 changes: 97 additions & 0 deletions src/lib/storage/dictionary_segment.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#include "dictionary_segment.hpp"

#include <algorithm>

#include "utils/assert.hpp"

namespace opossum {

// Constructs a dictionary segment from `abstract_segment`.
// Currently a stub: the body is empty, so `_dictionary` stays an empty vector and `_attribute_vector` stays a null
// pointer after construction.
// NOTE(review): the accompanying tests expect the dictionary to hold the sorted, de-duplicated values of the input
// segment and the attribute vector to hold one entry per input row (NULLs included) - to be implemented.
template <typename T>
DictionarySegment<T>::DictionarySegment(const std::shared_ptr<AbstractSegment>& abstract_segment) {
// Implementation goes here
}

// Returns the value at `chunk_offset` as an AllTypeVariant.
// Currently a stub: every call ends in Fail().
// NOTE(review): presumably this should resolve the ValueID stored at `chunk_offset` through the dictionary and yield
// NULL_VALUE for NULL entries - confirm once the attribute vector interface is available here.
template <typename T>
AllTypeVariant DictionarySegment<T>::operator[](const ChunkOffset chunk_offset) const {
// Implementation goes here
Fail("Implementation is missing.");
}

// Returns the typed value at `chunk_offset`.
// Currently a stub: every call ends in Fail().
// NOTE(review): the accompanying test expects a std::logic_error when the entry at `chunk_offset` is NULL.
template <typename T>
T DictionarySegment<T>::get(const ChunkOffset chunk_offset) const {
// Implementation goes here
Fail("Implementation is missing.");
}

// Returns the typed value at `chunk_offset` wrapped in an optional.
// Currently a stub: every call ends in Fail().
// NOTE(review): the accompanying test expects std::nullopt when the entry at `chunk_offset` is NULL.
template <typename T>
std::optional<T> DictionarySegment<T>::get_typed_value(const ChunkOffset chunk_offset) const {
// Implementation goes here
Fail("Implementation is missing.");
}

// Gives read-only access to the dictionary, i.e., the vector of values stored in `_dictionary`.
// The reference stays valid for as long as this segment is alive.
template <typename T>
const std::vector<T>& DictionarySegment<T>::dictionary() const {
  return _dictionary;
}

// Hands out the attribute vector as a pointer-to-const so that callers can read it but not modify it.
// The returned pointer shares ownership with this segment; it is null if no attribute vector has been set.
template <typename T>
std::shared_ptr<const AbstractAttributeVector> DictionarySegment<T>::attribute_vector() const {
  return _attribute_vector;
}

// Returns the ValueID that represents NULL in the attribute vector.
// Currently a stub: every call ends in Fail().
// NOTE(review): which ValueID encodes NULL is not defined anywhere in this file - the choice has to match whatever
// the constructor writes into the attribute vector for NULL rows.
template <typename T>
ValueID DictionarySegment<T>::null_value_id() const {
// Implementation goes here
Fail("Implementation is missing.");
}

// Looks up the dictionary entry that `value_id` refers to and returns a copy of it.
// Fails if `value_id` does not index an existing dictionary entry (this includes any ValueID used to encode NULL,
// as NULL has no dictionary entry to return).
template <typename T>
const T DictionarySegment<T>::value_of_value_id(const ValueID value_id) const {
  const auto dictionary_index = static_cast<size_t>(value_id);
  if (dictionary_index >= _dictionary.size()) {
    Fail("ValueID is out of range of the dictionary.");
  }
  return _dictionary[dictionary_index];
}

// Returns the ValueID of the first dictionary entry that is >= `value`, or INVALID_VALUE_ID if every entry is
// smaller than `value` (which is also the result for an empty dictionary).
// Relies on `_dictionary` being sorted in ascending order, as binary search is used.
template <typename T>
ValueID DictionarySegment<T>::lower_bound(const T value) const {
  const auto first_not_less = std::lower_bound(_dictionary.cbegin(), _dictionary.cend(), value);
  if (first_not_less == _dictionary.cend()) {
    return INVALID_VALUE_ID;
  }
  // The position within the sorted dictionary is the ValueID.
  return static_cast<ValueID>(first_not_less - _dictionary.cbegin());
}

// Variant-typed counterpart of lower_bound(T).
// Currently a stub: every call ends in Fail().
// NOTE(review): presumably this should convert `value` to T and delegate to lower_bound(T) - the conversion helper
// to use is not visible from this file.
template <typename T>
ValueID DictionarySegment<T>::lower_bound(const AllTypeVariant& value) const {
// Implementation goes here
Fail("Implementation is missing.");
}

// Returns the ValueID of the first dictionary entry that is > `value`, or INVALID_VALUE_ID if every entry is
// smaller than or equal to `value` (which is also the result for an empty dictionary).
// Relies on `_dictionary` being sorted in ascending order, as binary search is used.
template <typename T>
ValueID DictionarySegment<T>::upper_bound(const T value) const {
  const auto first_greater = std::upper_bound(_dictionary.cbegin(), _dictionary.cend(), value);
  if (first_greater == _dictionary.cend()) {
    return INVALID_VALUE_ID;
  }
  // The position within the sorted dictionary is the ValueID.
  return static_cast<ValueID>(first_greater - _dictionary.cbegin());
}

// Variant-typed counterpart of upper_bound(T).
// Currently a stub: every call ends in Fail().
// NOTE(review): presumably this should convert `value` to T and delegate to upper_bound(T) - the conversion helper
// to use is not visible from this file.
template <typename T>
ValueID DictionarySegment<T>::upper_bound(const AllTypeVariant& value) const {
// Implementation goes here
Fail("Implementation is missing.");
}

// Returns how many entries the dictionary holds, i.e., the number of distinct values stored in `_dictionary`.
template <typename T>
ChunkOffset DictionarySegment<T>::unique_values_count() const {
  return static_cast<ChunkOffset>(_dictionary.size());
}

// Returns the number of entries in this segment.
// Currently a placeholder: it always returns a value-initialized ChunkOffset, independent of the segment's content.
// NOTE(review): the accompanying test expects the number of rows of the compressed input segment (NULLs included),
// which presumably is the length of the attribute vector - confirm against AbstractAttributeVector.
template <typename T>
ChunkOffset DictionarySegment<T>::size() const {
// Implementation goes here
return ChunkOffset{};
}

// Returns an estimate of the memory used by this segment.
// Currently a placeholder: it always returns 0 (a value-initialized size_t).
// NOTE(review): a real estimate presumably has to account for both the dictionary and the attribute vector.
template <typename T>
size_t DictionarySegment<T>::estimate_memory_usage() const {
return size_t{};
}

EXPLICITLY_INSTANTIATE_DATA_TYPES(DictionarySegment);

} // namespace opossum
69 changes: 69 additions & 0 deletions src/lib/storage/dictionary_segment.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#pragma once

#include "abstract_segment.hpp"

namespace opossum {

class AbstractAttributeVector;

// DictionarySegment is a segment type that keeps its values in a dictionary vector (`_dictionary`) and additionally
// holds an attribute vector (`_attribute_vector`). Judging by the accompanying tests, the dictionary contains the
// sorted, distinct values and the attribute vector contains one ValueID per row.
template <typename T>
class DictionarySegment : public AbstractSegment {
public:
/**
* Creates a Dictionary segment from a given value segment.
*/
explicit DictionarySegment(const std::shared_ptr<AbstractSegment>& abstract_segment);

// Returns the value at a certain position as an AllTypeVariant. This is the generic, untyped accessor; if you want
// to write efficient operators, back off!
AllTypeVariant operator[](const ChunkOffset chunk_offset) const override;

// Returns the typed value at a certain position. Throws an error if value is NULL.
T get(const ChunkOffset chunk_offset) const;

// Returns the typed value at a certain position. Returns std::nullopt if the value is NULL.
std::optional<T> get_typed_value(const ChunkOffset chunk_offset) const;

// Returns the underlying dictionary (read-only).
const std::vector<T>& dictionary() const;

// Returns the underlying attribute vector (read-only).
std::shared_ptr<const AbstractAttributeVector> attribute_vector() const;

// Returns the ValueID used to represent a NULL value.
ValueID null_value_id() const;

// Returns the value represented by a given ValueID.
const T value_of_value_id(const ValueID value_id) const;

// Returns the first value ID that refers to a value >= the search value. Returns INVALID_VALUE_ID if all values are
// smaller than the search value.
ValueID lower_bound(const T value) const;

// Same as lower_bound(T), but accepts an AllTypeVariant.
ValueID lower_bound(const AllTypeVariant& value) const;

// Returns the first value ID that refers to a value > the search value. Returns INVALID_VALUE_ID if all values are
// smaller than or equal to the search value.
ValueID upper_bound(const T value) const;

// Same as upper_bound(T), but accepts an AllTypeVariant.
ValueID upper_bound(const AllTypeVariant& value) const;

// Returns the number of unique values (dictionary entries).
ChunkOffset unique_values_count() const;

// Returns the number of entries in the segment.
ChunkOffset size() const override;

// Returns the estimated memory usage of the segment.
size_t estimate_memory_usage() const final;

protected:
// The values referenced by the attribute vector.
std::vector<T> _dictionary;
// Only forward-declared in this header; exposed to callers as pointer-to-const via attribute_vector().
std::shared_ptr<AbstractAttributeVector> _attribute_vector;
};

EXPLICITLY_DECLARE_DATA_TYPES(DictionarySegment);

} // namespace opossum
1 change: 1 addition & 0 deletions src/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ set(
${SHARED_SOURCES}
lib/all_type_variant_test.cpp
storage/chunk_test.cpp
storage/dictionary_segment_test.cpp
storage/storage_manager_test.cpp
storage/table_test.cpp
storage/value_segment_test.cpp
Expand Down
73 changes: 73 additions & 0 deletions src/test/storage/dictionary_segment_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#include "base_test.hpp"

#include "resolve_type.hpp"
#include "storage/abstract_attribute_vector.hpp"
#include "storage/abstract_segment.hpp"
#include "storage/dictionary_segment.hpp"

namespace opossum {

// Test fixture that provides one int32_t and one string value segment as input for dictionary encoding. Both start
// out empty and are filled by the individual tests.
class StorageDictionarySegmentTest : public BaseTest {
protected:
std::shared_ptr<ValueSegment<int32_t>> value_segment_int{std::make_shared<ValueSegment<int32_t>>()};
// NOTE(review): the `true` argument presumably makes the segment nullable (a test appends NULL_VALUE to it) -
// confirm against the ValueSegment constructor.
std::shared_ptr<ValueSegment<std::string>> value_segment_str{std::make_shared<ValueSegment<std::string>>(true)};
};

// Compresses a string segment with duplicates and a trailing NULL and checks sizes, dictionary order, and NULL
// handling of the resulting dictionary segment.
TEST_F(StorageDictionarySegmentTest, CompressSegmentString) {
  // Six strings (four of them distinct), followed by a single NULL.
  for (const auto* const name : {"Bill", "Steve", "Alexander", "Steve", "Hasso", "Bill"}) {
    value_segment_str->append(name);
  }
  value_segment_str->append(NULL_VALUE);

  const auto dict_segment = std::make_shared<DictionarySegment<std::string>>(value_segment_str);

  // There has to be one entry per appended row, the NULL included.
  EXPECT_EQ(dict_segment->size(), 7);

  // Duplicates have to collapse into a single dictionary entry each.
  EXPECT_EQ(dict_segment->unique_values_count(), 4);

  // Dictionary entries have to be in ascending order.
  const auto& dictionary = dict_segment->dictionary();
  EXPECT_EQ(dictionary[0], "Alexander");
  EXPECT_EQ(dictionary[1], "Bill");
  EXPECT_EQ(dictionary[2], "Hasso");
  EXPECT_EQ(dictionary[3], "Steve");

  // The NULL in the last row has to be visible through all three accessors.
  const auto attribute_vector = dict_segment->attribute_vector();
  EXPECT_EQ(attribute_vector->get(6), dict_segment->null_value_id());
  EXPECT_EQ(dict_segment->get_typed_value(6), std::nullopt);
  EXPECT_THROW(dict_segment->get(6), std::logic_error);
}

// Checks lower_bound() and upper_bound() (typed and AllTypeVariant overloads) for values that are contained in the
// dictionary, that fall between two entries, and that are larger than every entry.
TEST_F(StorageDictionarySegmentTest, LowerUpperBound) {
  // Input values: 0, 2, 4, 6, 8, 10.
  for (auto value = int16_t{0}; value <= 10; value += 2) {
    value_segment_int->append(value);
  }

  // Create the segment via the type resolution by name, as done when only the column's type name is known.
  std::shared_ptr<AbstractSegment> segment;
  resolve_data_type("int", [&](auto type) {
    using Type = typename decltype(type)::type;
    segment = std::make_shared<DictionarySegment<Type>>(value_segment_int);
  });
  auto dict_segment = std::dynamic_pointer_cast<DictionarySegment<int32_t>>(segment);
  // Abort this test with a regular failure instead of dereferencing a null pointer if the cast did not succeed.
  ASSERT_NE(dict_segment, nullptr);

  // Value is contained in the dictionary.
  EXPECT_EQ(dict_segment->lower_bound(4), ValueID{2});
  EXPECT_EQ(dict_segment->upper_bound(4), ValueID{3});

  EXPECT_EQ(dict_segment->lower_bound(AllTypeVariant{4}), ValueID{2});
  EXPECT_EQ(dict_segment->upper_bound(AllTypeVariant{4}), ValueID{3});

  // Value lies between two dictionary entries.
  EXPECT_EQ(dict_segment->lower_bound(5), ValueID{3});
  EXPECT_EQ(dict_segment->upper_bound(5), ValueID{3});

  // Value is larger than every dictionary entry.
  EXPECT_EQ(dict_segment->lower_bound(15), INVALID_VALUE_ID);
  EXPECT_EQ(dict_segment->upper_bound(15), INVALID_VALUE_ID);
}

// TODO(student): You should add some more tests here (full coverage would be appreciated) and possibly in other files.

} // namespace opossum
11 changes: 11 additions & 0 deletions src/test/storage/table_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,15 @@ TEST_F(StorageTableTest, SegmentsNullable) {
EXPECT_TRUE(value_segment_2->is_nullable());
}

// Checks that rows can still be appended to a table after its only chunk has been compressed: the row appended
// afterwards is expected to show up in a second chunk.
TEST_F(StorageTableTest, AppendWithEncodedSegments) {
table.append({1, "foo"});
EXPECT_EQ(table.row_count(), 1);

// Compress the first (and so far only) chunk before appending the next row.
table.compress_chunk(ChunkID{0});
table.append({2, "bar"});

// Both rows are counted, and the table now consists of two chunks.
EXPECT_EQ(table.row_count(), 2);
EXPECT_EQ(table.chunk_count(), 2);
}

} // namespace opossum

0 comments on commit 2d5d2d1

Please sign in to comment.