Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sprint 2 #3

Merged
merged 18 commits into from
May 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ set(
storage/abstract_segment.hpp
storage/chunk.cpp
storage/chunk.hpp
storage/dictionary_segment.cpp
storage/dictionary_segment.hpp
storage/storage_manager.cpp
storage/storage_manager.hpp
storage/table.cpp
Expand Down
97 changes: 97 additions & 0 deletions src/lib/storage/dictionary_segment.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#include "dictionary_segment.hpp"

#include <algorithm>

#include "utils/assert.hpp"

namespace opossum {

// Constructor: per the header, it is meant to create a dictionary segment from a given value segment.
// TODO: the body is still empty, so `_dictionary` stays empty and `_attribute_vector` stays a null pointer.
// NOTE(review): the test file expects the dictionary to be sorted and free of duplicates after construction.
template <typename T>
DictionarySegment<T>::DictionarySegment(const std::shared_ptr<AbstractSegment>& abstract_segment) {
// Implementation goes here
}

// Per the header: returns the value at position `chunk_offset` as an AllTypeVariant.
// TODO: not implemented yet; every call currently ends in Fail().
template <typename T>
AllTypeVariant DictionarySegment<T>::operator[](const ChunkOffset chunk_offset) const {
// Implementation goes here
Fail("Implementation is missing.");
}

// Per the header: returns the value at position `chunk_offset` and reports an error if that value is NULL
// (the test file expects a std::logic_error for a NULL row).
// TODO: not implemented yet; every call currently ends in Fail(), NULL or not.
template <typename T>
T DictionarySegment<T>::get(const ChunkOffset chunk_offset) const {
// Implementation goes here
Fail("Implementation is missing.");
}

// Per the header: returns the value at position `chunk_offset`, or std::nullopt if that value is NULL.
// TODO: not implemented yet; every call currently ends in Fail().
template <typename T>
std::optional<T> DictionarySegment<T>::get_typed_value(const ChunkOffset chunk_offset) const {
// Implementation goes here
Fail("Implementation is missing.");
}

// Returns a read-only reference to the underlying dictionary (`_dictionary`).
// The reference is to a member, so it must not outlive this segment.
template <typename T>
const std::vector<T>& DictionarySegment<T>::dictionary() const {
  return _dictionary;
}

// Returns the underlying attribute vector as a pointer-to-const, sharing ownership with this segment.
// The result is a null pointer for as long as nothing has assigned `_attribute_vector`.
template <typename T>
std::shared_ptr<const AbstractAttributeVector> DictionarySegment<T>::attribute_vector() const {
  // shared_ptr<AbstractAttributeVector> converts implicitly to shared_ptr<const AbstractAttributeVector>.
  return _attribute_vector;
}

// Per the header: returns the ValueID that represents a NULL value.
// The test file expects the attribute vector to hold exactly this ID at a NULL row.
// TODO: not implemented yet; every call currently ends in Fail().
template <typename T>
ValueID DictionarySegment<T>::null_value_id() const {
// Implementation goes here
Fail("Implementation is missing.");
}

// Per the header: returns the value represented by `value_id`.
// TODO: not implemented yet; every call currently ends in Fail().
template <typename T>
const T DictionarySegment<T>::value_of_value_id(const ValueID value_id) const {
// Implementation goes here
Fail("Implementation is missing.");
}

// Returns the first ValueID whose dictionary entry is >= `value`, or INVALID_VALUE_ID if every entry is smaller
// (this includes the case of an empty dictionary).
//
// The ValueID is the entry's index in `_dictionary`. The binary search requires `_dictionary` to be sorted in
// ascending order; the CompressSegmentString test expects that ordering after construction.
template <typename T>
ValueID DictionarySegment<T>::lower_bound(const T value) const {
  const auto first_not_less = std::lower_bound(_dictionary.cbegin(), _dictionary.cend(), value);
  if (first_not_less == _dictionary.cend()) {
    return INVALID_VALUE_ID;
  }
  return static_cast<ValueID>(first_not_less - _dictionary.cbegin());
}

// Per the header: same contract as lower_bound(T), but the search value arrives as an AllTypeVariant.
// TODO: not implemented yet; every call currently ends in Fail().
template <typename T>
ValueID DictionarySegment<T>::lower_bound(const AllTypeVariant& value) const {
// Implementation goes here
Fail("Implementation is missing.");
}

// Returns the first ValueID whose dictionary entry is > `value`, or INVALID_VALUE_ID if every entry is smaller than
// or equal to it (this includes the case of an empty dictionary).
//
// The ValueID is the entry's index in `_dictionary`. The binary search requires `_dictionary` to be sorted in
// ascending order; the CompressSegmentString test expects that ordering after construction.
template <typename T>
ValueID DictionarySegment<T>::upper_bound(const T value) const {
  const auto first_greater = std::upper_bound(_dictionary.cbegin(), _dictionary.cend(), value);
  if (first_greater == _dictionary.cend()) {
    return INVALID_VALUE_ID;
  }
  return static_cast<ValueID>(first_greater - _dictionary.cbegin());
}

// Per the header: same contract as upper_bound(T), but the search value arrives as an AllTypeVariant.
// TODO: not implemented yet; every call currently ends in Fail().
template <typename T>
ValueID DictionarySegment<T>::upper_bound(const AllTypeVariant& value) const {
// Implementation goes here
Fail("Implementation is missing.");
}

// Returns the number of unique values, i.e. the number of entries in `_dictionary`.
template <typename T>
ChunkOffset DictionarySegment<T>::unique_values_count() const {
  // vector::size() yields size_t; convert explicitly to the declared return type.
  return static_cast<ChunkOffset>(_dictionary.size());
}

// Per the header: returns the number of entries in the segment.
// TODO: still a placeholder; it yields a value-initialized ChunkOffset regardless of the segment's contents.
template <typename T>
ChunkOffset DictionarySegment<T>::size() const {
  const auto placeholder_size = ChunkOffset{};
  return placeholder_size;
}

// Per the header: returns the calculated memory usage of the segment.
// TODO: still a placeholder; it yields a value-initialized size_t (zero) regardless of the segment's contents.
template <typename T>
size_t DictionarySegment<T>::estimate_memory_usage() const {
  const auto placeholder_bytes = size_t{};
  return placeholder_bytes;
}

// The member definitions above live in this .cpp file, so the template has to be instantiated here.
// NOTE(review): presumably this macro expands to one explicit instantiation per supported data type; confirm
// against its definition.
EXPLICITLY_INSTANTIATE_DATA_TYPES(DictionarySegment);

} // namespace opossum
69 changes: 69 additions & 0 deletions src/lib/storage/dictionary_segment.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#pragma once

#include "abstract_segment.hpp"

namespace opossum {

class AbstractAttributeVector;

// DictionarySegment is a segment type that keeps its values in a dictionary (`_dictionary`) together with a separate
// attribute vector (`_attribute_vector`). Judging by the tests, the dictionary holds each distinct value once in
// sorted order and the attribute vector holds one ValueID per row.
template <typename T>
class DictionarySegment : public AbstractSegment {
public:
/**
* Creates a Dictionary segment from a given value segment.
* NOTE(review): the parameter type is AbstractSegment; presumably it has to point to a ValueSegment<T>. Confirm
* against the implementation.
*/
explicit DictionarySegment(const std::shared_ptr<AbstractSegment>& abstract_segment);

// Returns the value at a certain position. If you want to write efficient operators, back off!
AllTypeVariant operator[](const ChunkOffset chunk_offset) const override;

// Returns the value at a certain position. Throws an error if value is NULL.
T get(const ChunkOffset chunk_offset) const;

// Returns the value at a certain position. Returns std::nullopt if the value is NULL.
std::optional<T> get_typed_value(const ChunkOffset chunk_offset) const;

// Returns the underlying dictionary by const reference.
const std::vector<T>& dictionary() const;

// Returns the underlying attribute vector as a shared pointer to const.
std::shared_ptr<const AbstractAttributeVector> attribute_vector() const;

// Returns the ValueID used to represent a NULL value.
ValueID null_value_id() const;

// Returns the value represented by a given ValueID.
const T value_of_value_id(const ValueID value_id) const;

// Returns the first value ID that refers to a value >= the search value. Returns INVALID_VALUE_ID if all values are
// smaller than the search value.
ValueID lower_bound(const T value) const;

// Same as lower_bound(T), but accepts an AllTypeVariant.
ValueID lower_bound(const AllTypeVariant& value) const;

// Returns the first value ID that refers to a value > the search value. Returns INVALID_VALUE_ID if all values are
// smaller than or equal to the search value.
ValueID upper_bound(const T value) const;

// Same as upper_bound(T), but accepts an AllTypeVariant.
ValueID upper_bound(const AllTypeVariant& value) const;

// Returns the number of unique values (dictionary entries).
ChunkOffset unique_values_count() const;

// Returns the number of entries.
ChunkOffset size() const override;

// Returns the calculated memory usage.
size_t estimate_memory_usage() const final;

protected:
// The dictionary entries.
std::vector<T> _dictionary;
// The attribute vector; only forward-declared in this header, so it is held through a pointer.
std::shared_ptr<AbstractAttributeVector> _attribute_vector;
};

// Counterpart of the EXPLICITLY_INSTANTIATE_DATA_TYPES(DictionarySegment) line in dictionary_segment.cpp.
// NOTE(review): presumably this expands to extern template declarations for the supported data types; confirm
// against the macro definition.
EXPLICITLY_DECLARE_DATA_TYPES(DictionarySegment);

} // namespace opossum
1 change: 1 addition & 0 deletions src/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ set(
${SHARED_SOURCES}
lib/all_type_variant_test.cpp
storage/chunk_test.cpp
storage/dictionary_segment_test.cpp
storage/storage_manager_test.cpp
storage/table_test.cpp
storage/value_segment_test.cpp
Expand Down
73 changes: 73 additions & 0 deletions src/test/storage/dictionary_segment_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#include "base_test.hpp"

#include "resolve_type.hpp"
#include "storage/abstract_attribute_vector.hpp"
#include "storage/abstract_segment.hpp"
#include "storage/dictionary_segment.hpp"

namespace opossum {

// Test fixture that provides one fresh int32_t value segment and one fresh string value segment per test.
// The string segment is constructed with `true` (presumably the nullable flag; the string test appends NULL_VALUE).
class StorageDictionarySegmentTest : public BaseTest {
 protected:
  std::shared_ptr<ValueSegment<int32_t>> value_segment_int = std::make_shared<ValueSegment<int32_t>>();
  std::shared_ptr<ValueSegment<std::string>> value_segment_str = std::make_shared<ValueSegment<std::string>>(true);
};

// Builds a dictionary segment from a string value segment that contains duplicates and one NULL, then checks the
// row count, the number of distinct values, the dictionary order, and the NULL handling.
TEST_F(StorageDictionarySegmentTest, CompressSegmentString) {
// Six strings (four distinct ones) followed by one NULL: seven rows in total.
value_segment_str->append("Bill");
value_segment_str->append("Steve");
value_segment_str->append("Alexander");
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

<3

value_segment_str->append("Steve");
value_segment_str->append("Hasso");
value_segment_str->append("Bill");
value_segment_str->append(NULL_VALUE);

const auto dict_segment = std::make_shared<DictionarySegment<std::string>>(value_segment_str);

// Test attribute_vector size.
EXPECT_EQ(dict_segment->size(), 7);

// Test dictionary size (uniqueness). The NULL row does not count as a dictionary entry.
EXPECT_EQ(dict_segment->unique_values_count(), 4);

// Test sorting.
const auto& dict = dict_segment->dictionary();
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

std::is_sorted()?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Kann man machen, explizit schadet mMn hier nicht. Hab nichts angefasst außer das, wo NULL steht

EXPECT_EQ(dict[0], "Alexander");
EXPECT_EQ(dict[1], "Bill");
EXPECT_EQ(dict[2], "Hasso");
EXPECT_EQ(dict[3], "Steve");

// Test NULL value handling. Row 6 is the NULL appended last.
EXPECT_EQ(dict_segment->attribute_vector()->get(6), dict_segment->null_value_id());
EXPECT_EQ(dict_segment->get_typed_value(6), std::nullopt);
EXPECT_THROW(dict_segment->get(6), std::logic_error);
}

// Checks lower_bound/upper_bound (both the typed and the AllTypeVariant overloads) on a dictionary segment built
// from the even numbers 0 to 10.
TEST_F(StorageDictionarySegmentTest, LowerUpperBound) {
// Appends 0, 2, 4, 6, 8, 10.
for (auto value = int16_t{0}; value <= 10; value += 2) {
value_segment_int->append(value);
}

// Creates the segment through resolve_data_type with the type name "int" instead of naming the template argument
// directly; the dynamic_pointer_cast below then expects the result to be a DictionarySegment<int32_t>.
std::shared_ptr<AbstractSegment> segment;
resolve_data_type("int", [&](auto type) {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mit Absicht so umständlich?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, ist wohl Absicht. Alles klar.

using Type = typename decltype(type)::type;
segment = std::make_shared<DictionarySegment<Type>>(value_segment_int);
});
auto dict_segment = std::dynamic_pointer_cast<DictionarySegment<int32_t>>(segment);

// 4 is present: lower_bound refers to 4 itself, upper_bound to the next entry (6).
EXPECT_EQ(dict_segment->lower_bound(4), ValueID{2});
EXPECT_EQ(dict_segment->upper_bound(4), ValueID{3});

EXPECT_EQ(dict_segment->lower_bound(AllTypeVariant{4}), ValueID{2});
EXPECT_EQ(dict_segment->upper_bound(AllTypeVariant{4}), ValueID{3});

// 5 is absent: both bounds refer to the first larger entry (6).
EXPECT_EQ(dict_segment->lower_bound(5), ValueID{3});
EXPECT_EQ(dict_segment->upper_bound(5), ValueID{3});

// 15 is larger than every entry: both bounds report INVALID_VALUE_ID.
EXPECT_EQ(dict_segment->lower_bound(15), INVALID_VALUE_ID);
EXPECT_EQ(dict_segment->upper_bound(15), INVALID_VALUE_ID);
}

// TODO(student): You should add some more tests here (full coverage would be appreciated) and possibly in other files.

} // namespace opossum
11 changes: 11 additions & 0 deletions src/test/storage/table_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,15 @@ TEST_F(StorageTableTest, SegmentsNullable) {
EXPECT_TRUE(value_segment_2->is_nullable());
}

// Appends a row, compresses chunk 0, and appends a second row. The test expects both rows to be counted and the
// table to consist of two chunks afterwards.
TEST_F(StorageTableTest, AppendWithEncodedSegments) {
  const auto first_chunk_id = ChunkID{0};

  // First row goes into the initial chunk.
  table.append({1, "foo"});
  EXPECT_EQ(table.row_count(), 1);

  // Compress that chunk, then append again.
  table.compress_chunk(first_chunk_id);
  table.append({2, "bar"});

  EXPECT_EQ(table.row_count(), 2);
  EXPECT_EQ(table.chunk_count(), 2);
}

} // namespace opossum