forked from hyrise/DYOD_SoSe23
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
c1ab358
commit b677dd8
Showing
6 changed files
with
253 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
#include "dictionary_segment.hpp" | ||
|
||
#include <algorithm> | ||
#include <iterator> | ||
|
||
#include "utils/assert.hpp" | ||
|
||
namespace opossum { | ||
|
||
// Constructs a DictionarySegment from `abstract_segment` (according to the header: the value segment to compress). | ||
// NOTE: This is still a stub. The body is empty, so `_dictionary` and `_attribute_vector` keep their | ||
// default-constructed state (an empty vector and a null pointer, respectively). | ||
template <typename T> | ||
DictionarySegment<T>::DictionarySegment(const std::shared_ptr<AbstractSegment>& abstract_segment) { | ||
// Implementation goes here | ||
} | ||
|
||
// Generic access: is supposed to return the value at `chunk_offset` wrapped in an AllTypeVariant. | ||
// NOTE: Not implemented yet. Every call unconditionally ends in Fail() (from utils/assert.hpp; presumably it | ||
// raises an error and never returns - confirm against that header). | ||
template <typename T> | ||
AllTypeVariant DictionarySegment<T>::operator[](const ChunkOffset chunk_offset) const { | ||
// Implementation goes here | ||
Fail("Implementation is missing."); | ||
} | ||
|
||
// Typed access: is supposed to return the value at `chunk_offset` as a T. The header documents that an error is | ||
// raised when the value is NULL. | ||
// NOTE: Not implemented yet. Every call unconditionally ends in Fail(), regardless of `chunk_offset`. | ||
template <typename T> | ||
T DictionarySegment<T>::get(const ChunkOffset chunk_offset) const { | ||
// Implementation goes here | ||
Fail("Implementation is missing."); | ||
} | ||
|
||
// NULL-aware typed access: is supposed to return the value at `chunk_offset`, or std::nullopt if that value is | ||
// NULL (see the declaration in the header). | ||
// NOTE: Not implemented yet. Every call unconditionally ends in Fail(), regardless of `chunk_offset`. | ||
template <typename T> | ||
std::optional<T> DictionarySegment<T>::get_typed_value(const ChunkOffset chunk_offset) const { | ||
// Implementation goes here | ||
Fail("Implementation is missing."); | ||
} | ||
|
||
// Returns a read-only reference to the underlying dictionary, i.e., the vector of values that ValueIDs refer to. | ||
// The reference is only valid as long as this segment is alive. | ||
template <typename T> | ||
const std::vector<T>& DictionarySegment<T>::dictionary() const { | ||
  return _dictionary; | ||
} | ||
|
||
// Returns the underlying attribute vector as a pointer-to-const, so callers can read but not modify it. | ||
// The returned pointer shares ownership with the segment. It is null if no attribute vector has been set. | ||
template <typename T> | ||
std::shared_ptr<const AbstractAttributeVector> DictionarySegment<T>::attribute_vector() const { | ||
  return _attribute_vector; | ||
} | ||
|
||
// Is supposed to return the ValueID that represents NULL in the attribute vector (see the header). | ||
// NOTE: Not implemented yet. Every call unconditionally ends in Fail(). The concrete NULL id has to match | ||
// whatever id the constructor stores for NULL entries once both are implemented. | ||
template <typename T> | ||
ValueID DictionarySegment<T>::null_value_id() const { | ||
// Implementation goes here | ||
Fail("Implementation is missing."); | ||
} | ||
|
||
// Returns (a copy of) the dictionary entry that `value_id` refers to. | ||
// std::vector::at() is used on purpose: an id that does not denote a dictionary entry results in a | ||
// std::out_of_range exception instead of undefined behavior. | ||
template <typename T> | ||
const T DictionarySegment<T>::value_of_value_id(const ValueID value_id) const { | ||
  return _dictionary.at(value_id); | ||
} | ||
|
||
// Returns the ValueID of the first dictionary entry that is >= `value`, or INVALID_VALUE_ID if all entries are | ||
// smaller than `value` (this includes an empty dictionary). | ||
// The lookup is a binary search and therefore requires `_dictionary` to be sorted in ascending order. | ||
template <typename T> | ||
ValueID DictionarySegment<T>::lower_bound(const T value) const { | ||
  const auto first_not_less = std::lower_bound(_dictionary.cbegin(), _dictionary.cend(), value); | ||
  if (first_not_less == _dictionary.cend()) { | ||
    return INVALID_VALUE_ID; | ||
  } | ||
  // The position of an entry within the dictionary is its ValueID. | ||
  return static_cast<ValueID>(std::distance(_dictionary.cbegin(), first_not_less)); | ||
} | ||
|
||
// AllTypeVariant overload of lower_bound(): is supposed to behave like lower_bound(T) for the value held by the | ||
// variant (see the header). | ||
// NOTE: Not implemented yet. Every call unconditionally ends in Fail(), regardless of `value`. | ||
template <typename T> | ||
ValueID DictionarySegment<T>::lower_bound(const AllTypeVariant& value) const { | ||
// Implementation goes here | ||
Fail("Implementation is missing."); | ||
} | ||
|
||
// Returns the ValueID of the first dictionary entry that is > `value`, or INVALID_VALUE_ID if all entries are | ||
// smaller than or equal to `value` (this includes an empty dictionary). | ||
// The lookup is a binary search and therefore requires `_dictionary` to be sorted in ascending order. | ||
template <typename T> | ||
ValueID DictionarySegment<T>::upper_bound(const T value) const { | ||
  const auto first_greater = std::upper_bound(_dictionary.cbegin(), _dictionary.cend(), value); | ||
  if (first_greater == _dictionary.cend()) { | ||
    return INVALID_VALUE_ID; | ||
  } | ||
  // The position of an entry within the dictionary is its ValueID. | ||
  return static_cast<ValueID>(std::distance(_dictionary.cbegin(), first_greater)); | ||
} | ||
|
||
// AllTypeVariant overload of upper_bound(): is supposed to behave like upper_bound(T) for the value held by the | ||
// variant (see the header). | ||
// NOTE: Not implemented yet. Every call unconditionally ends in Fail(), regardless of `value`. | ||
template <typename T> | ||
ValueID DictionarySegment<T>::upper_bound(const AllTypeVariant& value) const { | ||
// Implementation goes here | ||
Fail("Implementation is missing."); | ||
} | ||
|
||
// Returns the number of dictionary entries, i.e., the number of unique values stored in `_dictionary`. | ||
template <typename T> | ||
ChunkOffset DictionarySegment<T>::unique_values_count() const { | ||
  // The vector's size_t is converted explicitly to the ChunkOffset type required by the interface. | ||
  return static_cast<ChunkOffset>(_dictionary.size()); | ||
} | ||
|
||
// Reports the number of entries of the segment. | ||
// NOTE: Placeholder. It currently always yields a value-initialized ChunkOffset, independent of the segment's data. | ||
template <typename T> | ||
ChunkOffset DictionarySegment<T>::size() const { | ||
  // Implementation goes here | ||
  const auto entry_count = ChunkOffset{}; | ||
  return entry_count; | ||
} | ||
|
||
// Reports the estimated memory usage of the segment. | ||
// NOTE: Placeholder. It currently always yields a value-initialized size_t, independent of the segment's data. | ||
template <typename T> | ||
size_t DictionarySegment<T>::estimate_memory_usage() const { | ||
  const auto estimated_bytes = size_t{}; | ||
  return estimated_bytes; | ||
} | ||
|
||
EXPLICITLY_INSTANTIATE_DATA_TYPES(DictionarySegment); | ||
|
||
} // namespace opossum |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
#pragma once | ||
|
||
#include "abstract_segment.hpp" | ||
|
||
namespace opossum { | ||
|
||
class AbstractAttributeVector; | ||
|
||
// DictionarySegment is a segment type that is meant to keep its values in a dictionary (`_dictionary`) and to | ||
// reference them by ValueID through an attribute vector (`_attribute_vector`). The accompanying tests expect the | ||
// dictionary to contain each distinct value once, sorted in ascending order. | ||
template <typename T> | ||
class DictionarySegment : public AbstractSegment { | ||
public: | ||
/** | ||
* Creates a Dictionary segment from a given value segment. | ||
*/ | ||
explicit DictionarySegment(const std::shared_ptr<AbstractSegment>& abstract_segment); | ||
|
||
// Returns the value at a certain position as an AllTypeVariant. This generic accessor is not meant for | ||
// performance-critical code: if you want to write efficient operators, back off! | ||
AllTypeVariant operator[](const ChunkOffset chunk_offset) const override; | ||
|
||
// Returns the value at a certain position. Throws an error if value is NULL. | ||
T get(const ChunkOffset chunk_offset) const; | ||
|
||
// Returns the value at a certain position. Returns std::nullopt if the value is NULL. | ||
std::optional<T> get_typed_value(const ChunkOffset chunk_offset) const; | ||
|
||
// Returns the underlying dictionary, i.e., the vector of values that ValueIDs refer to. | ||
const std::vector<T>& dictionary() const; | ||
|
||
// Returns the underlying attribute vector (read-only). | ||
std::shared_ptr<const AbstractAttributeVector> attribute_vector() const; | ||
|
||
// Returns the ValueID used to represent a NULL value. | ||
ValueID null_value_id() const; | ||
|
||
// Returns the value represented by a given ValueID. | ||
const T value_of_value_id(const ValueID value_id) const; | ||
|
||
// Returns the first value ID that refers to a value >= the search value. Returns INVALID_VALUE_ID if all values are | ||
// smaller than the search value. | ||
ValueID lower_bound(const T value) const; | ||
|
||
// Same as lower_bound(T), but accepts an AllTypeVariant. | ||
ValueID lower_bound(const AllTypeVariant& value) const; | ||
|
||
// Returns the first value ID that refers to a value > the search value. Returns INVALID_VALUE_ID if all values are | ||
// smaller than or equal to the search value. | ||
ValueID upper_bound(const T value) const; | ||
|
||
// Same as upper_bound(T), but accepts an AllTypeVariant. | ||
ValueID upper_bound(const AllTypeVariant& value) const; | ||
|
||
// Returns the number of unique_values (dictionary entries). | ||
ChunkOffset unique_values_count() const; | ||
|
||
// Returns the number of entries. | ||
ChunkOffset size() const override; | ||
|
||
// Returns the calculated memory usage. | ||
size_t estimate_memory_usage() const final; | ||
|
||
protected: | ||
// The dictionary: the values that ValueIDs refer to. | ||
std::vector<T> _dictionary; | ||
// The attribute vector; presumably holds one ValueID per segment entry - confirm against AbstractAttributeVector. | ||
std::shared_ptr<AbstractAttributeVector> _attribute_vector; | ||
}; | ||
|
||
EXPLICITLY_DECLARE_DATA_TYPES(DictionarySegment); | ||
|
||
} // namespace opossum |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#include "base_test.hpp" | ||
|
||
#include "resolve_type.hpp" | ||
#include "storage/abstract_attribute_vector.hpp" | ||
#include "storage/abstract_segment.hpp" | ||
#include "storage/dictionary_segment.hpp" | ||
|
||
namespace opossum { | ||
|
||
// Test fixture that provides freshly constructed value segments, which the tests fill and then compress. | ||
class StorageDictionarySegmentTest : public BaseTest { | ||
 protected: | ||
  // Integer input segment, constructed with ValueSegment's default arguments. | ||
  std::shared_ptr<ValueSegment<int32_t>> value_segment_int = std::make_shared<ValueSegment<int32_t>>(); | ||
  // String input segment. The `true` argument presumably marks the segment as nullable (the tests append | ||
  // NULL_VALUE to it) - confirm against ValueSegment's constructor. | ||
  std::shared_ptr<ValueSegment<std::string>> value_segment_str = std::make_shared<ValueSegment<std::string>>(true); | ||
}; | ||
|
||
// Compresses a string segment and verifies the entry count, the deduplication, the dictionary order, and the | ||
// handling of NULL values. | ||
TEST_F(StorageDictionarySegmentTest, CompressSegmentString) { | ||
  // Six strings (four distinct ones), followed by a single NULL at offset 6. | ||
  for (const std::string& name : {"Bill", "Steve", "Alexander", "Steve", "Hasso", "Bill"}) { | ||
    value_segment_str->append(name); | ||
  } | ||
  value_segment_str->append(NULL_VALUE); | ||
|
||
  const auto compressed_segment = std::make_shared<DictionarySegment<std::string>>(value_segment_str); | ||
|
||
  // All seven entries (including the NULL) have to be present. | ||
  EXPECT_EQ(compressed_segment->size(), 7); | ||
|
||
  // Only the four distinct strings may end up in the dictionary. | ||
  EXPECT_EQ(compressed_segment->unique_values_count(), 4); | ||
|
||
  // The dictionary has to be sorted in ascending order. | ||
  const auto& sorted_dictionary = compressed_segment->dictionary(); | ||
  EXPECT_EQ(sorted_dictionary[0], "Alexander"); | ||
  EXPECT_EQ(sorted_dictionary[1], "Bill"); | ||
  EXPECT_EQ(sorted_dictionary[2], "Hasso"); | ||
  EXPECT_EQ(sorted_dictionary[3], "Steve"); | ||
|
||
  // The NULL entry maps to the NULL value id, yields std::nullopt, and makes get() throw. | ||
  EXPECT_EQ(compressed_segment->attribute_vector()->get(6), compressed_segment->null_value_id()); | ||
  EXPECT_EQ(compressed_segment->get_typed_value(6), std::nullopt); | ||
  EXPECT_THROW(compressed_segment->get(6), std::logic_error); | ||
} | ||
|
||
// Checks lower_bound() and upper_bound() (typed and AllTypeVariant overloads) on the values 0, 2, 4, ..., 10: for a | ||
// value that is contained (4), for a value that falls between two entries (5), and for a value that is larger than | ||
// all entries (15). | ||
TEST_F(StorageDictionarySegmentTest, LowerUpperBound) { | ||
  for (auto value = int16_t{0}; value <= 10; value += 2) { | ||
    value_segment_int->append(value); | ||
  } | ||
|
||
  // Create the segment via the data type name, i.e., the way it is created when the type is only known at runtime. | ||
  std::shared_ptr<AbstractSegment> segment; | ||
  resolve_data_type("int", [&](auto type) { | ||
    using Type = typename decltype(type)::type; | ||
    segment = std::make_shared<DictionarySegment<Type>>(value_segment_int); | ||
  }); | ||
  const auto dict_segment = std::dynamic_pointer_cast<DictionarySegment<int32_t>>(segment); | ||
  // Abort this test with a regular failure instead of dereferencing a null pointer below if the segment was not | ||
  // created or does not have the expected type. | ||
  ASSERT_TRUE(dict_segment != nullptr); | ||
|
||
  EXPECT_EQ(dict_segment->lower_bound(4), ValueID{2}); | ||
  EXPECT_EQ(dict_segment->upper_bound(4), ValueID{3}); | ||
|
||
  EXPECT_EQ(dict_segment->lower_bound(AllTypeVariant{4}), ValueID{2}); | ||
  EXPECT_EQ(dict_segment->upper_bound(AllTypeVariant{4}), ValueID{3}); | ||
|
||
  EXPECT_EQ(dict_segment->lower_bound(5), ValueID{3}); | ||
  EXPECT_EQ(dict_segment->upper_bound(5), ValueID{3}); | ||
|
||
  EXPECT_EQ(dict_segment->lower_bound(15), INVALID_VALUE_ID); | ||
  EXPECT_EQ(dict_segment->upper_bound(15), INVALID_VALUE_ID); | ||
} | ||
|
||
// TODO(student): You should add some more tests here (full coverage would be appreciated) and possibly in other files. | ||
|
||
} // namespace opossum |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters