From 185423c7670ef7791149a798ad4cdf81d70967bd Mon Sep 17 00:00:00 2001 From: Daniel Lindner <27929897+dey4ss@users.noreply.github.com> Date: Mon, 22 May 2023 12:51:30 +0200 Subject: [PATCH] Sprint 3 (#5) --- src/lib/CMakeLists.txt | 10 + src/lib/operators/abstract_operator.cpp | 27 ++ src/lib/operators/abstract_operator.hpp | 59 ++++ src/lib/operators/get_table.hpp | 27 ++ src/lib/operators/print.cpp | 110 ++++++++ src/lib/operators/print.hpp | 24 ++ src/lib/operators/table_scan.hpp | 37 +++ src/lib/operators/table_wrapper.cpp | 10 + src/lib/operators/table_wrapper.hpp | 21 ++ src/lib/storage/reference_segment.cpp | 43 +++ src/lib/storage/reference_segment.hpp | 29 ++ src/test/CMakeLists.txt | 4 + src/test/operators/get_table_test.cpp | 30 ++ src/test/operators/print_test.cpp | 110 ++++++++ src/test/operators/table_scan_test.cpp | 286 ++++++++++++++++++++ src/test/storage/reference_segment_test.cpp | 95 +++++++ 16 files changed, 922 insertions(+) create mode 100644 src/lib/operators/abstract_operator.cpp create mode 100644 src/lib/operators/abstract_operator.hpp create mode 100644 src/lib/operators/get_table.hpp create mode 100644 src/lib/operators/print.cpp create mode 100644 src/lib/operators/print.hpp create mode 100644 src/lib/operators/table_scan.hpp create mode 100644 src/lib/operators/table_wrapper.cpp create mode 100644 src/lib/operators/table_wrapper.hpp create mode 100644 src/lib/storage/reference_segment.cpp create mode 100644 src/lib/storage/reference_segment.hpp create mode 100644 src/test/operators/get_table_test.cpp create mode 100644 src/test/operators/print_test.cpp create mode 100644 src/test/operators/table_scan_test.cpp create mode 100644 src/test/storage/reference_segment_test.cpp diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index b0f0faf..08157f8 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -3,6 +3,14 @@ set( SOURCES all_type_variant.hpp null_value.hpp + operators/abstract_operator.cpp + 
operators/abstract_operator.hpp + operators/get_table.hpp + operators/print.cpp + operators/print.hpp + operators/table_scan.hpp + operators/table_wrapper.cpp + operators/table_wrapper.hpp resolve_type.hpp storage/abstract_attribute_vector.hpp storage/abstract_segment.hpp @@ -10,6 +18,8 @@ set( storage/chunk.hpp storage/dictionary_segment.cpp storage/dictionary_segment.hpp + storage/reference_segment.cpp + storage/reference_segment.hpp storage/storage_manager.cpp storage/storage_manager.hpp storage/table.cpp diff --git a/src/lib/operators/abstract_operator.cpp b/src/lib/operators/abstract_operator.cpp new file mode 100644 index 0000000..d82a196 --- /dev/null +++ b/src/lib/operators/abstract_operator.cpp @@ -0,0 +1,27 @@ +#include "abstract_operator.hpp" + +namespace opossum { + +AbstractOperator::AbstractOperator(const std::shared_ptr left, + const std::shared_ptr right) + : _left_input(left), _right_input(right) {} + +void AbstractOperator::execute() { + _output = _on_execute(); +} + +std::shared_ptr AbstractOperator::get_output() const { + // TODO(student): You should place some meaningful checks here + + return _output; +} + +std::shared_ptr AbstractOperator::_left_input_table() const { + return _left_input->get_output(); +} + +std::shared_ptr AbstractOperator::_right_input_table() const { + return _right_input->get_output(); +} + +} // namespace opossum diff --git a/src/lib/operators/abstract_operator.hpp b/src/lib/operators/abstract_operator.hpp new file mode 100644 index 0000000..eeeef06 --- /dev/null +++ b/src/lib/operators/abstract_operator.hpp @@ -0,0 +1,59 @@ +#pragma once + +#include + +#include "types.hpp" + +namespace opossum { + +class Table; + +// AbstractOperator is the abstract super class for all operators. All operators have up to two input tables and one +// output table. Their lifecycle has three phases: +// 1. The operator is constructed. 
Previous operators are not guaranteed to have already executed, so operators must not +// call get_output in their constructor. +// 2. The execute method is called from the outside (usually by the scheduler). This is where the heavy lifting is done. +// By now, the input operators have already executed. +// 3. The consumer (usually another operator) calls get_output. This should be very cheap. It is only guaranteed to +// succeed if execute was called before. Otherwise, a nullptr or an empty table could be returned. +// +// Operators shall not be executed twice. + +class AbstractOperator : private Noncopyable { + public: + AbstractOperator(const std::shared_ptr left = nullptr, + const std::shared_ptr right = nullptr); + + virtual ~AbstractOperator() = default; + + // We need to explicitly set the move constructor to default when we overwrite the copy constructor. + AbstractOperator(AbstractOperator&&) = default; + AbstractOperator& operator=(AbstractOperator&&) = default; + + void execute(); + + // Returns the result of the operator. + std::shared_ptr get_output() const; + + // Get the input operators. + std::shared_ptr left_input() const; + std::shared_ptr right_input() const; + + protected: + // Abstract method to actually execute the operator. execute and get_output are split into two methods to allow for + // easier asynchronous execution. + virtual std::shared_ptr _on_execute() = 0; + + std::shared_ptr _left_input_table() const; + std::shared_ptr _right_input_table() const; + + // Shared pointers to input operators. Can be nullptr, for example, if an operator is the leaf operator in the query + // plan or if the operator has only one input operator. + std::shared_ptr _left_input; + std::shared_ptr _right_input; + + // Is nullptr until the operator is executed. 
+ std::shared_ptr _output; +}; + +} // namespace opossum diff --git a/src/lib/operators/get_table.hpp b/src/lib/operators/get_table.hpp new file mode 100644 index 0000000..3b7f1d9 --- /dev/null +++ b/src/lib/operators/get_table.hpp @@ -0,0 +1,27 @@ +#pragma once + +#include "abstract_operator.hpp" +#include "utils/assert.hpp" + +namespace opossum { + +// Operator to retrieve a table from the StorageManager by specifying its name. +class GetTable : public AbstractOperator { + public: + explicit GetTable(const std::string& name) { + // TODO(student) implement it in a source file and change this to a declaration. + } + + const std::string& table_name() const { + // TODO(student) implement it in a source file and change this to a declaration. + Fail("Implementation missing."); + } + + protected: + std::shared_ptr _on_execute() override { + // TODO(student) implement it in a source file and change this to a declaration. + Fail("Implementation missing."); + } +}; + +} // namespace opossum diff --git a/src/lib/operators/print.cpp b/src/lib/operators/print.cpp new file mode 100644 index 0000000..567bca7 --- /dev/null +++ b/src/lib/operators/print.cpp @@ -0,0 +1,110 @@ +#include "print.hpp" + +#include + +#include "operators/table_wrapper.hpp" +#include "storage/abstract_segment.hpp" +#include "storage/table.hpp" +#include "type_cast.hpp" + +namespace { + +using namespace opossum; // NOLINT(build/namespaces) + +std::string print_column_type(const std::shared_ptr& table, const ColumnID column_id) { + auto stream = std::stringstream{}; + stream << table->column_type(column_id); + if (table->column_nullable(column_id)) { + stream << "_null"; + } + + return stream.str(); +} + +} // namespace + +namespace opossum { + +Print::Print(const std::shared_ptr in, std::ostream& out) : AbstractOperator(in), _out(out) {} + +void Print::print(std::shared_ptr& table, std::ostream& out) { + auto table_wrapper = std::make_shared(table); + table_wrapper->execute(); + Print(table_wrapper, 
out).execute(); +} + +std::shared_ptr Print::_on_execute() { + auto widths = _column_string_widths(8, 20, _left_input_table()); + + // Print column headers. + _out << "=== Columns" << std::endl; + const auto left_column_count = _left_input_table()->column_count(); + for (auto column_id = ColumnID{0}; column_id < left_column_count; ++column_id) { + _out << "|" << std::setw(widths[column_id]) << _left_input_table()->column_name(column_id) << std::setw(0); + } + _out << "|" << std::endl; + for (auto column_id = ColumnID{0}; column_id < left_column_count; ++column_id) { + _out << "|" << std::setw(widths[column_id]) << print_column_type(_left_input_table(), column_id) << std::setw(0); + } + _out << "|" << std::endl; + + // print each chunk + const auto left_chunk_count = _left_input_table()->chunk_count(); + for (auto chunk_id = ChunkID{0}; chunk_id < left_chunk_count; ++chunk_id) { + const auto chunk = _left_input_table()->get_chunk(chunk_id); + + _out << "=== Chunk " << chunk_id << " === " << std::endl; + + if (chunk->size() == 0) { + _out << "Empty chunk." << std::endl; + continue; + } + + // Print the rows in the chunk. + const auto chunk_size = chunk->size(); + for (size_t row = 0; row < chunk_size; ++row) { + _out << "|"; + const auto column_count = chunk->column_count(); + for (auto column_id = ColumnID{0}; column_id < column_count; ++column_id) { + // Yes, we use AbstractSegment::operator[] here, but since Print is not an operation that should be part of a + // regular query plan, let's keep things simple here. 
+ _out << std::setw(widths[column_id]) << (*chunk->get_segment(column_id))[row] << "|" << std::setw(0); + } + + _out << std::endl; + } + } + + return _left_input_table(); +} + +// In order to print the table as an actual table, with columns being aligned, we need to calculate the number of +// characters in the printed representation of each column. `min` and `max` can be used to limit the width of the +// columns - however, every column fits at least the column's name. +std::vector Print::_column_string_widths(uint16_t min, uint16_t max, + const std::shared_ptr& table) const { + auto widths = std::vector(table->column_count()); + // Calculate the length of the column name. + const auto column_count = table->column_count(); + for (auto column_id = ColumnID{0}; column_id < column_count; ++column_id) { + widths[column_id] = std::max(min, static_cast(table->column_name(column_id).size())); + } + + // Go over all rows and find the maximum length of the printed representation of a value, up to max. 
+ const auto left_chunk_count = _left_input_table()->chunk_count(); + for (auto chunk_id = ChunkID{0}; chunk_id < left_chunk_count; ++chunk_id) { + auto chunk = _left_input_table()->get_chunk(chunk_id); + + const auto column_count = chunk->column_count(); + for (auto column_id = ColumnID{0}; column_id < column_count; ++column_id) { + for (auto row = size_t{0}; row < chunk->size(); ++row) { + auto cell_length = + static_cast(boost::lexical_cast((*chunk->get_segment(column_id))[row]).size()); + widths[column_id] = std::max({min, widths[column_id], std::min(max, cell_length)}); + } + } + } + return widths; +} + +} // namespace opossum diff --git a/src/lib/operators/print.hpp b/src/lib/operators/print.hpp new file mode 100644 index 0000000..c6672ff --- /dev/null +++ b/src/lib/operators/print.hpp @@ -0,0 +1,24 @@ +#pragma once + +#include "abstract_operator.hpp" + +namespace opossum { + +/** + * operator to print the table with its data + */ +class Print : public AbstractOperator { + public: + explicit Print(const std::shared_ptr in, std::ostream& out = std::cout); + + static void print(std::shared_ptr& table, std::ostream& out = std::cout); + + protected: + std::vector _column_string_widths(uint16_t min, uint16_t max, + const std::shared_ptr& table) const; + std::shared_ptr _on_execute() override; + + // stream to print the result + std::ostream& _out; +}; +} // namespace opossum diff --git a/src/lib/operators/table_scan.hpp b/src/lib/operators/table_scan.hpp new file mode 100644 index 0000000..bb1ab98 --- /dev/null +++ b/src/lib/operators/table_scan.hpp @@ -0,0 +1,37 @@ +#pragma once + +#include "abstract_operator.hpp" +#include "utils/assert.hpp" + +namespace opossum { + +class TableScan : public AbstractOperator { + public: + TableScan(const std::shared_ptr& in, const ColumnID column_id, const ScanType scan_type, + const AllTypeVariant search_value) { + // TODO(student) implement it in a source file and change this to a declaration. 
+ } + + ColumnID column_id() const { + // TODO(student) implement it in a source file and change this to a declaration. + Fail("Implementation missing."); + } + + ScanType scan_type() const { + // TODO(student) implement it in a source file and change this to a declaration. + Fail("Implementation missing."); + } + + const AllTypeVariant& search_value() const { + // TODO(student) implement it in a source file and change this to a declaration. + Fail("Implementation missing."); + } + + protected: + std::shared_ptr _on_execute() override { + // TODO(student) implement it in a source file and change this to a declaration. + Fail("Implementation missing."); + } +}; + +} // namespace opossum diff --git a/src/lib/operators/table_wrapper.cpp b/src/lib/operators/table_wrapper.cpp new file mode 100644 index 0000000..92ac724 --- /dev/null +++ b/src/lib/operators/table_wrapper.cpp @@ -0,0 +1,10 @@ +#include "table_wrapper.hpp" + +namespace opossum { + +TableWrapper::TableWrapper(const std::shared_ptr& table) : _table(table) {} + +std::shared_ptr TableWrapper::_on_execute() { + return _table; +} +} // namespace opossum diff --git a/src/lib/operators/table_wrapper.hpp b/src/lib/operators/table_wrapper.hpp new file mode 100644 index 0000000..3cdea8a --- /dev/null +++ b/src/lib/operators/table_wrapper.hpp @@ -0,0 +1,21 @@ +#pragma once + +#include "abstract_operator.hpp" +#include "utils/assert.hpp" + +namespace opossum { + +/** + * Operator that wraps a table. 
+ */ +class TableWrapper : public AbstractOperator { + public: + explicit TableWrapper(const std::shared_ptr& table); + + protected: + std::shared_ptr _on_execute() override; + + // Table to retrieve + const std::shared_ptr _table; +}; +} // namespace opossum diff --git a/src/lib/storage/reference_segment.cpp b/src/lib/storage/reference_segment.cpp new file mode 100644 index 0000000..dc0a252 --- /dev/null +++ b/src/lib/storage/reference_segment.cpp @@ -0,0 +1,43 @@ +#include "reference_segment.hpp" + +#include "storage/table.hpp" +#include "utils/assert.hpp" + +namespace opossum { + +ReferenceSegment::ReferenceSegment(const std::shared_ptr& referenced_table, + const ColumnID referenced_column_id, const std::shared_ptr& pos) { + // Implementation goes here +} + +AllTypeVariant ReferenceSegment::operator[](const ChunkOffset chunk_offset) const { + // Implementation goes here + Fail("Implementation is missing."); +} + +ChunkOffset ReferenceSegment::size() const { + // Implementation goes here + Fail("Implementation is missing."); +} + +const std::shared_ptr& ReferenceSegment::pos_list() const { + // Implementation goes here + Fail("Implementation is missing."); +} + +const std::shared_ptr& ReferenceSegment::referenced_table() const { + // Implementation goes here + Fail("Implementation is missing."); +} + +ColumnID ReferenceSegment::referenced_column_id() const { + // Implementation goes here + Fail("Implementation is missing."); +} + +size_t ReferenceSegment::estimate_memory_usage() const { + // Implementation goes here + Fail("Implementation is missing."); +} + +} // namespace opossum diff --git a/src/lib/storage/reference_segment.hpp b/src/lib/storage/reference_segment.hpp new file mode 100644 index 0000000..71c149a --- /dev/null +++ b/src/lib/storage/reference_segment.hpp @@ -0,0 +1,29 @@ +#pragma once + +#include "abstract_segment.hpp" + +namespace opossum { + +class Table; + +// ReferenceSegment is a specific segment type that stores all its values as position 
list of a referenced column. +class ReferenceSegment : public AbstractSegment { + public: + // Creates a reference segment. The parameters specify the positions and the referenced column. + ReferenceSegment(const std::shared_ptr& referenced_table, const ColumnID referenced_column_id, + const std::shared_ptr& pos); + + AllTypeVariant operator[](const ChunkOffset chunk_offset) const override; + + ChunkOffset size() const override; + + const std::shared_ptr& pos_list() const; + + const std::shared_ptr& referenced_table() const; + + ColumnID referenced_column_id() const; + + size_t estimate_memory_usage() const final; +}; + +} // namespace opossum diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 79f4bb3..21b1ffd 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -10,8 +10,12 @@ set( OPOSSUM_TEST_SOURCES ${SHARED_SOURCES} lib/all_type_variant_test.cpp + operators/get_table_test.cpp + operators/print_test.cpp + operators/table_scan_test.cpp storage/chunk_test.cpp storage/dictionary_segment_test.cpp + storage/reference_segment_test.cpp storage/storage_manager_test.cpp storage/table_test.cpp storage/value_segment_test.cpp diff --git a/src/test/operators/get_table_test.cpp b/src/test/operators/get_table_test.cpp new file mode 100644 index 0000000..90ce855 --- /dev/null +++ b/src/test/operators/get_table_test.cpp @@ -0,0 +1,30 @@ +#include "base_test.hpp" + +#include "operators/get_table.hpp" +#include "storage/storage_manager.hpp" + +namespace opossum { +class OperatorsGetTableTest : public BaseTest { + protected: + void SetUp() override { + _test_table = std::make_shared(2); + StorageManager::get().add_table("TableA", _test_table); + } + + std::shared_ptr
_test_table; +}; + +TEST_F(OperatorsGetTableTest, GetOutput) { + auto get_table_oper = std::make_shared("TableA"); + get_table_oper->execute(); + + EXPECT_EQ(get_table_oper->get_output(), _test_table); +} + +TEST_F(OperatorsGetTableTest, ThrowsUnknownTableName) { + auto get_table_oper = std::make_shared("TableB"); + + EXPECT_THROW(get_table_oper->execute(), std::logic_error) << "Should throw unknown table name exception"; +} + +} // namespace opossum diff --git a/src/test/operators/print_test.cpp b/src/test/operators/print_test.cpp new file mode 100644 index 0000000..7a65e8a --- /dev/null +++ b/src/test/operators/print_test.cpp @@ -0,0 +1,110 @@ +#include "base_test.hpp" + +#include "operators/get_table.hpp" +#include "operators/print.hpp" +#include "storage/storage_manager.hpp" +#include "storage/table.hpp" + +namespace opossum { + +class OperatorsPrintTest : public BaseTest { + protected: + void SetUp() override { + _table = std::make_shared
(_chunk_size); + _table->add_column("col_1", "int", false); + _table->add_column("col_2", "string", true); + StorageManager::get().add_table(_table_name, _table); + + _get_table_oper = std::make_shared(_table_name); + _get_table_oper->execute(); + } + + std::ostringstream _output; + std::string _table_name = "printTestTable"; + uint32_t _chunk_size = 10; + std::shared_ptr _get_table_oper; + std::shared_ptr
_table = nullptr; +}; + +// Class used to make protected methods visible without modifying the base class with testing code. +class PrintWrapper : public Print { + std::shared_ptr _table; + + public: + explicit PrintWrapper(const std::shared_ptr in) : Print(in), _table(in->get_output()) {} + + std::vector test_column_string_widths(uint16_t min, uint16_t max) { + return _column_string_widths(min, max, _table); + } +}; + +TEST_F(OperatorsPrintTest, EmptyTable) { + auto print = std::make_shared(_get_table_oper, _output); + print->execute(); + + // check if table is correctly passed + EXPECT_EQ(print->get_output(), _table); + + auto output_str = _output.str(); + + // rather hard-coded tests + EXPECT_TRUE(output_str.find("col_1") != std::string::npos); + EXPECT_TRUE(output_str.find("col_2") != std::string::npos); + EXPECT_TRUE(output_str.find("int") != std::string::npos); + EXPECT_TRUE(output_str.find("string_null") != std::string::npos); + EXPECT_TRUE(output_str.find("Empty chunk.") != std::string::npos); +} + +TEST_F(OperatorsPrintTest, FilledTable) { + auto table = StorageManager::get().get_table(_table_name); + for (auto index = size_t{0}; index < _chunk_size * 2; ++index) { + // Char 97 is an 'a' + table->append( + {static_cast(index % _chunk_size), std::string(1, 97 + static_cast(index / _chunk_size))}); + } + + auto print = std::make_shared(_get_table_oper, _output); + print->execute(); + + // Check if table is correctly passed. + EXPECT_EQ(print->get_output(), table); + + auto output_str = _output.str(); + + EXPECT_TRUE(output_str.find("Chunk 0") != std::string::npos); + // There should not be a third chunk (at least that's the current impl). + EXPECT_TRUE(output_str.find("Chunk 3") == std::string::npos); + + // Remove spaces. 
+ output_str.erase(remove_if(output_str.begin(), output_str.end(), isspace), output_str.end()); + + EXPECT_TRUE(output_str.find("|2|a|") != std::string::npos); + EXPECT_TRUE(output_str.find("|9|b|") != std::string::npos); + EXPECT_TRUE(output_str.find("|10|a|") == std::string::npos); +} + +TEST_F(OperatorsPrintTest, GetColumnWidths) { + auto min = size_t{8}; + auto max = size_t{20}; + + auto table = StorageManager::get().get_table(_table_name); + + auto print_wrapper = std::make_shared(_get_table_oper); + auto print_lengths = print_wrapper->test_column_string_widths(min, max); + + // We have two columns, thus two 'lengths'. + ASSERT_EQ(print_lengths.size(), size_t{2}); + // With empty columns and short column names, we should see the minimal lengths. + EXPECT_EQ(print_lengths.at(0), min); + EXPECT_EQ(print_lengths.at(1), min); + + int ten_digits_ints = 1234567890; + + table->append({ten_digits_ints, "quite a long string with more than $max chars"}); + + print_lengths = print_wrapper->test_column_string_widths(min, max); + EXPECT_EQ(print_lengths.at(0), size_t{10}); + EXPECT_EQ(print_lengths.at(1), max); +} + +} // namespace opossum diff --git a/src/test/operators/table_scan_test.cpp b/src/test/operators/table_scan_test.cpp new file mode 100644 index 0000000..e6b902e --- /dev/null +++ b/src/test/operators/table_scan_test.cpp @@ -0,0 +1,286 @@ +#include "base_test.hpp" + +#include "operators/print.hpp" +#include "operators/table_scan.hpp" +#include "operators/table_wrapper.hpp" +#include "storage/reference_segment.hpp" +#include "utils/load_table.hpp" + +namespace opossum { + +class OperatorsTableScanTest : public BaseTest { + protected: + void SetUp() override { + _table_wrapper = std::make_shared(load_table("src/test/tables/int_float.tbl", 2)); + _table_wrapper->execute(); + + std::shared_ptr
test_even_dict = std::make_shared
(5); + test_even_dict->add_column("a", "int", false); + test_even_dict->add_column("b", "int", true); + for (auto index = int32_t{0}; index <= 24; index += 2) { + test_even_dict->append({index, 100 + index}); + } + test_even_dict->append({25, NULL_VALUE}); + + test_even_dict->compress_chunk(ChunkID{0}); + test_even_dict->compress_chunk(ChunkID{1}); + + _table_wrapper_even_dict = std::make_shared(std::move(test_even_dict)); + _table_wrapper_even_dict->execute(); + } + + std::shared_ptr get_table_op_part_dict() { + auto table = std::make_shared
(5); + table->add_column("a", "int", false); + table->add_column("b", "float", true); + + for (auto index = int32_t{1}; index < 20; ++index) { + table->append({index, 100.1 + index}); + } + + table->compress_chunk(ChunkID{0}); + table->compress_chunk(ChunkID{1}); + + auto table_wrapper = std::make_shared(table); + table_wrapper->execute(); + + return table_wrapper; + } + + std::shared_ptr get_table_op_with_n_dict_entries(const int32_t num_entries) { + // Set up dictionary encoded table with a dictionary consisting of num_entries entries. + auto table = std::make_shared(0); + table->add_column("a", "int", false); + table->add_column("b", "float", true); + + for (auto index = int32_t{0}; index <= num_entries; index++) { + table->append({index, 100.0f + index}); + } + + table->compress_chunk(ChunkID{0}); + + auto table_wrapper = std::make_shared(std::move(table)); + table_wrapper->execute(); + return table_wrapper; + } + + void ASSERT_COLUMN_EQ(std::shared_ptr table, const ColumnID column_id, + std::vector expected) { + for (auto chunk_id = ChunkID{0}; chunk_id < table->chunk_count(); ++chunk_id) { + const auto& chunk = table->get_chunk(chunk_id); + + for (auto chunk_offset = ChunkOffset{0}; chunk_offset < chunk->size(); ++chunk_offset) { + const auto segment = chunk->get_segment(column_id); + + const auto found_value = (*segment)[chunk_offset]; + const auto comparator = [found_value](const AllTypeVariant expected_value) { + // Returns equivalency, not equality. 
+ return !(found_value < expected_value) && !(expected_value < found_value); + }; + + auto search = std::find_if(expected.begin(), expected.end(), comparator); + + ASSERT_TRUE(search != expected.end()); + expected.erase(search); + } + } + + ASSERT_TRUE(expected.empty()); + } + + std::shared_ptr _table_wrapper, _table_wrapper_even_dict; +}; + +TEST_F(OperatorsTableScanTest, DoubleScan) { + auto expected_result = load_table("src/test/tables/int_float_filtered.tbl", 2); + + auto scan_1 = std::make_shared(_table_wrapper, ColumnID{0}, ScanType::OpGreaterThanEquals, 1234); + scan_1->execute(); + + auto scan_2 = std::make_shared(scan_1, ColumnID{1}, ScanType::OpLessThan, 457.9); + scan_2->execute(); + + EXPECT_TABLE_EQ(scan_2->get_output(), expected_result); +} + +TEST_F(OperatorsTableScanTest, EmptyResultScan) { + auto scan_1 = std::make_shared(_table_wrapper, ColumnID{0}, ScanType::OpGreaterThan, 90000); + scan_1->execute(); + + for (auto chunk_index = ChunkID{0}; chunk_index < scan_1->get_output()->chunk_count(); chunk_index++) + EXPECT_EQ(scan_1->get_output()->get_chunk(chunk_index)->column_count(), 2); +} + +TEST_F(OperatorsTableScanTest, SingleScanReturnsCorrectRowCount) { + auto expected_result = load_table("src/test/tables/int_float_filtered2.tbl", 1); + + auto scan = std::make_shared(_table_wrapper, ColumnID{0}, ScanType::OpGreaterThanEquals, 1234); + scan->execute(); + + EXPECT_TABLE_EQ(scan->get_output(), expected_result); +} + +TEST_F(OperatorsTableScanTest, ScanOnDictColumn) { + // We do not need to check for a non existing value, because that happens automatically when we scan the second chunk. 
+ + auto tests = std::map>{}; + tests[ScanType::OpEquals] = {104}; + tests[ScanType::OpNotEquals] = {100, 102, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, NULL_VALUE}; + tests[ScanType::OpLessThan] = {100, 102}; + tests[ScanType::OpLessThanEquals] = {100, 102, 104}; + tests[ScanType::OpGreaterThan] = {106, 108, 110, 112, 114, 116, 118, 120, 122, 124, NULL_VALUE}; + tests[ScanType::OpGreaterThanEquals] = {104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, NULL_VALUE}; + + for (const auto& test : tests) { + auto scan = std::make_shared(_table_wrapper_even_dict, ColumnID{0}, test.first, 4); + scan->execute(); + + ASSERT_COLUMN_EQ(scan->get_output(), ColumnID{1}, test.second); + } +} + +TEST_F(OperatorsTableScanTest, ScanOnReferencedDictColumn) { + // We do not need to check for a non existing value, because that happens automatically when we scan the second + // chunk. + + auto tests = std::map>{}; + tests[ScanType::OpEquals] = {104}; + tests[ScanType::OpNotEquals] = {100, 102, 106}; + tests[ScanType::OpLessThan] = {100, 102}; + tests[ScanType::OpLessThanEquals] = {100, 102, 104}; + tests[ScanType::OpGreaterThan] = {106}; + tests[ScanType::OpGreaterThanEquals] = {104, 106}; + + for (const auto& test : tests) { + auto scan1 = std::make_shared(_table_wrapper_even_dict, ColumnID{1}, ScanType::OpLessThan, 108); + scan1->execute(); + + auto scan2 = std::make_shared(scan1, ColumnID{0}, test.first, 4); + scan2->execute(); + + ASSERT_COLUMN_EQ(scan2->get_output(), ColumnID{1}, test.second); + } +} + +TEST_F(OperatorsTableScanTest, ScanPartiallyCompressed) { + auto expected_result = load_table("src/test/tables/int_float_seq_filtered.tbl", 2); + + auto table_wrapper = get_table_op_part_dict(); + auto scan_1 = std::make_shared(table_wrapper, ColumnID{0}, ScanType::OpLessThan, 10); + scan_1->execute(); + + EXPECT_TABLE_EQ(scan_1->get_output(), expected_result); +} + +TEST_F(OperatorsTableScanTest, ScanOnDictColumnValueGreaterThanMaxDictionaryValue) { + const auto 
all_rows = + std::vector{100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, NULL_VALUE}; + const auto no_rows = std::vector{}; + + auto tests = std::map>{}; + tests[ScanType::OpEquals] = no_rows; + tests[ScanType::OpNotEquals] = all_rows; + tests[ScanType::OpLessThan] = all_rows; + tests[ScanType::OpLessThanEquals] = all_rows; + tests[ScanType::OpGreaterThan] = no_rows; + tests[ScanType::OpGreaterThanEquals] = no_rows; + + for (const auto& test : tests) { + auto scan = std::make_shared(_table_wrapper_even_dict, ColumnID{0}, test.first, 30); + scan->execute(); + + ASSERT_COLUMN_EQ(scan->get_output(), ColumnID{1}, test.second); + } +} + +TEST_F(OperatorsTableScanTest, ScanOnDictColumnValueLessThanMinDictionaryValue) { + const auto all_rows = + std::vector{100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, NULL_VALUE}; + const auto no_rows = std::vector{}; + + auto tests = std::map>{}; + tests[ScanType::OpEquals] = no_rows; + tests[ScanType::OpNotEquals] = all_rows; + tests[ScanType::OpLessThan] = no_rows; + tests[ScanType::OpLessThanEquals] = no_rows; + tests[ScanType::OpGreaterThan] = all_rows; + tests[ScanType::OpGreaterThanEquals] = all_rows; + + for (const auto& test : tests) { + auto scan = std::make_shared(_table_wrapper_even_dict, ColumnID{0} /* "a" */, test.first, -10); + scan->execute(); + + ASSERT_COLUMN_EQ(scan->get_output(), ColumnID{1}, test.second); + } +} + +TEST_F(OperatorsTableScanTest, ScanOnDictColumnAroundBounds) { + // Scanning for a value that is around the dictionary's bounds. 
+ + auto tests = std::map>{}; + tests[ScanType::OpEquals] = {100}; + tests[ScanType::OpLessThan] = {}; + tests[ScanType::OpLessThanEquals] = {100}; + tests[ScanType::OpGreaterThan] = {102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, NULL_VALUE}; + tests[ScanType::OpGreaterThanEquals] = {100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, NULL_VALUE}; + tests[ScanType::OpNotEquals] = {102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, NULL_VALUE}; + + for (const auto& test : tests) { + auto scan = std::make_shared(_table_wrapper_even_dict, ColumnID{0}, test.first, 0); + scan->execute(); + + ASSERT_COLUMN_EQ(scan->get_output(), ColumnID{1}, test.second); + } +} +/* Chains two scans: the first (column 0 greater than 12345 on _table_wrapper) is asserted to return zero rows, and a second scan over that empty output (column 1 equal to 456.7) is asserted to return zero rows as well. */ +TEST_F(OperatorsTableScanTest, ScanWithEmptyInput) { + auto scan_1 = std::make_shared(_table_wrapper, ColumnID{0}, ScanType::OpGreaterThan, 12345); + scan_1->execute(); + EXPECT_EQ(scan_1->get_output()->row_count(), static_cast(0)); + + // Scan_1 produced an empty result. + auto scan_2 = std::make_shared(scan_1, ColumnID{1}, ScanType::OpEquals, 456.7); + scan_2->execute(); + + EXPECT_EQ(scan_2->get_output()->row_count(), static_cast(0)); +} +/* Runs OpGreaterThan scans on column 0 of the operators returned by get_table_op_with_n_dict_entries for 257 and 65537 entries and expects 57 rows above 200 and 37 rows above 65500. NOTE(review): the helper is defined outside this view, so the expected counts cannot be re-derived from here - confirm against the helper. */ +TEST_F(OperatorsTableScanTest, ScanOnWideDictionarySegment) { + // 2**8 + 1 values require a data type of 16bit. + const auto table_wrapper_dict_16 = get_table_op_with_n_dict_entries((1 << 8) + 1); + auto scan_1 = std::make_shared(table_wrapper_dict_16, ColumnID{0}, ScanType::OpGreaterThan, 200); + scan_1->execute(); + + EXPECT_EQ(scan_1->get_output()->row_count(), static_cast(57)); + + // 2**16 + 1 values require a data type of 32bit. 
+ const auto table_wrapper_dict_32 = get_table_op_with_n_dict_entries((1 << 16) + 1); + auto scan_2 = + std::make_shared(table_wrapper_dict_32, ColumnID{0}, ScanType::OpGreaterThan, 65500); + scan_2->execute(); + + EXPECT_EQ(scan_2->get_output()->row_count(), static_cast(37)); +} +/* For every ScanType key in the map, a first scan (column 0 greater than -10 on _table_wrapper_even_dict) feeds a second scan that compares column 1 against 104 with that ScanType; column 1 of the result is compared with the expected list, none of which contains NULL_VALUE. NOTE(review): scan_1 does not depend on the loop variable but is rebuilt and executed on every iteration. */ +TEST_F(OperatorsTableScanTest, ScanOnReferenceSegmentWithNullValue) { + auto tests = std::map>{}; + tests[ScanType::OpEquals] = {104}; + tests[ScanType::OpNotEquals] = {100, 102, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124}; + tests[ScanType::OpLessThan] = {100, 102}; + tests[ScanType::OpLessThanEquals] = {100, 102, 104}; + tests[ScanType::OpGreaterThan] = {106, 108, 110, 112, 114, 116, 118, 120, 122, 124}; + tests[ScanType::OpGreaterThanEquals] = {104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124}; + + for (const auto& test : tests) { + auto scan_1 = + std::make_shared(_table_wrapper_even_dict, ColumnID{0} /* "a" */, ScanType::OpGreaterThan, -10); + scan_1->execute(); + + auto scan_2 = std::make_shared(scan_1, ColumnID{1}, test.first, 104); + scan_2->execute(); + + ASSERT_COLUMN_EQ(scan_2->get_output(), ColumnID{1}, test.second); + } +} + +} // namespace opossum diff --git a/src/test/storage/reference_segment_test.cpp b/src/test/storage/reference_segment_test.cpp new file mode 100644 index 0000000..e5240ca --- /dev/null +++ b/src/test/storage/reference_segment_test.cpp @@ -0,0 +1,95 @@ +#include "base_test.hpp" + +#include "operators/abstract_operator.hpp" +#include "operators/get_table.hpp" +#include "operators/print.hpp" +#include "operators/table_scan.hpp" +#include "storage/reference_segment.hpp" +#include "storage/storage_manager.hpp" + +namespace opossum { +/* Fixture for the ReferenceSegment tests. SetUp builds _test_table (columns "a" int and "b" float, five appended rows) and _test_table_dict (columns "a" and "b" int, one row per even a from 0 to 24 with b = 100 + a), calls compress_chunk on chunks 0 and 1 of the latter and registers it with the StorageManager as "test_table_dict". NOTE(review): the constructor arguments 3 and 5 are presumably target chunk sizes and the third add_column argument presumably a nullable flag - confirm against Table. */ +class ReferenceSegmentTest : public BaseTest { + virtual void SetUp() { + _test_table = std::make_shared
(3); + _test_table->add_column("a", "int", false); + _test_table->add_column("b", "float", true); + _test_table->append({123, 456.7f}); + _test_table->append({1234, 457.7f}); + _test_table->append({12345, 458.7f}); + _test_table->append({54321, 458.7f}); + _test_table->append({12345, 458.7f}); + + _test_table_dict = std::make_shared
(5); + _test_table_dict->add_column("a", "int", false); + _test_table_dict->add_column("b", "int", true); + for (auto value = int32_t{0}; value <= 24; value += 2) { + _test_table_dict->append({value, 100 + value}); + } +/* The loop above appends 13 rows; only chunks 0 and 1 are compressed here. If 5 is the chunk size, a third chunk with 3 rows stays uncompressed - TODO confirm. */ + _test_table_dict->compress_chunk(ChunkID{0}); + _test_table_dict->compress_chunk(ChunkID{1}); + + StorageManager::get().add_table("test_table_dict", _test_table_dict); + } + + public: + std::shared_ptr
_test_table, _test_table_dict; +}; +/* Positions (0,0), (0,1), (0,2): indices 0..2 of the reference segment must equal offsets 0..2 of column 0 in chunk 0 of _test_table. */ +TEST_F(ReferenceSegmentTest, RetrievesValues) { + // PosList with (0, 0), (0, 1), (0, 2) + auto pos_list = std::make_shared( + std::initializer_list({RowID{ChunkID{0}, 0}, RowID{ChunkID{0}, 1}, RowID{ChunkID{0}, 2}})); + auto reference_segment = ReferenceSegment(_test_table, ColumnID{0}, pos_list); + + auto& segment = *(_test_table->get_chunk(ChunkID{0})->get_segment(ColumnID{0})); + + EXPECT_EQ(reference_segment[0], segment[0]); + EXPECT_EQ(reference_segment[1], segment[1]); + EXPECT_EQ(reference_segment[2], segment[2]); +} +/* Positions (0,1), (0,2), (0,0): the reference segment must follow the order of the position list (offsets 1, 2, 0), not the order of the underlying segment. */ +TEST_F(ReferenceSegmentTest, RetrievesValuesOutOfOrder) { + // PosList with (0, 1), (0, 2), (0, 0) + auto pos_list = std::make_shared( + std::initializer_list({RowID{ChunkID{0}, 1}, RowID{ChunkID{0}, 2}, RowID{ChunkID{0}, 0}})); + auto reference_segment = ReferenceSegment(_test_table, ColumnID{0}, pos_list); + + auto& segment = *(_test_table->get_chunk(ChunkID{0})->get_segment(ColumnID{0})); + + EXPECT_EQ(reference_segment[0], segment[1]); + EXPECT_EQ(reference_segment[1], segment[2]); + EXPECT_EQ(reference_segment[2], segment[0]); +} +/* Positions spanning two chunks: (0,2), (1,0), (1,1). NOTE(review): only indices 0 and 2 are asserted; reference_segment[1], which by the position list would map to segment_2[0], is not checked. */ +TEST_F(ReferenceSegmentTest, RetrievesValuesFromChunks) { + // PosList with (0, 2), (1, 0), (1, 1) + auto pos_list = std::make_shared( + std::initializer_list({RowID{ChunkID{0}, 2}, RowID{ChunkID{1}, 0}, RowID{ChunkID{1}, 1}})); + auto reference_segment = ReferenceSegment(_test_table, ColumnID{0}, pos_list); + + auto& segment_1 = *(_test_table->get_chunk(ChunkID{0})->get_segment(ColumnID{0})); + auto& segment_2 = *(_test_table->get_chunk(ChunkID{1})->get_segment(ColumnID{0})); + + EXPECT_EQ(reference_segment[0], segment_1[2]); + EXPECT_EQ(reference_segment[2], segment_2[1]); +} +/* A NULL_ROW_ID entry at index 2 of the position list must read back as a null variant (checked with variant_is_null), while indices 0, 1 and 3 must equal offsets 0, 1 and 2 of column 0 in chunk 0. */ +TEST_F(ReferenceSegmentTest, RetrieveNullValueFromNullRowID) { + // RowIDPosList with (0, 0), (0, 1), NULL_ROW_ID, (0, 2) + auto pos_list = std::make_shared( + std::initializer_list({RowID{ChunkID{0}, ChunkOffset{0}}, RowID{ChunkID{0}, ChunkOffset{1}}, NULL_ROW_ID, + RowID{ChunkID{0}, 
ChunkOffset{2}}})); + + auto ref_segment = ReferenceSegment(_test_table, ColumnID{0}, pos_list); + + auto& segment = *(_test_table->get_chunk(ChunkID{0})->get_segment(ColumnID{0})); + + EXPECT_EQ(ref_segment[ChunkOffset{0}], segment[ChunkOffset{0}]); + EXPECT_EQ(ref_segment[ChunkOffset{1}], segment[ChunkOffset{1}]); + EXPECT_TRUE(variant_is_null(ref_segment[ChunkOffset{2}])); + EXPECT_EQ(ref_segment[ChunkOffset{3}], segment[ChunkOffset{2}]); +} + +} // namespace opossum