Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
f01566f
feat: introduce sst file format for btree global index
ChaomingZhangCN Jan 5, 2026
3a8f42d
feat: introduce sst file format for btree global index
ChaomingZhangCN Jan 6, 2026
96ec401
fix bloom filter
ChaomingZhangCN Jan 7, 2026
69e1434
minor fix
ChaomingZhangCN Jan 7, 2026
95e2012
fix compile
ChaomingZhangCN Jan 7, 2026
e0c876e
minor fix
ChaomingZhangCN Jan 7, 2026
73bc0c2
fix compile
ChaomingZhangCN Jan 7, 2026
0fdc643
Merge branch 'main' into sst-file-format
ChaomingZhangCN Jan 7, 2026
3ffcf50
add virtual
ChaomingZhangCN Jan 7, 2026
e08c162
address
ChaomingZhangCN Jan 8, 2026
592df79
Merge branch 'main' into sst-file-format
ChaomingZhangCN Jan 8, 2026
573f843
address
Jan 8, 2026
a5c8c53
address
Jan 8, 2026
a56c96c
fix tests
Jan 8, 2026
757d212
fix tests
Jan 8, 2026
a0cb2a7
fix tests
Jan 8, 2026
bfa93f4
fix tests
Jan 8, 2026
dfe0527
fix tests
Jan 8, 2026
a389fb0
Merge branch 'main' into sst-file-format
lucasfang Jan 12, 2026
4e43fe4
address
ChaomingZhangCN Jan 12, 2026
384e1da
Merge branch 'sst-file-format' of https://github.com/ChaomingZhangCN/…
ChaomingZhangCN Jan 12, 2026
672d0ff
clang lint
ChaomingZhangCN Jan 12, 2026
779440c
clang lint
ChaomingZhangCN Jan 12, 2026
0916877
Merge branch 'main' into sst-file-format
ChaomingZhangCN Jan 12, 2026
9ab4a91
update copyright
ChaomingZhangCN Jan 14, 2026
447b04f
check byte order
ChaomingZhangCN Jan 16, 2026
3ab2c04
address
ChaomingZhangCN Jan 18, 2026
7eadb8a
Merge branch 'main' into sst-file-format
lxy-9602 Jan 20, 2026
734f886
add tests
ChaomingZhangCN Jan 21, 2026
46a06a4
minor fix
ChaomingZhangCN Jan 21, 2026
065be74
minor fix
ChaomingZhangCN Jan 21, 2026
29ec989
Merge branch 'main' into sst-file-format
ChaomingZhangCN Jan 21, 2026
53e04ea
fix clang test failed
ChaomingZhangCN Jan 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions src/paimon/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,17 @@ set(PAIMON_COMMON_SRCS
common/io/data_output_stream.cpp
common/io/memory_segment_output_stream.cpp
common/io/offset_input_stream.cpp
common/io/cache/cache.cpp
common/io/cache/cache_key.cpp
common/io/cache/cache_manager.cpp
common/logging/logging.cpp
common/memory/bytes.cpp
common/memory/memory_pool.cpp
common/memory/memory_segment.cpp
common/memory/memory_segment_utils.cpp
common/memory/memory_slice.cpp
common/memory/memory_slice_input.cpp
common/memory/memory_slice_output.cpp
common/metrics/metrics_impl.cpp
common/options/memory_size.cpp
common/options/time_duration.cpp
Expand Down Expand Up @@ -90,13 +96,23 @@ set(PAIMON_COMMON_SRCS
common/reader/reader_utils.cpp
common/reader/complete_row_kind_batch_reader.cpp
common/reader/data_evolution_file_reader.cpp
common/sst/block_handle.cpp
common/sst/block_footer.cpp
common/sst/block_iterator.cpp
common/sst/block_trailer.cpp
common/sst/block_reader.cpp
common/sst/block_writer.cpp
common/sst/sst_file_reader.cpp
common/sst/sst_file_writer.cpp
common/types/data_field.cpp
common/types/data_type.cpp
common/types/data_type_json_parser.cpp
common/types/row_kind.cpp
common/types/row_type.cpp
common/utils/arrow/mem_utils.cpp
common/utils/binary_row_partition_computer.cpp
common/utils/bit_set.cpp
common/utils/bloom_filter.cpp
common/utils/bloom_filter64.cpp
common/utils/bucket_id_calculator.cpp
common/utils/decimal_utils.cpp
Expand Down Expand Up @@ -367,6 +383,8 @@ if(PAIMON_BUILD_TESTS)
common/utils/concurrent_hash_map_test.cpp
common/utils/projected_row_test.cpp
common/utils/projected_array_test.cpp
common/utils/bit_set_test.cpp
common/utils/bloom_filter_test.cpp
common/utils/bloom_filter64_test.cpp
common/utils/xxhash_test.cpp
common/utils/bucket_id_calculator_test.cpp
Expand Down Expand Up @@ -414,6 +432,17 @@ if(PAIMON_BUILD_TESTS)
test_utils_static
${GTEST_LINK_TOOLCHAIN})

# Test target for the SST file format: builds common/sst/sst_file_io_test.cpp.
# paimon_local_file_system_static is wrapped in --whole-archive/--no-whole-archive so the
# linker keeps every object from that archive instead of only the referenced ones.
# NOTE(review): presumably needed because the local file system registers itself through
# static initializers that nothing references directly -- confirm.
add_paimon_test(common_sst_file_format_test
SOURCES
common/sst/sst_file_io_test.cpp
STATIC_LINK_LIBS
paimon_shared
test_utils_static
"-Wl,--whole-archive"
paimon_local_file_system_static
"-Wl,--no-whole-archive"
${GTEST_LINK_TOOLCHAIN})

add_paimon_test(core_test
SOURCES
core/append/append_only_writer_test.cpp
Expand Down
42 changes: 42 additions & 0 deletions src/paimon/common/io/cache/cache.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Copyright 2026-present Alibaba Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "paimon/common/io/cache/cache.h"

namespace paimon {
/// Pass-through lookup: this implementation consults no storage and simply invokes
/// `supplier` with `key` on every call, returning whatever the supplier produced.
std::shared_ptr<CacheValue> NoCache::Get(
    const std::shared_ptr<CacheKey>& key,
    std::function<std::shared_ptr<CacheValue>(const std::shared_ptr<CacheKey>&)> supplier) {
    std::shared_ptr<CacheValue> loaded = supplier(key);
    return loaded;
}

/// Intentionally empty: the key/value pair is not retained.
void NoCache::Put(const std::shared_ptr<CacheKey>& /*key*/,
                  const std::shared_ptr<CacheValue>& /*value*/) {}

/// Intentionally empty: there is no stored entry to remove.
void NoCache::Invalidate(const std::shared_ptr<CacheKey>& /*key*/) {}

/// Intentionally empty: there are no stored entries to clear.
void NoCache::InvalidateAll() {}

/// Always returns an empty map.
std::unordered_map<std::shared_ptr<CacheKey>, std::shared_ptr<CacheValue>> NoCache::AsMap() {
    std::unordered_map<std::shared_ptr<CacheKey>, std::shared_ptr<CacheValue>> empty;
    return empty;
}

} // namespace paimon
71 changes: 71 additions & 0 deletions src/paimon/common/io/cache/cache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Copyright 2026-present Alibaba Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>

#include "paimon/common/io/cache/cache_key.h"
#include "paimon/common/memory/memory_segment.h"
#include "paimon/status.h"

namespace paimon {
class CacheValue;

/// Abstract interface for a cache mapping CacheKey handles to CacheValue handles.
class Cache {
public:
virtual ~Cache() = default;
/// Returns the value associated with `key`.
/// `supplier` is a callback that produces a value from a key; whether and when it is
/// invoked is up to the implementation (NoCache invokes it on every call).
virtual std::shared_ptr<CacheValue> Get(
const std::shared_ptr<CacheKey>& key,
std::function<std::shared_ptr<CacheValue>(const std::shared_ptr<CacheKey>&)> supplier) = 0;

/// Associates `value` with `key`.
virtual void Put(const std::shared_ptr<CacheKey>& key,
const std::shared_ptr<CacheValue>& value) = 0;

/// Drops the entry for `key`.
virtual void Invalidate(const std::shared_ptr<CacheKey>& key) = 0;

/// Drops every entry.
virtual void InvalidateAll() = 0;

/// Returns the cache contents as a map.
/// NOTE(review): the map is keyed on std::shared_ptr<CacheKey>, whose default std::hash and
/// operator== compare the stored pointer, not PositionCacheKey::HashCode()/operator== --
/// confirm that pointer-identity lookup is what implementations intend.
virtual std::unordered_map<std::shared_ptr<CacheKey>, std::shared_ptr<CacheValue>> AsMap() = 0;
};

/// Cache implementation that stores nothing (see cache.cpp): Get() forwards to the supplier,
/// the mutators are no-ops, and AsMap() is always empty.
class NoCache : public Cache {
public:
/// Invokes `supplier(key)` and returns its result.
std::shared_ptr<CacheValue> Get(
const std::shared_ptr<CacheKey>& key,
std::function<std::shared_ptr<CacheValue>(const std::shared_ptr<CacheKey>&)> supplier)
override;
/// No-op.
void Put(const std::shared_ptr<CacheKey>& key,
const std::shared_ptr<CacheValue>& value) override;
/// No-op.
void Invalidate(const std::shared_ptr<CacheKey>& key) override;
/// No-op.
void InvalidateAll() override;
/// Returns an empty map.
std::unordered_map<std::shared_ptr<CacheKey>, std::shared_ptr<CacheValue>> AsMap() override;
};

/// Value type handed out by a Cache: wraps a shared handle to a MemorySegment.
class CacheValue {
 public:
    /// @param segment segment to wrap; the handle is copied, so ownership is shared with the
    ///        caller.
    explicit CacheValue(const std::shared_ptr<MemorySegment>& segment) : segment_(segment) {}

    /// Returns a shared handle to the wrapped segment (null if constructed from null).
    /// Declared const because it only reads state, so it is callable through a const
    /// CacheValue as well.
    std::shared_ptr<MemorySegment> GetSegment() const {
        return segment_;
    }

 private:
    std::shared_ptr<MemorySegment> segment_;
};
} // namespace paimon
53 changes: 53 additions & 0 deletions src/paimon/common/io/cache/cache_key.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Copyright 2026-present Alibaba Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "paimon/common/io/cache/cache_key.h"

namespace paimon {

/// Factory: builds a PositionCacheKey from the given file path, position, length and index
/// flag, and returns it through the CacheKey base type.
std::shared_ptr<CacheKey> CacheKey::ForPosition(const std::string& file_path, int64_t position,
                                                int32_t length, bool is_index) {
    std::shared_ptr<CacheKey> key =
        std::make_shared<PositionCacheKey>(file_path, position, length, is_index);
    return key;
}

bool PositionCacheKey::IsIndex() {
return is_index_;
}

int64_t PositionCacheKey::Position() const {
return position_;
}

int32_t PositionCacheKey::Length() const {
return length_;
}

/// Two keys are equal when file path, position, length and index flag all match.
bool PositionCacheKey::operator==(const PositionCacheKey& other) const {
    if (position_ != other.position_ || length_ != other.length_) {
        return false;
    }
    if (is_index_ != other.is_index_) {
        return false;
    }
    return file_path_ == other.file_path_;
}

/// Combines the std::hash of file path, position, length and index flag (in that order) into
/// one value, mixing each field hash into the running seed with HASH_CONSTANT and shifts.
size_t PositionCacheKey::HashCode() const {
    size_t seed = 0;
    // One combine step; the order in which fields are folded in affects the result.
    const auto mix = [&seed](size_t field_hash) {
        seed ^= field_hash + HASH_CONSTANT + (seed << 6) + (seed >> 2);
    };
    mix(std::hash<std::string>{}(file_path_));
    mix(std::hash<int64_t>{}(position_));
    mix(std::hash<int32_t>{}(length_));
    mix(std::hash<bool>{}(is_index_));
    return seed;
}

} // namespace paimon
68 changes: 68 additions & 0 deletions src/paimon/common/io/cache/cache_key.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright 2026-present Alibaba Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once
#include <cstdint>
#include <functional>
#include <memory>
#include <string>

#include "paimon/status.h"

namespace paimon {

/// Abstract base for keys used with Cache.
class CacheKey {
public:
/// Factory that creates a PositionCacheKey from the given file path, position, length and
/// index flag, returned as a CacheKey handle.
static std::shared_ptr<CacheKey> ForPosition(const std::string& file_path, int64_t position,
int32_t length, bool is_index);

public:
virtual ~CacheKey() = default;

/// Whether this key is flagged as an index entry. CacheManager uses this to choose between
/// its index cache and its data cache.
virtual bool IsIndex() = 0;
};

/// CacheKey made of a file path, a position, a length and an index flag.
/// All fields are const and fixed at construction.
class PositionCacheKey : public CacheKey {
public:
PositionCacheKey(const std::string& file_path, int64_t position, int32_t length, bool is_index)
: file_path_(file_path), position_(position), length_(length), is_index_(is_index) {}

/// Returns the is_index flag given at construction.
bool IsIndex() override;

/// Returns the position given at construction.
int64_t Position() const;
/// Returns the length given at construction.
int32_t Length() const;

/// Field-wise equality over path, position, length and index flag.
bool operator==(const PositionCacheKey& other) const;
/// Hash combining all four fields; used by the std::hash specialization for this type.
size_t HashCode() const;

private:
// Additive constant used in each hash-combine step of HashCode().
static constexpr uint64_t HASH_CONSTANT = 0x9e3779b97f4a7c15ULL;

const std::string file_path_;
const int64_t position_;
const int32_t length_;
const bool is_index_;
};
} // namespace paimon

namespace std {
/// std::hash specialization for paimon::PositionCacheKey; forwards to HashCode().
/// It applies to PositionCacheKey objects used by value as keys, not to
/// std::shared_ptr<CacheKey>, which has its own pointer-based std::hash.
template <>
struct hash<paimon::PositionCacheKey> {
size_t operator()(const paimon::PositionCacheKey& key) const {
return key.HashCode();
}
};
} // namespace std
45 changes: 45 additions & 0 deletions src/paimon/common/io/cache/cache_manager.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Copyright 2026-present Alibaba Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "paimon/common/io/cache/cache_manager.h"

namespace paimon {

std::shared_ptr<MemorySegment> CacheManager::GetPage(
std::shared_ptr<CacheKey>& key,
std::function<Result<MemorySegment>(const std::shared_ptr<CacheKey>&)> reader) {
auto& cache = key->IsIndex() ? index_cache_ : data_cache_;
auto supplier = [=](const std::shared_ptr<CacheKey>& k) -> std::shared_ptr<CacheValue> {
auto ret = reader(k);
if (!ret.ok()) {
return nullptr;
}
auto segment = ret.value();
auto ptr = std::make_shared<MemorySegment>(segment);
return std::make_shared<CacheValue>(ptr);
};
return cache->Get(key, supplier)->GetSegment();
}

/// Invalidates `key` in whichever cache it belongs to: index_cache_ when `key->IsIndex()`
/// is true, data_cache_ otherwise.
void CacheManager::InvalidPage(std::shared_ptr<CacheKey>& key) {
    auto& target = key->IsIndex() ? index_cache_ : data_cache_;
    target->Invalidate(key);
}

} // namespace paimon
Loading
Loading