Skip to content

Commit 36b8d1b

Browse files
authored
Merge pull request #32950 from vespa-engine/toregge/log-posting-list-reads
Log posting list reads.
2 parents 6b3597f + 9545567 commit 36b8d1b

11 files changed

+128
-26
lines changed

searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
LOG_SETUP(".diskindex.bitvectordictionary");
1313

1414
using search::index::BitVectorDictionaryLookupResult;
15+
using search::index::PostingListFileRange;
1516

1617
namespace search::diskindex {
1718

@@ -117,4 +118,14 @@ BitVectorDictionary::read_bitvector(BitVectorDictionaryLookupResult lookup_resul
117118
return read_bitvector(lookup_result, read_stats);
118119
}
119120

121+
PostingListFileRange
122+
BitVectorDictionary::get_bitvector_file_range(index::BitVectorDictionaryLookupResult lookup_result) const
123+
{
124+
if (!lookup_result.valid()) {
125+
return {0, 0};
126+
}
127+
uint64_t offset = ((uint64_t) _vectorSize) * lookup_result.idx + _datHeaderLen;
128+
return {offset, offset + _vectorSize};
129+
}
130+
120131
}

searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#include "bitvectorkeyscope.h"
55
#include <vespa/searchlib/index/bitvector_dictionary_lookup_result.h>
6+
#include <vespa/searchlib/index/posting_list_file_range.h>
67
#include <vespa/searchlib/index/bitvectorkeys.h>
78
#include <vespa/searchlib/common/tunefileinfo.h>
89
#include <string>
@@ -70,6 +71,7 @@ class BitVectorDictionary
7071
std::unique_ptr<BitVector> read_bitvector(index::BitVectorDictionaryLookupResult lookup_result,
7172
ReadStats &read_stats);
7273
std::unique_ptr<BitVector> read_bitvector(index::BitVectorDictionaryLookupResult lookup_result);
74+
index::PostingListFileRange get_bitvector_file_range(index::BitVectorDictionaryLookupResult lookup_result) const;
7375

7476
uint32_t getDocIdLimit() const noexcept { return _docIdLimit; }
7577

searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,46 @@ DiskTermBlueprint::DiskTermBlueprint(const FieldSpec & field,
6060
_lookupRes.counts._numDocs == 0));
6161
}
6262

63+
void
64+
DiskTermBlueprint::log_bitvector_read() const
65+
{
66+
auto range = _field_index.get_bitvector_file_range(_bitvector_lookup_result);
67+
LOG(debug, "DiskTermBlueprint::fetchPosting "
68+
"bitvector %s %s %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu32 " %" PRIu64 " %" PRIu64,
69+
_field.getName().c_str(), _query_term.c_str(), _field_index.get_file_id(),
70+
_lookupRes.wordNum, _lookupRes.counts._numDocs,
71+
_bitvector_lookup_result.idx,
72+
range.start_offset, range.size());
73+
74+
}
75+
76+
void
77+
DiskTermBlueprint::log_posting_list_read() const
78+
{
79+
auto range = _field_index.get_posting_list_file_range(_lookupRes);
80+
LOG(debug, "DiskTermBlueprint::fetchPosting "
81+
"posting %s %s %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64,
82+
_field.getName().c_str(), _query_term.c_str(), _field_index.get_file_id(),
83+
_lookupRes.wordNum, _lookupRes.counts._numDocs,
84+
_lookupRes.bitOffset, _lookupRes.counts._bitLength,
85+
range.start_offset, range.size());
86+
}
87+
6388
void
6489
DiskTermBlueprint::fetchPostings(const queryeval::ExecuteInfo &execInfo)
6590
{
6691
(void) execInfo;
6792
if (!_fetchPostingsDone) {
6893
if (_useBitVector && _bitvector_lookup_result.valid()) {
94+
if (LOG_WOULD_LOG(debug)) [[unlikely]] {
95+
log_bitvector_read();
96+
}
6997
_bitVector = _field_index.read_bit_vector(_bitvector_lookup_result);
7098
}
7199
if (!_bitVector) {
100+
if (LOG_WOULD_LOG(debug)) [[unlikely]] {
101+
log_posting_list_read();
102+
}
72103
_postingHandle = _field_index.read_posting_list(_lookupRes);
73104
}
74105
}
@@ -90,6 +121,9 @@ DiskTermBlueprint::get_bitvector() const
90121
}
91122
std::lock_guard guard(_mutex);
92123
if (!_late_bitvector) {
124+
if (LOG_WOULD_LOG(debug)) [[unlikely]] {
125+
log_bitvector_read();
126+
}
93127
_late_bitvector = _field_index.read_bit_vector(_bitvector_lookup_result);
94128
assert(_late_bitvector);
95129
}

searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ class DiskTermBlueprint : public queryeval::SimpleLeafBlueprint
2626
mutable std::shared_ptr<BitVector> _late_bitvector;
2727

2828
const BitVector* get_bitvector() const;
29+
void log_bitvector_read() const __attribute__((noinline));
30+
void log_posting_list_read() const __attribute__((noinline));
2931
public:
3032
/**
3133
* Create a new blueprint.

searchlib/src/vespa/searchlib/diskindex/field_index.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking {
2424
using DiskPostingFile = index::PostingListFileRandRead;
2525
using DiskPostingFileReal = Zc4PosOccRandRead;
2626
using DiskPostingFileDynamicKReal = ZcPosOccRandRead;
27+
using PostingListFileRange = index::PostingListFileRange;
2728

2829
class LockedFieldIndexIoStats {
2930
FieldIndexIoStats _stats;
@@ -76,17 +77,24 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking {
7677
bool trim) const;
7778
index::PostingListHandle read(const IPostingListCache::Key& key, IPostingListCache::Context& ctx) const override;
7879
index::PostingListHandle read_posting_list(const search::index::DictionaryLookupResult& lookup_result) const;
80+
PostingListFileRange get_posting_list_file_range(const search::index::DictionaryLookupResult& lookup_result) const {
81+
return _posting_file->get_posting_list_file_range(lookup_result);
82+
}
7983
index::BitVectorDictionaryLookupResult lookup_bit_vector(const search::index::DictionaryLookupResult& lookup_result) const;
8084
std::shared_ptr<BitVector> read_uncached_bit_vector(index::BitVectorDictionaryLookupResult lookup_result) const;
8185
std::shared_ptr<BitVector> read(const IPostingListCache::BitVectorKey& key, IPostingListCache::Context& ctx) const override;
8286
std::shared_ptr<BitVector> read_bit_vector(index::BitVectorDictionaryLookupResult lookup_result) const;
87+
PostingListFileRange get_bitvector_file_range(index::BitVectorDictionaryLookupResult lookup_result) const {
88+
return _bit_vector_dict->get_bitvector_file_range(lookup_result);
89+
}
8390
std::unique_ptr<search::queryeval::SearchIterator> create_iterator(const search::index::DictionaryLookupResult& lookup_result,
8491
const index::PostingListHandle& handle,
8592
const search::fef::TermFieldMatchDataArray& tfmda) const;
8693
index::FieldLengthInfo get_field_length_info() const;
8794

8895
index::DictionaryFileRandRead* get_dictionary() noexcept { return _dict.get(); }
8996
FieldIndexStats get_stats(bool clear_disk_io_stats) const;
97+
uint64_t get_file_id() const noexcept { return _file_id; }
9098
uint32_t get_field_id() const noexcept { return _field_id; }
9199
bool is_posting_list_cache_enabled() const noexcept { return _posting_list_cache_enabled; }
92100
};

searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp

Lines changed: 34 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@ using search::bitcompression::EGPosOccDecodeContext;
2020
using search::bitcompression::EGPosOccDecodeContextCooked;
2121
using search::bitcompression::PosOccFieldsParams;
2222
using search::bitcompression::FeatureDecodeContext;
23+
using search::index::DictionaryLookupResult;
2324
using search::index::FieldLengthInfo;
2425
using search::index::PostingListCounts;
26+
using search::index::PostingListFileRange;
2527
using search::index::PostingListHandle;
2628
using search::ComprFileReadContext;
2729

@@ -31,6 +33,17 @@ std::string myId4("Zc.4");
3133
std::string myId5("Zc.5");
3234
std::string interleaved_features("interleaved_features");
3335

36+
PostingListFileRange get_file_range(const DictionaryLookupResult& lookup_result, uint64_t header_bit_size)
37+
{
38+
uint64_t start_offset = (lookup_result.bitOffset + header_bit_size) >> 3;
39+
// Align start at 64-bit boundary
40+
start_offset -= (start_offset & 7);
41+
uint64_t end_offset = (lookup_result.bitOffset + header_bit_size + lookup_result.counts._bitLength + 7) >> 3;
42+
// Align end at 64-bit boundary
43+
end_offset += (-end_offset & 7);
44+
return {start_offset, end_offset};
45+
}
46+
3447
}
3548

3649
namespace search::diskindex {
@@ -91,25 +104,18 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result)
91104
return handle;
92105
}
93106

94-
uint64_t startOffset = (lookup_result.bitOffset + _headerBitSize) >> 3;
95-
// Align start at 64-bit boundary
96-
startOffset -= (startOffset & 7);
97-
uint64_t endOffset = (lookup_result.bitOffset + _headerBitSize +
98-
lookup_result.counts._bitLength + 7) >> 3;
99-
// Align end at 64-bit boundary
100-
endOffset += (-endOffset & 7);
101-
102-
void *mapPtr = _file->MemoryMapPtr(startOffset);
107+
auto file_range = get_file_range(lookup_result, _headerBitSize);
108+
void *mapPtr = _file->MemoryMapPtr(file_range.start_offset);
103109
if (mapPtr != nullptr) {
104110
handle._mem = mapPtr;
105-
size_t pad_before = startOffset - vespalib::round_down_to_page_boundary(startOffset);
106-
handle._read_bytes = vespalib::round_up_to_page_size(pad_before + endOffset - startOffset + decode_prefetch_size);
111+
size_t pad_before = file_range.start_offset - vespalib::round_down_to_page_boundary(file_range.start_offset);
112+
handle._read_bytes = vespalib::round_up_to_page_size(pad_before + file_range.size() + decode_prefetch_size);
107113
} else {
108-
uint64_t vectorLen = endOffset - startOffset;
114+
uint64_t vectorLen = file_range.size();
109115
size_t padBefore;
110116
size_t padAfter;
111117
size_t padExtraAfter; // Decode prefetch space
112-
_file->DirectIOPadding(startOffset, vectorLen, padBefore, padAfter);
118+
_file->DirectIOPadding(file_range.start_offset, vectorLen, padBefore, padAfter);
113119
padExtraAfter = 0;
114120
if (padAfter < decode_prefetch_size) {
115121
padExtraAfter = decode_prefetch_size - padAfter;
@@ -120,10 +126,10 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result)
120126
if (mallocLen > 0) {
121127
alignedBuffer = _file->AllocateDirectIOBuffer(mallocLen);
122128
assert(alignedBuffer != nullptr);
123-
assert(endOffset + padAfter + padExtraAfter <= _fileSize);
129+
assert(file_range.end_offset + padAfter + padExtraAfter <= _fileSize);
124130
_file->ReadBuf(alignedBuffer,
125131
padBefore + vectorLen + padAfter,
126-
startOffset - padBefore);
132+
file_range.start_offset - padBefore);
127133
}
128134
// Zero decode prefetch memory to avoid uninitialized reads
129135
if (padExtraAfter > 0) {
@@ -136,7 +142,7 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result)
136142
handle._allocSize = mallocLen;
137143
handle._read_bytes = padBefore + vectorLen + padAfter;
138144
}
139-
handle._bitOffsetMem = (startOffset << 3) - _headerBitSize;
145+
handle._bitOffsetMem = (file_range.start_offset << 3) - _headerBitSize;
140146
return handle;
141147
}
142148

@@ -147,14 +153,8 @@ ZcPosOccRandRead::consider_trim_posting_list(const DictionaryLookupResult &looku
147153
if (lookup_result.counts._bitLength == 0 || _memoryMapped) {
148154
return;
149155
}
150-
uint64_t start_offset = (lookup_result.bitOffset + _headerBitSize) >> 3;
151-
// Align start at 64-bit boundary
152-
start_offset -= (start_offset & 7);
153-
uint64_t end_offset = (lookup_result.bitOffset + _headerBitSize +
154-
lookup_result.counts._bitLength + 7) >> 3;
155-
// Align end at 64-bit boundary
156-
end_offset += (-end_offset & 7);
157-
size_t malloc_len = end_offset - start_offset + decode_prefetch_size;
156+
auto file_range = get_file_range(lookup_result, _headerBitSize);
157+
size_t malloc_len = file_range.size() + decode_prefetch_size;
158158
if (handle._allocSize == malloc_len) {
159159
assert(handle._allocMem.get() == handle._mem);
160160
return;
@@ -169,7 +169,16 @@ ZcPosOccRandRead::consider_trim_posting_list(const DictionaryLookupResult &looku
169169
handle._allocMem = std::shared_ptr<void>(mem, free);
170170
handle._mem = mem;
171171
handle._allocSize = malloc_len;
172-
handle._read_bytes = end_offset - start_offset;
172+
handle._read_bytes = file_range.size();
173+
}
174+
175+
PostingListFileRange
176+
ZcPosOccRandRead::get_posting_list_file_range(const DictionaryLookupResult& lookup_result) const
177+
{
178+
if (lookup_result.counts._bitLength == 0) {
179+
return {0, 0};
180+
}
181+
return get_file_range(lookup_result, _headerBitSize);
173182
}
174183

175184
bool

searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class ZcPosOccRandRead : public index::PostingListFileRandRead
3030

3131
using DictionaryLookupResult = index::DictionaryLookupResult;
3232
using PostingListCounts = index::PostingListCounts;
33+
using PostingListFileRange = index::PostingListFileRange;
3334
using PostingListHandle = index::PostingListHandle;
3435

3536
/**
@@ -46,6 +47,7 @@ class ZcPosOccRandRead : public index::PostingListFileRandRead
4647
PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) override;
4748
void consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle,
4849
double bloat_factor) const override;
50+
PostingListFileRange get_posting_list_file_range(const DictionaryLookupResult& lookup_result) const override;
4951

5052
bool open(const std::string &name, const TuneFileRandRead &tuneFileRead) override;
5153
bool close() override;

searchlib/src/vespa/searchlib/index/bitvector_dictionary_lookup_result.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ namespace search::index {
1313
class BitVectorDictionaryLookupResult {
1414
public:
1515
static constexpr uint32_t invalid = std::numeric_limits<uint32_t>::max();
16-
uint64_t idx;
16+
uint32_t idx;
1717

1818
explicit BitVectorDictionaryLookupResult(uint32_t idx_in) noexcept
1919
: idx(idx_in)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
2+
#pragma once
3+
4+
#include <cstdint>
5+
6+
namespace search::index {
7+
8+
/*
9+
* Range of a posting list file used for posting list. Might include padding
10+
* at start and end due to file format. Offsets are in bytes.
11+
*/
12+
struct PostingListFileRange {
13+
uint64_t start_offset;
14+
uint64_t end_offset;
15+
16+
PostingListFileRange(uint64_t start_offset_in, uint64_t end_offset_in)
17+
: start_offset(start_offset_in),
18+
end_offset(end_offset_in)
19+
{
20+
}
21+
uint64_t size() const noexcept { return end_offset - start_offset; }
22+
};
23+
24+
}

searchlib/src/vespa/searchlib/index/postinglistfile.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,12 @@ PostingListFileRandReadPassThrough::consider_trim_posting_list(const DictionaryL
117117
return _lower->consider_trim_posting_list(lookup_result, handle, bloat_factor);
118118
}
119119

120+
PostingListFileRange
121+
PostingListFileRandReadPassThrough::get_posting_list_file_range(const DictionaryLookupResult& lookup_result) const
122+
{
123+
return _lower->get_posting_list_file_range(lookup_result);
124+
}
125+
120126
bool
121127
PostingListFileRandReadPassThrough::open(const std::string &name,
122128
const TuneFileRandRead &tuneFileRead)

0 commit comments

Comments
 (0)