Skip to content

Commit f6764b0

Browse files
ldanilek (Convex, Inc.)
authored and committed
[Transaction] batch index_range (#23706)
refactor, not changing behavior yet. we thread the batching up, from TransactionIndex::index_range up through UserFacingModel::index_range. the Query layer is still unbatched, but this refactor unblocks that. GitOrigin-RevId: 9704db01e647a72f519b28a6e92de2da7ed33523
1 parent 0ee1d31 commit f6764b0

File tree

5 files changed

+270
-181
lines changed

5 files changed

+270
-181
lines changed

crates/common/src/types/index.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ pub type TabletIndexName = GenericIndexName<TableId>;
112112

113113
/// Like TabletIndexName in that it refers to a stable underlying index,
114114
/// but it works for virtual tables too.
115-
#[derive(Debug)]
115+
#[derive(Debug, Clone)]
116116
pub enum StableIndexName {
117117
Physical(TabletIndexName),
118118
Virtual(IndexName, TabletIndexName),

crates/database/src/bootstrap_model/user_facing.rs

Lines changed: 82 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,7 @@ use common::{
1010
ResolvedDocument,
1111
},
1212
index::IndexKeyBytes,
13-
interval::Interval,
14-
query::{
15-
CursorPosition,
16-
Order,
17-
},
13+
query::CursorPosition,
1814
runtime::Runtime,
1915
types::{
2016
StableIndexName,
@@ -41,7 +37,10 @@ use crate::{
4137
log_virtual_table_get,
4238
log_virtual_table_query,
4339
},
44-
transaction::MAX_PAGE_SIZE,
40+
transaction::{
41+
IndexRangeRequest,
42+
MAX_PAGE_SIZE,
43+
},
4544
unauthorized_error,
4645
virtual_tables::VirtualTable,
4746
PatchValue,
@@ -320,72 +319,106 @@ impl<'a, RT: Runtime> UserFacingModel<'a, RT> {
320319
)
321320
}
322321

323-
/// NOTE: returns a page of results. Callers must call record_read_document
324-
/// for all documents returned from the index stream.
325-
#[convex_macro::instrument_future]
326-
pub async fn index_range(
322+
async fn start_index_range(
327323
&mut self,
328-
stable_index_name: &StableIndexName,
329-
interval: &Interval,
330-
order: Order,
331-
mut max_rows: usize,
332-
version: Option<Version>,
333-
) -> anyhow::Result<(
334-
Vec<(IndexKeyBytes, DeveloperDocument, WriteTimestamp)>,
335-
CursorPosition,
336-
)> {
337-
if interval.is_empty() {
338-
return Ok((vec![], CursorPosition::End));
324+
request: IndexRangeRequest,
325+
) -> anyhow::Result<
326+
Result<
327+
(
328+
Vec<(IndexKeyBytes, DeveloperDocument, WriteTimestamp)>,
329+
CursorPosition,
330+
),
331+
RangeRequest,
332+
>,
333+
> {
334+
if request.interval.is_empty() {
335+
return Ok(Ok((vec![], CursorPosition::End)));
339336
}
340337

341-
max_rows = cmp::min(max_rows, MAX_PAGE_SIZE);
338+
let max_rows = cmp::min(request.max_rows, MAX_PAGE_SIZE);
342339

343-
let tablet_index_name = match stable_index_name {
340+
let tablet_index_name = match request.stable_index_name {
344341
StableIndexName::Physical(tablet_index_name) => tablet_index_name,
345342
StableIndexName::Virtual(index_name, tablet_index_name) => {
346343
log_virtual_table_query();
347-
return VirtualTable::new(self.tx)
344+
// TODO(lee) batch virtual table queryStreamNext
345+
let virtual_result = VirtualTable::new(self.tx)
348346
.index_range(
349347
RangeRequest {
350348
index_name: tablet_index_name.clone(),
351349
printable_index_name: index_name.clone(),
352-
interval: interval.clone(),
353-
order,
350+
interval: request.interval.clone(),
351+
order: request.order,
354352
max_size: max_rows,
355353
},
356-
version,
354+
request.version,
357355
)
358-
.await;
356+
.await?;
357+
return Ok(Ok(virtual_result));
359358
},
360359
StableIndexName::Missing => {
361-
return Ok((vec![], CursorPosition::End));
360+
return Ok(Ok((vec![], CursorPosition::End)));
362361
},
363362
};
364363
let index_name = tablet_index_name
365364
.clone()
366365
.map_table(&self.tx.table_mapping().tablet_to_name())?;
366+
Ok(Err(RangeRequest {
367+
index_name: tablet_index_name.clone(),
368+
printable_index_name: index_name,
369+
interval: request.interval.clone(),
370+
order: request.order,
371+
max_size: max_rows,
372+
}))
373+
}
374+
375+
/// NOTE: returns a page of results. Callers must call record_read_document
376+
/// for all documents returned from the index stream.
377+
#[convex_macro::instrument_future]
378+
pub async fn index_range_batch(
379+
&mut self,
380+
requests: BTreeMap<BatchKey, IndexRangeRequest>,
381+
) -> BTreeMap<
382+
BatchKey,
383+
anyhow::Result<(
384+
Vec<(IndexKeyBytes, DeveloperDocument, WriteTimestamp)>,
385+
CursorPosition,
386+
)>,
387+
> {
388+
let batch_size = requests.len();
389+
let mut results = BTreeMap::new();
390+
let mut fetch_requests = BTreeMap::new();
391+
for (batch_key, request) in requests {
392+
match self.start_index_range(request).await {
393+
Err(e) => {
394+
results.insert(batch_key, Err(e));
395+
},
396+
Ok(Ok(result)) => {
397+
results.insert(batch_key, Ok(result));
398+
},
399+
Ok(Err(request)) => {
400+
fetch_requests.insert(batch_key, request);
401+
},
402+
}
403+
}
367404

368-
let (results, cursor) = self
405+
let fetch_results = self
369406
.tx
370407
.index
371-
.range(
372-
&mut self.tx.reads,
373-
RangeRequest {
374-
index_name: tablet_index_name.clone(),
375-
printable_index_name: index_name,
376-
interval: interval.clone(),
377-
order,
378-
max_size: max_rows,
379-
},
380-
)
381-
.await?;
382-
let developer_results = results
383-
.into_iter()
384-
.map(|(key, doc, ts)| {
385-
let doc = doc.to_developer();
386-
anyhow::Ok((key, doc, ts))
387-
})
388-
.try_collect()?;
389-
Ok((developer_results, cursor))
408+
.range_batch(&mut self.tx.reads, fetch_requests)
409+
.await;
410+
411+
for (batch_key, fetch_result) in fetch_results {
412+
let result = fetch_result.map(|(resolved_results, cursor)| {
413+
let developer_results = resolved_results
414+
.into_iter()
415+
.map(|(key, doc, ts)| (key, doc.to_developer(), ts))
416+
.collect();
417+
(developer_results, cursor)
418+
});
419+
results.insert(batch_key, result);
420+
}
421+
assert_eq!(results.len(), batch_size);
422+
results
390423
}
391424
}

crates/database/src/query/index_range.rs

Lines changed: 82 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use std::{
33
collections::VecDeque,
44
};
55

6+
use anyhow::Context;
67
use async_trait::async_trait;
78
use common::{
89
document::GenericDocument,
@@ -24,6 +25,7 @@ use common::{
2425
},
2526
version::Version,
2627
};
28+
use maplit::btreemap;
2729

2830
use super::{
2931
query_scanned_too_many_documents_error,
@@ -35,6 +37,7 @@ use super::{
3537
};
3638
use crate::{
3739
metrics,
40+
transaction::IndexRangeRequest,
3841
Transaction,
3942
};
4043

@@ -138,80 +141,94 @@ impl<T: QueryType> IndexRange<T> {
138141
}
139142
}
140143

141-
#[convex_macro::instrument_future]
142-
async fn _next<RT: Runtime>(
144+
fn start_next<RT: Runtime>(
143145
&mut self,
144146
tx: &mut Transaction<RT>,
145147
prefetch_hint: Option<usize>,
146-
) -> anyhow::Result<Option<(GenericDocument<T::T>, WriteTimestamp)>> {
147-
loop {
148-
// If we have an end cursor, for correctness we need to process
149-
// the entire interval, so ignore `maximum_rows_read` and `maximum_bytes_read`.
150-
let enforce_limits = self.cursor_interval.end_inclusive.is_none();
148+
) -> anyhow::Result<Result<Option<(GenericDocument<T::T>, WriteTimestamp)>, IndexRangeRequest>>
149+
{
150+
// If we have an end cursor, for correctness we need to process
151+
// the entire interval, so ignore `maximum_rows_read` and `maximum_bytes_read`.
152+
let enforce_limits = self.cursor_interval.end_inclusive.is_none();
151153

152-
if enforce_limits
153-
&& let Some(maximum_bytes_read) = self.maximum_bytes_read
154-
&& self.returned_bytes >= maximum_bytes_read
155-
{
156-
// If we're over our data budget, throw an error.
157-
// We do this after we've already exceeded the limit to ensure that
158-
// paginated queries always scan at least one item so they can
159-
// make progress.
160-
return Err(query_scanned_too_much_data(self.returned_bytes).into());
161-
}
154+
if enforce_limits
155+
&& let Some(maximum_bytes_read) = self.maximum_bytes_read
156+
&& self.returned_bytes >= maximum_bytes_read
157+
{
158+
// If we're over our data budget, throw an error.
159+
// We do this after we've already exceeded the limit to ensure that
160+
// paginated queries always scan at least one item so they can
161+
// make progress.
162+
return Err(query_scanned_too_much_data(self.returned_bytes).into());
163+
}
162164

163-
if let Some((index_position, v, timestamp)) = self.page.pop_front() {
164-
let index_bytes = index_position.len();
165-
if let Some(intermediate_cursors) = &mut self.intermediate_cursors {
166-
intermediate_cursors.push(CursorPosition::After(index_position.clone()));
167-
}
168-
self.cursor_interval.curr_exclusive = Some(CursorPosition::After(index_position));
169-
self.returned_results += 1;
170-
T::record_read_document(tx, &v, self.printable_index_name.table())?;
171-
// Database bandwidth for index reads
172-
tx.usage_tracker.track_database_egress_size(
173-
self.printable_index_name.table().to_string(),
174-
index_bytes as u64,
175-
self.printable_index_name.is_system_owned(),
176-
);
177-
self.returned_bytes += v.size();
178-
return Ok(Some((v, timestamp)));
179-
}
180-
if let Some(CursorPosition::End) = self.cursor_interval.curr_exclusive {
181-
return Ok(None);
182-
}
183-
if self.unfetched_interval.is_empty() {
184-
// We're out of results. If we have an end cursor then we must
185-
// have reached it. Otherwise we're at the end of the entire
186-
// query.
187-
self.cursor_interval.curr_exclusive = Some(
188-
self.cursor_interval
189-
.end_inclusive
190-
.clone()
191-
.unwrap_or(CursorPosition::End),
192-
);
193-
return Ok(None);
165+
if let Some((index_position, v, timestamp)) = self.page.pop_front() {
166+
let index_bytes = index_position.len();
167+
if let Some(intermediate_cursors) = &mut self.intermediate_cursors {
168+
intermediate_cursors.push(CursorPosition::After(index_position.clone()));
194169
}
170+
self.cursor_interval.curr_exclusive = Some(CursorPosition::After(index_position));
171+
self.returned_results += 1;
172+
T::record_read_document(tx, &v, self.printable_index_name.table())?;
173+
// Database bandwidth for index reads
174+
tx.usage_tracker.track_database_egress_size(
175+
self.printable_index_name.table().to_string(),
176+
index_bytes as u64,
177+
self.printable_index_name.is_system_owned(),
178+
);
179+
self.returned_bytes += v.size();
180+
return Ok(Ok(Some((v, timestamp))));
181+
}
182+
if let Some(CursorPosition::End) = self.cursor_interval.curr_exclusive {
183+
return Ok(Ok(None));
184+
}
185+
if self.unfetched_interval.is_empty() {
186+
// We're out of results. If we have an end cursor then we must
187+
// have reached it. Otherwise we're at the end of the entire
188+
// query.
189+
self.cursor_interval.curr_exclusive = Some(
190+
self.cursor_interval
191+
.end_inclusive
192+
.clone()
193+
.unwrap_or(CursorPosition::End),
194+
);
195+
return Ok(Ok(None));
196+
}
195197

196-
let mut max_rows = prefetch_hint
197-
.unwrap_or(DEFAULT_QUERY_PREFETCH)
198-
.clamp(1, MAX_QUERY_FETCH);
198+
let mut max_rows = prefetch_hint
199+
.unwrap_or(DEFAULT_QUERY_PREFETCH)
200+
.clamp(1, MAX_QUERY_FETCH);
199201

200-
if enforce_limits && let Some(maximum_rows_read) = self.maximum_rows_read {
201-
if self.rows_read >= maximum_rows_read {
202-
return Err(query_scanned_too_many_documents_error(self.rows_read).into());
203-
}
204-
max_rows = cmp::min(max_rows, maximum_rows_read - self.rows_read);
202+
if enforce_limits && let Some(maximum_rows_read) = self.maximum_rows_read {
203+
if self.rows_read >= maximum_rows_read {
204+
return Err(query_scanned_too_many_documents_error(self.rows_read).into());
205205
}
206-
let (page, fetch_cursor) = T::index_range(
207-
tx,
208-
&self.stable_index_name,
209-
&self.unfetched_interval,
210-
self.order,
211-
max_rows,
212-
self.version.clone(),
213-
)
214-
.await?;
206+
max_rows = cmp::min(max_rows, maximum_rows_read - self.rows_read);
207+
}
208+
Ok(Err(IndexRangeRequest {
209+
stable_index_name: self.stable_index_name.clone(),
210+
interval: self.unfetched_interval.clone(),
211+
order: self.order,
212+
max_rows,
213+
version: self.version.clone(),
214+
}))
215+
}
216+
217+
#[convex_macro::instrument_future]
218+
async fn _next<RT: Runtime>(
219+
&mut self,
220+
tx: &mut Transaction<RT>,
221+
prefetch_hint: Option<usize>,
222+
) -> anyhow::Result<Option<(GenericDocument<T::T>, WriteTimestamp)>> {
223+
loop {
224+
let request = match self.start_next(tx, prefetch_hint)? {
225+
Ok(result) => return Ok(result),
226+
Err(request) => request,
227+
};
228+
let (page, fetch_cursor) = T::index_range_batch(tx, btreemap! {0 => request})
229+
.await
230+
.remove(&0)
231+
.context("batch_key missing")??;
215232
let (_, new_unfetched_interval) =
216233
self.unfetched_interval.split(fetch_cursor, self.order);
217234
anyhow::ensure!(self.unfetched_interval != new_unfetched_interval);

0 commit comments

Comments (0)