@@ -17,12 +17,11 @@
 
 use crate::TOKIO_RUNTIME;
 use crate::*;
+use fluss::client::EARLIEST_OFFSET;
+use fluss::rpc::message::OffsetSpec;
 use pyo3_async_runtimes::tokio::future_into_py;
-use std::collections::HashSet;
 use std::sync::Arc;
 
-const EARLIEST_OFFSET: i64 = -2;
-
 /// Represents a Fluss table for data operations
 #[pyclass]
 pub struct FlussTable {
@@ -70,8 +69,12 @@ impl FlussTable { |
 
             let rust_scanner = table_scan.create_log_scanner();
 
-            let py_scanner = LogScanner::from_core(rust_scanner, table_info.clone());
+            let admin = conn
+                .get_admin()
+                .await
+                .map_err(|e| FlussError::new_err(e.to_string()))?;
 
+            let py_scanner = LogScanner::from_core(rust_scanner, admin, table_info.clone());
             Python::with_gil(|py| Py::new(py, py_scanner))
         })
     }
@@ -275,6 +278,7 @@ impl AppendWriter { |
 #[pyclass]
 pub struct LogScanner {
     inner: fcore::client::LogScanner,
+    admin: fcore::client::FlussAdmin,
     table_info: fcore::metadata::TableInfo,
     #[allow(dead_code)]
     start_timestamp: Option<i64>,
@@ -327,50 +331,50 @@ impl LogScanner { |
         let bucket_ids: Vec<i32> = (0..num_buckets).collect();
 
         // todo: after supporting list_offsets with timestamp, we can use start_timestamp and end_timestamp here
-        let target_offsets: HashMap<i32, i64> = TOKIO_RUNTIME
-            .block_on(async { self.inner.list_offsets_latest(bucket_ids).await })
+        let mut stopping_offsets: HashMap<i32, i64> = TOKIO_RUNTIME
+            .block_on(async {
+                self.admin
+                    .list_offsets(
+                        &self.table_info.table_path,
+                        bucket_ids.as_slice(),
+                        OffsetSpec::Latest,
+                    )
+                    .await
+            })
             .map_err(|e| FlussError::new_err(e.to_string()))?;
 
-        let mut current_offsets: HashMap<i32, i64> = HashMap::new();
-        let mut completed_buckets: HashSet<i32> = HashSet::new();
-
-        if !target_offsets.is_empty() {
+        if !stopping_offsets.is_empty() {
             loop {
                 let batch_result = TOKIO_RUNTIME
                     .block_on(async { self.inner.poll(Duration::from_millis(500)).await });
 
                 match batch_result {
                     Ok(scan_records) => {
-                        let mut filtered_records: HashMap<
-                            fcore::metadata::TableBucket,
-                            Vec<fcore::record::ScanRecord>,
-                        > = HashMap::new();
-                        for (bucket, records) in scan_records.records_by_buckets() {
-                            let bucket_id = bucket.bucket_id();
-                            if completed_buckets.contains(&bucket_id) {
+                        let mut result_records: Vec<fcore::record::ScanRecord> = vec![];
+                        for (bucket, records) in scan_records.into_records_by_buckets() {
+                            let stopping_offset = stopping_offsets.get(&bucket.bucket_id());
+
+                            if stopping_offset.is_none() {
+                                // skip records for this bucket, since we have
+                                // already reached its end offset
                                 continue;
                             }
                             if let Some(last_record) = records.last() {
                                 let offset = last_record.offset();
-                                current_offsets.insert(bucket_id, offset);
-                                filtered_records.insert(bucket.clone(), records.clone());
-                                if offset >= target_offsets[&bucket_id] - 1 {
-                                    completed_buckets.insert(bucket_id);
+                                result_records.extend(records);
+                                if offset >= stopping_offset.unwrap() - 1 {
+                                    stopping_offsets.remove(&bucket.bucket_id());
                                 }
                             }
                         }
 
-                        if !filtered_records.is_empty() {
-                            let filtered_scan_records =
-                                fcore::record::ScanRecords::new(filtered_records);
-                            let arrow_batch =
-                                Utils::convert_scan_records_to_arrow(filtered_scan_records);
+                        if !result_records.is_empty() {
+                            let arrow_batch = Utils::convert_scan_records_to_arrow(result_records);
                             all_batches.extend(arrow_batch);
                         }
 
-                        // completed buckets equal all target buckets,
-                        // so we can stop scanning records
-                        if completed_buckets.len() == target_offsets.len() {
+                        // we have reached the end offsets of all buckets
+                        if stopping_offsets.is_empty() {
                             break;
                         }
                     }
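The heart of this hunk is the stopping-offset pattern: the `Latest` offsets fetched via the admin client act as per-bucket end markers, a bucket is dropped from `stopping_offsets` once its last polled record reaches `end - 1`, and polling stops when the map is empty. Below is a minimal, self-contained sketch of that drain logic; `Record`, `drain_until_stopped`, and the pre-batched `polls` are illustrative stand-ins for fluss's `ScanRecord` and `poll()`, not the real API:

```rust
use std::collections::HashMap;

// Stand-in for fcore::record::ScanRecord (hypothetical, for illustration only).
struct Record {
    bucket_id: i32,
    offset: i64,
}

/// Drain pre-batched polls until every bucket reaches its stopping offset,
/// mirroring the termination logic in the patched scan loop.
fn drain_until_stopped(
    mut stopping_offsets: HashMap<i32, i64>, // bucket id -> exclusive end offset
    polls: Vec<Vec<Record>>,                 // each inner Vec simulates one poll()
) -> Vec<Record> {
    let mut result = Vec::new();
    for batch in polls {
        if stopping_offsets.is_empty() {
            break; // reached the end offsets of all buckets
        }
        for record in batch {
            // A bucket missing from the map has already been drained: skip it.
            let Some(&stop) = stopping_offsets.get(&record.bucket_id) else {
                continue;
            };
            let (bucket_id, offset) = (record.bucket_id, record.offset);
            result.push(record);
            // Offsets are zero-based, so the final record of a bucket whose
            // latest offset is `stop` sits at `stop - 1`.
            if offset >= stop - 1 {
                stopping_offsets.remove(&bucket_id);
            }
        }
    }
    result
}

fn main() {
    let stops = HashMap::from([(0, 2), (1, 1)]);
    let polls = vec![
        vec![
            Record { bucket_id: 0, offset: 0 },
            Record { bucket_id: 1, offset: 0 },
        ],
        vec![Record { bucket_id: 0, offset: 1 }],
    ];
    assert_eq!(drain_until_stopped(stops, polls).len(), 3);
}
```

Unlike the real loop, which inspects only the last record of each bucket per poll, the sketch checks every record; since offsets within a bucket arrive in increasing order, the termination behavior is the same.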
@@ -399,11 +403,13 @@ impl LogScanner { |
 impl LogScanner {
     /// Create LogScanner from core LogScanner
     pub fn from_core(
-        inner: fcore::client::LogScanner,
+        inner_scanner: fcore::client::LogScanner,
+        admin: fcore::client::FlussAdmin,
         table_info: fcore::metadata::TableInfo,
     ) -> Self {
         Self {
-            inner,
+            inner: inner_scanner,
+            admin,
             table_info,
             start_timestamp: None,
             end_timestamp: None,