Skip to content

Commit

Permalink
API,Core: Add scan planning metrics for indexed/eq/pos delete files (a…
Browse files Browse the repository at this point in the history
…pache#5809)

* API,Core: Add scan planning metrics for indexed/eq/pos delete files

* evaluate deletes files only once
  • Loading branch information
nastra authored Sep 30, 2022
1 parent 6d74364 commit 9eb5cbd
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 0 deletions.
30 changes: 30 additions & 0 deletions api/src/main/java/org/apache/iceberg/metrics/ScanReport.java
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,15 @@ interface ScanMetricsResult {
@Nullable
CounterResult skippedDeleteManifests();

@Nullable
CounterResult indexedDeleteFiles();

@Nullable
CounterResult equalityDeleteFiles();

@Nullable
CounterResult positionalDeleteFiles();

static ScanMetricsResult fromScanMetrics(ScanMetrics scanMetrics) {
Preconditions.checkArgument(null != scanMetrics, "Invalid scan metrics: null");
return ImmutableScanMetricsResult.builder()
Expand All @@ -154,6 +163,9 @@ static ScanMetricsResult fromScanMetrics(ScanMetrics scanMetrics) {
.skippedDeleteFiles(CounterResult.fromCounter(scanMetrics.skippedDeleteFiles()))
.scannedDeleteManifests(CounterResult.fromCounter(scanMetrics.scannedDeleteManifests()))
.skippedDeleteManifests(CounterResult.fromCounter(scanMetrics.skippedDeleteManifests()))
.indexedDeleteFiles(CounterResult.fromCounter(scanMetrics.indexedDeleteFiles()))
.equalityDeleteFiles(CounterResult.fromCounter(scanMetrics.equalityDeleteFiles()))
.positionalDeleteFiles(CounterResult.fromCounter(scanMetrics.positionalDeleteFiles()))
.build();
}
}
Expand All @@ -174,6 +186,9 @@ abstract class ScanMetrics {
public static final String SKIPPED_DELETE_MANIFESTS = "skipped-delete-manifests";
public static final String SKIPPED_DATA_FILES = "skipped-data-files";
public static final String SKIPPED_DELETE_FILES = "skipped-delete-files";
public static final String INDEXED_DELETE_FILES = "indexed-delete-files";
public static final String EQUALITY_DELETE_FILES = "equality-delete-files";
public static final String POSITIONAL_DELETE_FILES = "positional-delete-files";

public static ScanMetrics noop() {
return ScanMetrics.of(MetricsContext.nullMetrics());
Expand Down Expand Up @@ -246,6 +261,21 @@ public Counter skippedDeleteManifests() {
return metricsContext().counter(SKIPPED_DELETE_MANIFESTS, MetricsContext.Unit.COUNT);
}

@Value.Derived
public Counter indexedDeleteFiles() {
return metricsContext().counter(INDEXED_DELETE_FILES, MetricsContext.Unit.COUNT);
}

@Value.Derived
public Counter equalityDeleteFiles() {
return metricsContext().counter(EQUALITY_DELETE_FILES, MetricsContext.Unit.COUNT);
}

@Value.Derived
public Counter positionalDeleteFiles() {
return metricsContext().counter(POSITIONAL_DELETE_FILES, MetricsContext.Unit.COUNT);
}

public static ScanMetrics of(MetricsContext metricsContext) {
return ImmutableScanMetrics.builder().metricsContext(metricsContext).build();
}
Expand Down
13 changes: 13 additions & 0 deletions core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,19 @@ DeleteFileIndex build() {
}
}

scanMetrics.indexedDeleteFiles().increment(deleteEntries.size());
deleteFilesByPartition
.values()
.forEach(
entry -> {
FileContent content = entry.file().content();
if (content == FileContent.EQUALITY_DELETES) {
scanMetrics.equalityDeleteFiles().increment();
} else if (content == FileContent.POSITION_DELETES) {
scanMetrics.positionalDeleteFiles().increment();
}
});

return new DeleteFileIndex(
specsById, globalApplySeqs, globalDeletes, sortedDeletesByPartition);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,21 @@ static void toJson(ScanMetricsResult metrics, JsonGenerator gen) throws IOExcept
CounterResultParser.toJson(metrics.skippedDeleteManifests(), gen);
}

if (null != metrics.indexedDeleteFiles()) {
gen.writeFieldName(ScanMetrics.INDEXED_DELETE_FILES);
CounterResultParser.toJson(metrics.indexedDeleteFiles(), gen);
}

if (null != metrics.equalityDeleteFiles()) {
gen.writeFieldName(ScanMetrics.EQUALITY_DELETE_FILES);
CounterResultParser.toJson(metrics.equalityDeleteFiles(), gen);
}

if (null != metrics.positionalDeleteFiles()) {
gen.writeFieldName(ScanMetrics.POSITIONAL_DELETE_FILES);
CounterResultParser.toJson(metrics.positionalDeleteFiles(), gen);
}

gen.writeEndObject();
}

Expand Down Expand Up @@ -142,6 +157,10 @@ static ScanMetricsResult fromJson(JsonNode json) {
CounterResultParser.fromJson(ScanMetrics.SCANNED_DELETE_MANIFESTS, json))
.skippedDeleteManifests(
CounterResultParser.fromJson(ScanMetrics.SKIPPED_DELETE_MANIFESTS, json))
.indexedDeleteFiles(CounterResultParser.fromJson(ScanMetrics.INDEXED_DELETE_FILES, json))
.equalityDeleteFiles(CounterResultParser.fromJson(ScanMetrics.EQUALITY_DELETE_FILES, json))
.positionalDeleteFiles(
CounterResultParser.fromJson(ScanMetrics.POSITIONAL_DELETE_FILES, json))
.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ public void scanningWithDeletes() throws IOException {
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(20L);
assertThat(result.skippedDataFiles().value()).isEqualTo(0);
assertThat(result.skippedDeleteFiles().value()).isEqualTo(0);
assertThat(result.indexedDeleteFiles().value()).isEqualTo(2);
assertThat(result.equalityDeleteFiles().value()).isEqualTo(0);
assertThat(result.positionalDeleteFiles().value()).isEqualTo(2);
}

@Test
Expand Down Expand Up @@ -212,6 +215,33 @@ public void scanningWithSkippedDeleteFiles() throws IOException {
assertThat(result.totalDeleteManifests().value()).isEqualTo(2);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(10L);
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(10L);
assertThat(result.indexedDeleteFiles().value()).isEqualTo(1);
assertThat(result.equalityDeleteFiles().value()).isEqualTo(1);
assertThat(result.positionalDeleteFiles().value()).isEqualTo(0);
}

@Test
public void scanningWithEqualityAndPositionalDeleteFiles() throws IOException {
String tableName = "scan-planning-with-eq-and-pos-delete-files";
Table table =
TestTables.create(
tableDir, tableName, SCHEMA, SPEC, SortOrder.unsorted(), formatVersion, reporter);
table.newAppend().appendFile(FILE_A).commit();
// FILE_A_DELETES = positionalDelete / FILE_A2_DELETES = equalityDelete
table.newRowDelta().addDeletes(FILE_A_DELETES).addDeletes(FILE_A2_DELETES).commit();
TableScan tableScan = table.newScan();

try (CloseableIterable<FileScanTask> fileScanTasks =
tableScan.filter(Expressions.equal("data", "6")).planFiles()) {
fileScanTasks.forEach(task -> {});
}

ScanReport scanReport = reporter.lastReport();
assertThat(scanReport).isNotNull();
ScanMetricsResult result = scanReport.scanMetrics();
assertThat(result.indexedDeleteFiles().value()).isEqualTo(2);
assertThat(result.equalityDeleteFiles().value()).isEqualTo(1);
assertThat(result.positionalDeleteFiles().value()).isEqualTo(1);
}

private static class TestScanReporter implements ScanReporter {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@ public void extraFields() {
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);
scanMetrics.indexedDeleteFiles().increment(10L);
scanMetrics.positionalDeleteFiles().increment(6L);
scanMetrics.equalityDeleteFiles().increment(4L);

ScanMetricsResult scanMetricsResult = ScanMetricsResult.fromScanMetrics(scanMetrics);
Assertions.assertThat(
Expand All @@ -195,6 +198,9 @@ public void extraFields() {
+ "\"skipped-delete-files\":{\"unit\":\"count\",\"value\":3},"
+ "\"scanned-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"skipped-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"indexed-delete-files\":{\"unit\":\"count\",\"value\":10},"
+ "\"equality-delete-files\":{\"unit\":\"count\",\"value\":4},"
+ "\"positional-delete-files\":{\"unit\":\"count\",\"value\":6},"
+ "\"extra\": \"value\",\"extra2\":23}"))
.isEqualTo(scanMetricsResult);
}
Expand Down Expand Up @@ -236,6 +242,9 @@ public void roundTripSerde() {
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);
scanMetrics.indexedDeleteFiles().increment(10L);
scanMetrics.positionalDeleteFiles().increment(6L);
scanMetrics.equalityDeleteFiles().increment(4L);

ScanMetricsResult scanMetricsResult = ScanMetricsResult.fromScanMetrics(scanMetrics);

Expand Down Expand Up @@ -293,6 +302,18 @@ public void roundTripSerde() {
+ " \"skipped-delete-manifests\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " },\n"
+ " \"indexed-delete-files\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 10\n"
+ " },\n"
+ " \"equality-delete-files\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 4\n"
+ " },\n"
+ " \"positional-delete-files\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 6\n"
+ " }\n"
+ "}";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ public void extraFields() {
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);
scanMetrics.indexedDeleteFiles().increment(10L);
scanMetrics.positionalDeleteFiles().increment(6L);
scanMetrics.equalityDeleteFiles().increment(4L);

String tableName = "roundTripTableName";
Schema projection =
Expand Down Expand Up @@ -114,6 +117,9 @@ public void extraFields() {
+ "\"skipped-delete-files\":{\"unit\":\"count\",\"value\":3},"
+ "\"scanned-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"skipped-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"indexed-delete-files\":{\"unit\":\"count\",\"value\":10},"
+ "\"equality-delete-files\":{\"unit\":\"count\",\"value\":4},"
+ "\"positional-delete-files\":{\"unit\":\"count\",\"value\":6},"
+ "\"extra-metric\":\"extra-val\"},"
+ "\"extra\":\"extraVal\"}"))
.usingRecursiveComparison()
Expand Down Expand Up @@ -174,6 +180,9 @@ public void roundTripSerde() {
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);
scanMetrics.indexedDeleteFiles().increment(10L);
scanMetrics.positionalDeleteFiles().increment(6L);
scanMetrics.equalityDeleteFiles().increment(4L);

String tableName = "roundTripTableName";
Schema projection =
Expand Down Expand Up @@ -256,6 +265,18 @@ public void roundTripSerde() {
+ " \"skipped-delete-manifests\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " },\n"
+ " \"indexed-delete-files\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 10\n"
+ " },\n"
+ " \"equality-delete-files\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 4\n"
+ " },\n"
+ " \"positional-delete-files\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 6\n"
+ " }\n"
+ " }\n"
+ "}";
Expand Down

0 comments on commit 9eb5cbd

Please sign in to comment.