KAFKA-17367: Share coordinator impl. New merge batches algorithm. [3/N] #17149

Open: wants to merge 35 commits into base: trunk (changes shown from 27 of 35 commits).

Commits
2128532  KAFKA-17367: Share coordinator impl. Added additional tests. [3/N] (smjn, Sep 10, 2024)
5f377fb  Improved test names. (smjn, Sep 10, 2024)
8fecb1d  removed extraneous comment. (smjn, Sep 10, 2024)
7042155  removed incorrect comment, added another overlap test. (smjn, Sep 10, 2024)
a299752  Modified combine batches logic. (smjn, Sep 11, 2024)
2e1d9a5  Added start offset based pruning. (smjn, Sep 11, 2024)
080d422  fixed comment. (smjn, Sep 11, 2024)
bce0cb3  Handle overlapping input. (smjn, Sep 11, 2024)
f112e0c  renamed method arg. (smjn, Sep 11, 2024)
fca3dcb  fixed bug in merge logic. (smjn, Sep 11, 2024)
1e88e7d  add delivery count as a sort dimension. (smjn, Sep 11, 2024)
0c7aadf  minor refactoring. (smjn, Sep 11, 2024)
e483bd2  merge logic overhaul. (smjn, Sep 12, 2024)
65f3b7b  minor optimization, fixed comments. (smjn, Sep 12, 2024)
fc548d2  added generator for tests. (smjn, Sep 12, 2024)
9049ab6  further optimized merge. (smjn, Sep 12, 2024)
1d1eb19  minor perf tweaks. (smjn, Sep 13, 2024)
c892bb0  removed extraneous prune. (smjn, Sep 13, 2024)
8fb168e  fixed comment. (smjn, Sep 13, 2024)
6993671  incorporated review comments. (smjn, Sep 13, 2024)
d20624b  incorporated further comments. (smjn, Sep 18, 2024)
6561d87  incorporated comments. (smjn, Sep 23, 2024)
d747dd3  Merge remote-tracking branch 'ak/trunk' into KAFKA-17367-3n (smjn, Sep 25, 2024)
60bda5d  Merge remote-tracking branch 'ak/trunk' into KAFKA-17367-3n (smjn, Sep 25, 2024)
2e5003c  Moved state batch merge code to util class. (smjn, Sep 25, 2024)
088bbf1  fixed documentation. (smjn, Sep 25, 2024)
32db0a6  incorporated review comments. (smjn, Sep 26, 2024)
4d2eaac  changed byte, short to int in tests. (smjn, Sep 26, 2024)
9928084  converted batch util to class. (smjn, Sep 27, 2024)
86d1237  renamed a few private methods. (smjn, Sep 27, 2024)
5e98616  added comprehensive javadoc. (smjn, Sep 27, 2024)
7a63481  minor bug fix. (smjn, Sep 27, 2024)
9d2e834  minor refactoring. (smjn, Sep 28, 2024)
0c7436d  create new arraylist from arguments. (smjn, Sep 28, 2024)
6f1725f  incorporated comments. (smjn, Sep 30, 2024)
2 changes: 2 additions & 0 deletions checkstyle/import-control-share-coordinator.xml
@@ -62,6 +62,8 @@
<allow pkg="org.apache.kafka.server.util.timer" />
<allow pkg="org.apache.kafka.timeline" />
<allow pkg="org.junit.jupiter.api" />
<allow pkg="org.junit.jupiter.params" />
<allow pkg="org.junit.jupiter.provider" />
<allow pkg="org.mockito" />
<allow pkg="org.slf4j" />
<subpackage name="generated">
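The two new allows admit JUnit's parameterized-test packages, which the "added generator for tests" commit relies on. A minimal sketch of the kind of test these enable; the test class, the generator method, and its cases are illustrative, not taken from this PR:

```java
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import java.util.stream.Stream;

import static org.junit.jupiter.api.Assertions.assertTrue;

class BatchOverlapExampleTest {
    // Hypothetical generator: the commit log suggests a MethodSource of
    // roughly this shape; the real test cases differ.
    private static Stream<Arguments> overlappingRanges() {
        return Stream.of(
            Arguments.of(5L, 10L, 8L, 12L),   // partial overlap
            Arguments.of(5L, 10L, 5L, 10L),   // identical range
            Arguments.of(5L, 10L, 10L, 15L)   // touching at the boundary
        );
    }

    @ParameterizedTest
    @MethodSource("overlappingRanges")
    void testRangesOverlap(long first1, long last1, long first2, long last2) {
        // Two closed offset ranges overlap iff each starts no later than
        // the other ends.
        assertTrue(first1 <= last2 && first2 <= last1);
    }
}
```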
6 changes: 6 additions & 0 deletions checkstyle/suppressions.xml
@@ -340,6 +340,12 @@
<suppress checks="NPathComplexity"
files="CoordinatorRuntime.java"/>

<!-- share coordinator -->
<suppress checks="NPathComplexity"
files="ShareCoordinatorShard.java"/>
<suppress checks="CyclomaticComplexity"
files="ShareCoordinatorShard.java"/>

<!-- storage -->
<suppress checks="CyclomaticComplexity"
files="(LogLoader|LogValidator|RemoteLogManagerConfig|RemoteLogManager).java"/>

This file was deleted.

ShareCoordinatorShard.java
@@ -48,19 +48,17 @@
import org.apache.kafka.server.common.ApiMessageAndVersion;
import org.apache.kafka.server.config.ShareCoordinatorConfig;
import org.apache.kafka.server.share.PartitionFactory;
import org.apache.kafka.server.share.PersisterStateBatch;
import org.apache.kafka.server.share.SharePartitionKey;
import org.apache.kafka.timeline.SnapshotRegistry;
import org.apache.kafka.timeline.TimelineHashMap;

import org.slf4j.Logger;

import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

public class ShareCoordinatorShard implements CoordinatorShard<CoordinatorRecord> {
Expand Down Expand Up @@ -293,11 +291,11 @@ public CoordinatorResult<WriteShareGroupStateResponseData, CoordinatorRecord> wr

// Since the number of update records for this share part key exceeds snapshotUpdateRecordsPerSnapshot,
// we should be creating a share snapshot record.
List<PersisterOffsetsStateBatch> batchesToAdd = combineStateBatches(
shareStateMap.get(key).stateBatchAsSet(),
List<PersisterStateBatch> batchesToAdd = StateBatchUtil.combineStateBatches(
shareStateMap.get(key).stateBatches(),
partitionData.stateBatches().stream()
.map(PersisterOffsetsStateBatch::from)
.collect(Collectors.toCollection(LinkedHashSet::new)),
.map(PersisterStateBatch::from)
.collect(Collectors.toList()),
newStartOffset);

recordList = Collections.singletonList(ShareCoordinatorRecordHelpers.newShareSnapshotRecord(
Expand Down Expand Up @@ -527,7 +525,7 @@ CoordinatorMetricsShard getMetricsShard() {
private static ShareGroupOffset merge(ShareGroupOffset soFar, ShareUpdateValue newData) {
// snapshot epoch should be same as last share snapshot
// state epoch is not present
Set<PersisterOffsetsStateBatch> currentBatches = soFar.stateBatchAsSet();
List<PersisterStateBatch> currentBatches = soFar.stateBatches();
long newStartOffset = newData.startOffset() == -1 ? soFar.startOffset() : newData.startOffset();
int newLeaderEpoch = newData.leaderEpoch() == -1 ? soFar.leaderEpoch() : newData.leaderEpoch();

@@ -536,41 +534,12 @@ private static ShareGroupOffset merge(ShareGroupOffset soFar, ShareUpdateValue n
.setStateEpoch(soFar.stateEpoch())
.setStartOffset(newStartOffset)
.setLeaderEpoch(newLeaderEpoch)
.setStateBatches(combineStateBatches(currentBatches, newData.stateBatches().stream()
.map(PersisterOffsetsStateBatch::from)
.collect(Collectors.toCollection(LinkedHashSet::new)), newStartOffset))
.setStateBatches(StateBatchUtil.combineStateBatches(currentBatches, newData.stateBatches().stream()
.map(StateBatchUtil::toPersisterStateBatch)
.collect(Collectors.toList()), newStartOffset))
.build();
}

/**
* Util method which takes in 2 collections containing {@link PersisterOffsetsStateBatch}
* and the startOffset.
* It removes all batches from the 1st collection which have the same first and last offset
* as the batches in 2nd collection. It then creates a final list of batches which contains the
* former result and all the batches in the 2nd collection. In set notation (A - B) U B (we prefer batches in B
* which have same first and last offset in A).
* Finally, it removes any batches where the lastOffset < startOffset, if the startOffset > -1.
* @param currentBatch - collection containing current soft state of batches
* @param newBatch - collection containing batches in incoming request
* @param startOffset - startOffset to consider when removing old batches.
* @return List containing combined batches
*/
private static List<PersisterOffsetsStateBatch> combineStateBatches(
Collection<PersisterOffsetsStateBatch> currentBatch,
Collection<PersisterOffsetsStateBatch> newBatch,
long startOffset
) {
currentBatch.removeAll(newBatch);
List<PersisterOffsetsStateBatch> batchesToAdd = new LinkedList<>(currentBatch);
batchesToAdd.addAll(newBatch);
// Any batches where the last offset is < the current start offset
// are now expired. We should remove them from the persister.
if (startOffset != -1) {
batchesToAdd.removeIf(batch -> batch.lastOffset() < startOffset);
}
return batchesToAdd;
}

private static ApiMessage messageOrNull(ApiMessageAndVersion apiMessageAndVersion) {
if (apiMessageAndVersion == null) {
return null;
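The removed javadoc above pins down the baseline contract: (A - B) ∪ B, preferring B's batches on first/last-offset ties, then pruning batches that end before startOffset. A self-contained sketch of just that contract follows; the Batch record with offset-only equality is a stand-in for PersisterStateBatch, and the PR's actual replacement, StateBatchUtil.combineStateBatches, additionally handles overlapping input and delivery counts per the commit log:

```java
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

public class CombineSketch {
    // Stand-in for PersisterStateBatch; equality on first/last offset only,
    // matching the "same first and last offset" rule in the removed javadoc.
    record Batch(long firstOffset, long lastOffset, byte deliveryState, short deliveryCount) {
        @Override
        public boolean equals(Object o) {
            return o instanceof Batch b
                && b.firstOffset == firstOffset
                && b.lastOffset == lastOffset;
        }

        @Override
        public int hashCode() {
            return Long.hashCode(firstOffset) * 31 + Long.hashCode(lastOffset);
        }
    }

    // (A - B) U B, then drop batches that end before startOffset
    // (pruning is skipped when startOffset == -1, as in the removed code).
    static List<Batch> combine(List<Batch> current, List<Batch> incoming, long startOffset) {
        Set<Batch> merged = new LinkedHashSet<>(current);
        merged.removeAll(incoming);  // drop A's entries that B overrides
        merged.addAll(incoming);     // then take all of B
        List<Batch> result = new ArrayList<>(merged);
        if (startOffset != -1) {
            result.removeIf(b -> b.lastOffset() < startOffset);
        }
        return result;
    }
}
```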
ShareGroupOffset.java
@@ -20,12 +20,12 @@
import org.apache.kafka.common.message.WriteShareGroupStateRequestData;
import org.apache.kafka.coordinator.share.generated.ShareSnapshotValue;
import org.apache.kafka.coordinator.share.generated.ShareUpdateValue;
import org.apache.kafka.server.share.PersisterStateBatch;

import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

/**
@@ -37,13 +37,13 @@ public class ShareGroupOffset {
private final int stateEpoch;
private final int leaderEpoch;
private final long startOffset;
private final List<PersisterOffsetsStateBatch> stateBatches;
private final List<PersisterStateBatch> stateBatches;

private ShareGroupOffset(int snapshotEpoch,
int stateEpoch,
int leaderEpoch,
long startOffset,
List<PersisterOffsetsStateBatch> stateBatches) {
List<PersisterStateBatch> stateBatches) {
this.snapshotEpoch = snapshotEpoch;
this.stateEpoch = stateEpoch;
this.leaderEpoch = leaderEpoch;
@@ -67,16 +67,16 @@ public long startOffset() {
return startOffset;
}

public List<PersisterOffsetsStateBatch> stateBatches() {
public List<PersisterStateBatch> stateBatches() {
return Collections.unmodifiableList(stateBatches);
}

private static PersisterOffsetsStateBatch toPersisterOffsetsStateBatch(ShareSnapshotValue.StateBatch stateBatch) {
return new PersisterOffsetsStateBatch(stateBatch.firstOffset(), stateBatch.lastOffset(), stateBatch.deliveryState(), stateBatch.deliveryCount());
private static PersisterStateBatch toPersisterOffsetsStateBatch(ShareSnapshotValue.StateBatch stateBatch) {
return new PersisterStateBatch(stateBatch.firstOffset(), stateBatch.lastOffset(), stateBatch.deliveryState(), stateBatch.deliveryCount());
}

private static PersisterOffsetsStateBatch toPersisterOffsetsStateBatch(ShareUpdateValue.StateBatch stateBatch) {
return new PersisterOffsetsStateBatch(stateBatch.firstOffset(), stateBatch.lastOffset(), stateBatch.deliveryState(), stateBatch.deliveryCount());
private static PersisterStateBatch toPersisterOffsetsStateBatch(ShareUpdateValue.StateBatch stateBatch) {
return new PersisterStateBatch(stateBatch.firstOffset(), stateBatch.lastOffset(), stateBatch.deliveryState(), stateBatch.deliveryCount());
}

public static ShareGroupOffset fromRecord(ShareSnapshotValue record) {
@@ -99,11 +99,11 @@ public static ShareGroupOffset fromRequest(WriteShareGroupStateRequestData.Parti
data.leaderEpoch(),
data.startOffset(),
data.stateBatches().stream()
.map(PersisterOffsetsStateBatch::from)
.map(PersisterStateBatch::from)
.collect(Collectors.toList()));
}

public Set<PersisterOffsetsStateBatch> stateBatchAsSet() {
public LinkedHashSet<PersisterStateBatch> stateBatchAsSet() {
return new LinkedHashSet<>(stateBatches);
}

@@ -112,7 +112,7 @@ public static class Builder {
private int stateEpoch;
private int leaderEpoch;
private long startOffset;
private List<PersisterOffsetsStateBatch> stateBatches;
private List<PersisterStateBatch> stateBatches;

public Builder setSnapshotEpoch(int snapshotEpoch) {
this.snapshotEpoch = snapshotEpoch;
@@ -134,7 +134,7 @@ public Builder setStartOffset(long startOffset) {
return this;
}

public Builder setStateBatches(List<PersisterOffsetsStateBatch> stateBatches) {
public Builder setStateBatches(List<PersisterStateBatch> stateBatches) {
this.stateBatches = stateBatches;
return this;
}
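For orientation, a usage sketch of the builder and accessors shown in this diff. The epoch and offset values are illustrative; the (byte)/(short) constructor casts assume PersisterStateBatch keeps the generated StateBatch field types, and the org.apache.kafka.coordinator.share package for ShareGroupOffset is inferred from the surrounding imports:

```java
import org.apache.kafka.coordinator.share.ShareGroupOffset;
import org.apache.kafka.server.share.PersisterStateBatch;

import java.util.Collections;

public class ShareGroupOffsetExample {
    public static void main(String[] args) {
        // Build a ShareGroupOffset using the setters visible in the diff.
        ShareGroupOffset offset = new ShareGroupOffset.Builder()
            .setSnapshotEpoch(0)
            .setStateEpoch(1)
            .setLeaderEpoch(5)
            .setStartOffset(100L)
            .setStateBatches(Collections.singletonList(
                new PersisterStateBatch(100L, 109L, (byte) 2, (short) 1)))
            .build();

        // stateBatches() returns an unmodifiable view; stateBatchAsSet() now
        // returns a LinkedHashSet, deduplicating while preserving batch order.
        System.out.println(offset.stateBatches());
        System.out.println(offset.stateBatchAsSet());
    }
}
```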