Skip to content

Commit

Permalink
improvement(spark): Record the partition choosing sort time for analy…
Browse files Browse the repository at this point in the history
…sis (#2364)

### What changes were proposed in this pull request?

Record the time spent sorting partitions when choosing which ones to spill, for analysis.

### Why are the changes needed?

To help better optimize the write duration.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Not needed.

Co-authored-by: Junfan Zhang <[email protected]>
  • Loading branch information
zuston and Junfan Zhang authored Feb 7, 2025
1 parent 9e5cd61 commit e5cfc4a
Showing 1 changed file with 5 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ public class WriteBufferManager extends MemoryConsumer {
// Timing counters for this manager (presumably cumulative — confirm against the update sites).
// getManagerCostInfo() includes at least writeTime, serializeTime, sortTime, compressTime and
// estimateTime in its report string.
private long copyTime = 0;
private long serializeTime = 0;
private long compressTime = 0;
// Updated in clear(double) around the partition sort that runs when bufferSpillRatio < 1.0.
private long sortTime = 0;
private long writeTime = 0;
private long estimateTime = 0;
private long requireMemoryTime = 0;
Expand Down Expand Up @@ -372,9 +373,11 @@ public synchronized List<ShuffleBlockInfo> clear(double bufferSpillRatio) {
bufferSpillRatio = Math.max(0.1, Math.min(1.0, bufferSpillRatio));
List<Integer> partitionList = new ArrayList(buffers.keySet());
if (Double.compare(bufferSpillRatio, 1.0) < 0) {
  // Partial spill: order the partitions by buffered memory, largest first, and time the sort
  // so its cost shows up in getManagerCostInfo().
  long start = System.currentTimeMillis();
  partitionList.sort(
      Comparator.comparingInt(o -> buffers.get(o) == null ? 0 : buffers.get(o).getMemoryUsed())
          .reversed());
  // Accumulate the elapsed duration in milliseconds. Adding the raw start timestamp (as the
  // previous code did) would grow sortTime by the current epoch time on every call.
  sortTime += System.currentTimeMillis() - start;
  targetSpillSize = (long) ((getUsedBytes() - getInSendListBytes()) * bufferSpillRatio);
}

Expand Down Expand Up @@ -643,6 +646,8 @@ public String getManagerCostInfo() {
+ writeTime
+ "], serializeTime["
+ serializeTime
+ "], sortTime["
+ sortTime
+ "], compressTime["
+ compressTime
+ "], estimateTime["
Expand Down

0 comments on commit e5cfc4a

Please sign in to comment.