Skip to content

Commit

Permalink
address comments
Browse files Browse the repository at this point in the history
Signed-off-by: Hongbin Ma (Mahone) <[email protected]>
  • Loading branch information
binmahone committed Dec 10, 2024
1 parent 783cd27 commit af093a6
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ private final class SemaphoreTaskInfo(val stageId: Int, val taskAttemptId: Long)
semaphore.release(numPermits)
hasSemaphore = false
lastReleased = System.nanoTime()
GpuTaskMetrics.get.addGpuTime(lastReleased - lastAcquired)
GpuTaskMetrics.get.addSemaphoreHoldingTime(lastReleased - lastAcquired)
nvtxRange.foreach(_.close())
nvtxRange = None
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ class AvgLongAccumulator extends AccumulatorV2[jl.Long, jl.Double] {
}

class GpuTaskMetrics extends Serializable {
private val gpuTime = new NanoSecondAccumulator
private val semaphoreHoldingTime = new NanoSecondAccumulator
private val semWaitTimeNs = new NanoSecondAccumulator
private val retryCount = new LongAccumulator
private val splitAndRetryCount = new LongAccumulator
Expand Down Expand Up @@ -239,7 +239,7 @@ class GpuTaskMetrics extends Serializable {
}

private val metrics = Map[String, AccumulatorV2[_, _]](
"gpuTime" -> gpuTime,
"gpuTime" -> semaphoreHoldingTime,
"gpuSemaphoreWait" -> semWaitTimeNs,
"gpuRetryCount" -> retryCount,
"gpuSplitAndRetryCount" -> splitAndRetryCount,
Expand Down Expand Up @@ -290,7 +290,7 @@ class GpuTaskMetrics extends Serializable {
}
}

def addGpuTime(duration: Long): Unit = gpuTime.add(duration)
/**
 * Adds `duration` to the `semaphoreHoldingTime` accumulator.
 *
 * @param duration amount to add; the visible caller passes the difference of two
 *                 `System.nanoTime()` readings, i.e. a span in nanoseconds
 */
def addSemaphoreHoldingTime(duration: Long): Unit = {
  semaphoreHoldingTime.add(duration)
}

/**
 * Returns the value currently held by the `semWaitTimeNs` accumulator.
 *
 * @return the accumulated value as a `Long`; the accumulator's name and type
 *         (`NanoSecondAccumulator`) suggest nanoseconds -- confirm against its definition
 */
def getSemWaitTime(): Long = {
  // Unwrap in two steps: the accumulator's current value, then that value's inner `value`.
  val accumulated = semWaitTimeNs.value
  accumulated.value
}

Expand Down

0 comments on commit af093a6

Please sign in to comment.