Skip to content

Commit 41a85e0

Browse files
Bugfix/lowering log volume (#6847)
In our zeal to debug some things we made the logs a bit noisy, so this change lowers the volume for now. It also renames a method to more accurately reflect that it is called on domain update.
1 parent 838c678 commit 41a85e0

File tree

7 files changed

+25
-12
lines changed

7 files changed

+25
-12
lines changed

common/cache/domainCache.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,7 @@ UpdateLoop:
533533
if now.Sub(c.lastCallbackEmitTime) > 30*time.Minute {
534534
c.lastCallbackEmitTime = now
535535
c.scope.AddCounter(metrics.DomainCacheCallbacksCount, int64(len(c.callbacks)))
536+
c.scope.RecordHistogramDuration(metrics.DomainCacheUpdateLatency, c.timeSource.Now().Sub(now))
536537
}
537538

538539
return nil

common/log/tag/tags.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,10 @@ func WorkflowTerminationReason(reason string) Tag {
229229
return newStringTag("wf-termination-reason", reason)
230230
}
231231

232+
// Duration returns a tag for the given duration, created via newDurationTag
// with the key "duration".
func Duration(duration time.Duration) Tag {
233+
return newDurationTag("duration", duration)
234+
}
235+
232236
// domain related
233237

234238
// WorkflowDomainID returns tag for WorkflowDomainID

common/metrics/defs.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2275,6 +2275,7 @@ const (
22752275

22762276
DomainReplicationQueueSizeGauge
22772277
DomainReplicationQueueSizeErrorCount
2278+
DomainCacheUpdateLatency
22782279

22792280
ParentClosePolicyProcessorSuccess
22802281
ParentClosePolicyProcessorFailures
@@ -3022,6 +3023,7 @@ var MetricDefs = map[ServiceIdx]map[int]metricDefinition{
30223023
CadenceShardFailureGauge: {metricName: "cadence_shard_failure", metricType: Gauge},
30233024
DomainReplicationQueueSizeGauge: {metricName: "domain_replication_queue_size", metricType: Gauge},
30243025
DomainReplicationQueueSizeErrorCount: {metricName: "domain_replication_queue_failed", metricType: Counter},
3026+
DomainCacheUpdateLatency: {metricName: "domain_cache_update_latency", metricType: Histogram, buckets: DomainCacheUpdateBuckets},
30253027
ParentClosePolicyProcessorSuccess: {metricName: "parent_close_policy_processor_requests", metricType: Counter},
30263028
ParentClosePolicyProcessorFailures: {metricName: "parent_close_policy_processor_errors", metricType: Counter},
30273029

@@ -3620,6 +3622,12 @@ var ResponseRowSizeBuckets = append(
36203622
tally.MustMakeExponentialValueBuckets(1, 2, 17)..., // 1..65536
36213623
)
36223624

3625+
// DomainCacheUpdateBuckets contains the histogram buckets used by the
// DomainCacheUpdateLatency metric: an explicit 0 bucket followed by 17
// exponential buckets (powers of two from 1 to 65536).
// NOTE(review): the metric is emitted with RecordHistogramDuration while these
// are tally value buckets - confirm the bucket units match the recorded durations.
3626+
var DomainCacheUpdateBuckets = append(
3627+
tally.ValueBuckets{0}, // need an explicit 0 or zero is reported as 1
3628+
tally.MustMakeExponentialValueBuckets(1, 2, 17)..., // 1..65536
3629+
)
3630+
36233631
// ResponsePayloadSizeBuckets contains buckets for tracking the size of the payload returned per persistence operation
36243632
var ResponsePayloadSizeBuckets = append(
36253633
tally.ValueBuckets{0}, // need an explicit 0 or zero is reported as 1

service/history/engine/engineimpl/register_domain_failover_callback.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,14 @@ func (e *historyEngineImpl) registerDomainFailoverCallback() {
5757
e.shard.GetDomainCache().RegisterDomainChangeCallback(
5858
e.shard.GetShardID(),
5959
e.shard.GetDomainNotificationVersion(),
60-
e.lockProcessingForFailover,
60+
e.lockTaskProcessingForDomainUpdate,
6161
e.domainChangeCB,
6262
)
6363
}
6464

6565
func (e *historyEngineImpl) domainChangeCB(nextDomains []*cache.DomainCacheEntry) {
6666
defer func() {
67-
e.unlockProcessingForFailover()
67+
e.unlockProcessingForDomainUpdate()
6868
}()
6969

7070
if len(nextDomains) == 0 {
@@ -149,14 +149,14 @@ func (e *historyEngineImpl) generateGracefulFailoverTasksForDomainUpdateCallback
149149
return failoverMarkerTasks
150150
}
151151

152-
func (e *historyEngineImpl) lockProcessingForFailover() {
153-
e.logger.Info("Locking processing for failover")
152+
// lockTaskProcessingForDomainUpdate emits a debug log and then calls
// LockTaskProcessing on both txProcessor and timerProcessor. It is passed to
// RegisterDomainChangeCallback alongside domainChangeCB.
func (e *historyEngineImpl) lockTaskProcessingForDomainUpdate() {
153+
e.logger.Debug("Locking processing for domain update")
154154
e.txProcessor.LockTaskProcessing()
155155
e.timerProcessor.LockTaskProcessing()
156156
}
157157

158-
func (e *historyEngineImpl) unlockProcessingForFailover() {
159-
e.logger.Info("Unlocking processing for failover")
158+
// unlockProcessingForDomainUpdate emits a debug log and then calls
// UnlockTaskProcessing on both txProcessor and timerProcessor. domainChangeCB
// defers this call, so it runs whenever that callback returns.
func (e *historyEngineImpl) unlockProcessingForDomainUpdate() {
159+
e.logger.Debug("Unlocking processing for domain update")
160160
e.txProcessor.UnlockTaskProcessing()
161161
e.timerProcessor.UnlockTaskProcessing()
162162
}

service/history/engine/engineimpl/register_domain_failover_callback_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,6 @@ func TestDomainLocking(t *testing.T) {
674674
shard: shardCtx,
675675
}
676676

677-
he.lockProcessingForFailover()
678-
he.unlockProcessingForFailover()
677+
he.lockTaskProcessingForDomainUpdate()
678+
he.unlockProcessingForDomainUpdate()
679679
}

service/history/queue/timer_queue_processor.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -386,12 +386,12 @@ func (t *timerQueueProcessor) HandleAction(ctx context.Context, clusterName stri
386386
}
387387

388388
// LockTaskProcessing logs the lock request and then calls Lock on the timer
// queue processor's task allocator.
func (t *timerQueueProcessor) LockTaskProcessing() {
389-
t.logger.Info("Timer queue processor locking task processing")
389+
t.logger.Debug("Timer queue processor locking task processing")
390390
t.taskAllocator.Lock()
391391
}
392392

393393
// UnlockTaskProcessing logs the unlock request and then calls Unlock on the
// timer queue processor's task allocator.
func (t *timerQueueProcessor) UnlockTaskProcessing() {
394-
t.logger.Info("Timer queue processor unlocking task processing")
394+
t.logger.Debug("Timer queue processor unlocking task processing")
395395
t.taskAllocator.Unlock()
396396
}
397397

service/history/queue/transfer_queue_processor.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -353,12 +353,12 @@ func (t *transferQueueProcessor) HandleAction(
353353
}
354354

355355
// LockTaskProcessing logs the lock request and then calls Lock on the transfer
// queue processor's task allocator.
func (t *transferQueueProcessor) LockTaskProcessing() {
356-
t.logger.Info("Transfer queue processor locking task processing")
356+
t.logger.Debug("Transfer queue processor locking task processing")
357357
t.taskAllocator.Lock()
358358
}
359359

360360
// UnlockTaskProcessing logs the unlock request and then calls Unlock on the
// transfer queue processor's task allocator.
func (t *transferQueueProcessor) UnlockTaskProcessing() {
361-
t.logger.Info("Transfer queue processor unlocking task processing")
361+
t.logger.Debug("Transfer queue processor unlocking task processing")
362362
t.taskAllocator.Unlock()
363363
}
364364

0 commit comments

Comments
 (0)