Skip to content

Commit 9f21756

Browse files
committed
Misc fixes and improvements.
1. fix redis latency metrics. 2. add more error labels. 3. fetch from DB when unmarshal failed.
1 parent b67fa25 commit 9f21756

File tree

1 file changed

+82
-60
lines changed

1 file changed

+82
-60
lines changed

cache.go

Lines changed: 82 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -139,19 +139,26 @@ type metricSet struct {
139139
Error *prometheus.CounterVec
140140
}
141141

142+
type metricHitLabel string
143+
type metricErrLabel string
144+
142145
var (
143-
hitLabels = []string{"hit"}
144-
hitLabelMemory = "mem"
145-
hitLabelRedis = "redis"
146-
hitLabelDB = "db"
146+
hitLabels = []string{"hit"}
147+
// metrics hit labels
148+
hitLabelMemory metricHitLabel = "mem"
149+
hitLabelRedis metricHitLabel = "redis"
150+
hitLabelDB metricHitLabel = "db"
147151
// The unit is ms.
148152
latencyBucket = []float64{
149153
1, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096}
150-
// errors
151-
errLabels = []string{"when"}
152-
errLabelSetRedis = "set_redis"
153-
errLabelSetMemCache = "set_mem_cache"
154-
errLabelInvalidate = "invalidate_error"
154+
155+
errLabels = []string{"when"}
156+
// metrics error labels
157+
errLabelSetRedis metricErrLabel = "set_redis"
158+
errLabelSetMemCache metricErrLabel = "set_mem_cache"
159+
errLabelInvalidate metricErrLabel = "invalidate_error"
160+
errLabelMemoryUnmarshalFailed metricErrLabel = "mem_unmarshal_failed"
161+
errLabelRedisUnmarshalFailed metricErrLabel = "redis_unmarshal_failed"
155162
)
156163

157164
func newMetricSet(appName string) *metricSet {
@@ -294,15 +301,22 @@ func (c *DCache) Close() {
294301
}
295302
}
296303

297-
func (c *DCache) recordLatency(label string, startedAt time.Time) func() {
304+
func (c *DCache) makeHitRecorder(label metricHitLabel, startedAt time.Time) func() {
298305
return func() {
299306
if c.stats != nil {
300-
c.stats.Latency.WithLabelValues(label).Observe(
307+
c.stats.Hit.WithLabelValues(string(label)).Inc()
308+
c.stats.Latency.WithLabelValues(string(label)).Observe(
301309
float64(getNow().UnixMilli() - startedAt.UnixMilli()))
302310
}
303311
}
304312
}
305313

314+
func (c *DCache) recordError(label metricErrLabel) {
315+
if c.stats != nil {
316+
c.stats.Error.WithLabelValues(string(label)).Inc()
317+
}
318+
}
319+
306320
// readValue read through using f and cache to @p key if no error and not @p noStore.
307321
// return the marshaled bytes if no error.
308322
func (c *DCache) readValue(
@@ -319,11 +333,7 @@ func (c *DCache) readValue(
319333
// NOTE: This is mostly useful when user call cache layer with noCache flag, because
320334
// when cache is used, call to this function is protected by a distributed lock.
321335
rv, err, _ := c.group.Do(key, func() (any, error) {
322-
defer c.recordLatency(hitLabelDB, getNow())()
323-
if c.stats != nil {
324-
c.stats.Hit.WithLabelValues(hitLabelDB).Inc()
325-
}
326-
// c.stats.
336+
defer c.makeHitRecorder(hitLabelDB, getNow())()
327337
dbres, ttl, err := f()
328338
return &valueTtl{
329339
Val: dbres,
@@ -343,10 +353,8 @@ func (c *DCache) readValue(
343353
// successfully retrieved.
344354
err := c.setKey(ctx, key, valueBytes, valTtl.Ttl, false)
345355
if err != nil {
346-
log.Err(err).Msgf("Failed to set Redis cache for %s", key)
347-
if c.stats != nil {
348-
c.stats.Error.WithLabelValues(errLabelSetRedis).Inc()
349-
}
356+
log.Ctx(ctx).Err(err).Msgf("Failed to set Redis cache for %s", key)
357+
c.recordError(errLabelSetRedis)
350358
}
351359
}
352360
return valueBytes, nil
@@ -366,12 +374,24 @@ func (c *DCache) setKey(ctx context.Context, key string, valueBytes []byte, ttl
366374
if err != nil {
367375
return err
368376
}
369-
c.updateMemoryCache(key, ve, isExplicitSet)
377+
c.updateMemoryCache(ctx, key, ve, isExplicitSet)
370378
return nil
371379
}
372380

381+
// tryReadFromRedis try to read value from Redis.
382+
func (c *DCache) tryReadFromRedis(ctx context.Context, key string) (*ValueBytesExpiredAt, error) {
383+
veBytes, err := c.conn.Get(ctx, storeKey(key)).Bytes()
384+
if err != nil {
385+
return nil, err
386+
}
387+
ve := &ValueBytesExpiredAt{}
388+
err = msgpack.Unmarshal(veBytes, ve)
389+
return ve, err
390+
}
391+
373392
// isExplicitSet = true, calling from Set. Otherwise, value is backfilled from Redis.
374-
func (c *DCache) updateMemoryCache(key string, ve *ValueBytesExpiredAt, isExplicitSet bool) {
393+
func (c *DCache) updateMemoryCache(
394+
ctx context.Context, key string, ve *ValueBytesExpiredAt, isExplicitSet bool) {
375395
// update memory cache.
376396
// sub-second TTL will be ignored for memory cache.
377397
ttl := time.UnixMilli(ve.ExpiredAt).Unix() - getNow().Unix()
@@ -394,10 +414,8 @@ func (c *DCache) updateMemoryCache(key string, ve *ValueBytesExpiredAt, isExplic
394414
// ignore in memory cache error
395415
err = c.inMemCache.Set([]byte(storeKey(key)), ve.ValueBytes, int(ttl))
396416
if err != nil {
397-
log.Err(err).Msgf("Failed to set memory cache for key %s", storeKey(key))
398-
if c.stats != nil {
399-
c.stats.Error.WithLabelValues(errLabelSetMemCache).Inc()
400-
}
417+
log.Ctx(ctx).Err(err).Msgf("Failed to set memory cache for key %s", storeKey(key))
418+
c.recordError(errLabelSetMemCache)
401419
}
402420
}
403421
}
@@ -478,11 +496,8 @@ func (c *DCache) listenKeyInvalidate() {
478496
l := strings.Split(payload, delimiter)
479497
if len(l) < 2 {
480498
// Invalid payload
481-
log.Warn().Msgf("Received invalidate payload %s", payload)
482-
483-
if c.stats != nil {
484-
c.stats.Error.WithLabelValues(errLabelInvalidate).Inc()
485-
}
499+
log.Error().Msgf("Received invalidate payload %s", payload)
500+
c.recordError(errLabelInvalidate)
486501
return
487502
}
488503
if l[0] == c.id {
@@ -547,6 +562,7 @@ func (c *DCache) Get(ctx context.Context, key string, target any, expire time.Du
547562
//
548563
// @p noStore: The response value will not be saved into the cache.
549564
func (c *DCache) GetWithTtl(ctx context.Context, key string, target any, read ReadWithTtlFunc, noCache bool, noStore bool) (err error) {
565+
startedAt := getNow()
550566
if c.tracer != nil {
551567
ctx = c.tracer.TraceStart(ctx,
552568
"GetWithTtl",
@@ -567,55 +583,57 @@ func (c *DCache) GetWithTtl(ctx context.Context, key string, target any, read Re
567583
err = unmarshal(targetBytes, target)
568584
return
569585
}
570-
// lookup in memory cache.
586+
// lookup in memory cache, return only when unmarshal succeeded.
571587
if c.inMemCache != nil {
572588
var targetBytes []byte
573589
targetBytes, err = c.inMemCache.Get([]byte(storeKey(key)))
574590
if err == nil {
575-
if c.stats != nil {
576-
c.stats.Hit.WithLabelValues(hitLabelMemory).Inc()
577-
}
578-
if c.tracer != nil {
579-
c.tracer.TraceHitFrom(ctx, hitMem)
580-
}
581591
err = unmarshal(targetBytes, target)
582-
return
592+
if err == nil {
593+
c.makeHitRecorder(hitLabelMemory, startedAt)()
594+
if c.tracer != nil {
595+
c.tracer.TraceHitFrom(ctx, hitMem)
596+
}
597+
return
598+
}
599+
} else {
600+
c.recordError(errLabelMemoryUnmarshalFailed)
583601
}
584602
}
585603

586604
var anyTypedBytes any
605+
var targetHasUnmarshalled bool
587606
anyTypedBytes, err, _ = c.group.Do(lockKey(key), func() (any, error) {
588607
// distributed single flight to query db for value.
589-
startedAt := getNow()
590608
for {
591-
ve := &ValueBytesExpiredAt{}
592-
veBytes, e := c.conn.Get(ctx, storeKey(key)).Bytes()
593-
if e == nil {
594-
e = msgpack.Unmarshal(veBytes, ve)
595-
}
609+
ve, e := c.tryReadFromRedis(ctx, key)
596610
if e == nil {
597-
// Value was retrieved from Redis, backfill memory cache and return.
598-
if c.stats != nil {
599-
c.stats.Hit.WithLabelValues(hitLabelRedis).Inc()
600-
}
601-
if c.tracer != nil {
602-
c.tracer.TraceHitFrom(ctx, hitRedis)
603-
}
604-
c.recordLatency(hitLabelRedis, startedAt)
605-
if !noStore {
606-
c.updateMemoryCache(key, ve, false)
611+
// NOTE: must check if bytes stored in Redis can be correctly
612+
// unmarshalled into target, because it may not when data structure changes.
613+
// When that happens, we will still fetch from DB.
614+
e = unmarshal(ve.ValueBytes, target)
615+
if e == nil {
616+
targetHasUnmarshalled = true
617+
// Value was retrieved from Redis, backfill memory cache and return.
618+
defer c.makeHitRecorder(hitLabelRedis, startedAt)()
619+
if c.tracer != nil {
620+
c.tracer.TraceHitFrom(ctx, hitRedis)
621+
}
622+
if !noStore {
623+
c.updateMemoryCache(ctx, key, ve, false)
624+
}
625+
return ve.ValueBytes, nil
626+
} else {
627+
c.recordError(errLabelRedisUnmarshalFailed)
607628
}
608-
return ve.ValueBytes, nil
609629
}
610630
// If failed to retrieve value from Redis, try to get a lock and query DB.
611631
// To avoid spamming Redis with SetNX requests, only one request should try to get
612632
// the lock per-pod.
613633
// If timeout or not cache-able error, another thread will obtain lock after sleep.
614634
updated, err := c.conn.SetNX(ctx, lockKey(key), "", c.readInterval).Result()
615635
if err != nil {
616-
if c.stats != nil {
617-
c.stats.Error.WithLabelValues(errLabelSetRedis).Inc()
618-
}
636+
c.recordError(errLabelSetRedis)
619637
}
620638
if updated {
621639
return c.readValue(ctx, key, read, noStore)
@@ -627,14 +645,18 @@ func (c *DCache) GetWithTtl(ctx context.Context, key string, target any, read Re
627645
// timeout, all of them will timeout.
628646
return nil, ErrTimeout
629647
case <-time.After(lockSleep):
648+
// TODO(yumin): we can further optimize this part by
649+
// check TTL of lockKey(key), and sleep wisely.
630650
continue
631651
}
632652
}
633653
})
634654
if err != nil {
635655
return
636656
}
637-
err = unmarshal(anyTypedBytes.([]byte), target)
657+
if !targetHasUnmarshalled {
658+
err = unmarshal(anyTypedBytes.([]byte), target)
659+
}
638660
return
639661
}
640662

0 commit comments

Comments
 (0)