Add the SHARDED_ROCKSDB_DETAILED_STATS knob and trace events for the data-distribution team/shard mapping.

Reviewer notes on this revision of the patch:
- traceTeamShardMapping(): `teamCount` is now zero-initialized (it was incremented while
  uninitialized), and the first entry is always treated as the start of a new team.
- The ShardsAffectedByTeamFailure.cpp hunk header is +196,41 to account for the added comment lines.
- NOTE(review): some hunk-header/context text appears to have lost its template arguments during
  extraction ("KnobsImpl {", "ReferenceCounted& destinationTeam);", "PromiseStream restartShardTracker;"),
  and the original indentation and blank context lines could not be recovered exactly; confirm
  against the upstream tree before applying.

diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp
index 938b4f9bb1f..42ea056a100 100644
--- a/fdbclient/ServerKnobs.cpp
+++ b/fdbclient/ServerKnobs.cpp
@@ -629,6 +629,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
 	init( SHARDED_ROCKSDB_HISTOGRAMS_SAMPLE_RATE, 0.001 ); if( isSimulated ) SHARDED_ROCKSDB_HISTOGRAMS_SAMPLE_RATE = deterministicRandom()->random01();
 	init( SHARDED_ROCKSDB_USE_DIRECT_IO, false ); if (isSimulated) SHARDED_ROCKSDB_USE_DIRECT_IO = deterministicRandom()->coinflip();
 	init( SHARDED_ROCKSDB_FLUSH_PERIOD, 0.0 ); if (isSimulated) SHARDED_ROCKSDB_FLUSH_PERIOD = deterministicRandom()->randomInt(100, 1000);
+	init( SHARDED_ROCKSDB_DETAILED_STATS, false ); if (isSimulated) SHARDED_ROCKSDB_DETAILED_STATS = deterministicRandom()->coinflip();
 
 
 	// Leader election
diff --git a/fdbclient/include/fdbclient/ServerKnobs.h b/fdbclient/include/fdbclient/ServerKnobs.h
index cae8487c1e9..3ca3d814cb7 100644
--- a/fdbclient/include/fdbclient/ServerKnobs.h
+++ b/fdbclient/include/fdbclient/ServerKnobs.h
@@ -597,6 +597,7 @@ class ServerKnobs : public KnobsImpl {
 	double SHARDED_ROCKSDB_HISTOGRAMS_SAMPLE_RATE;
 	bool SHARDED_ROCKSDB_USE_DIRECT_IO;
 	double SHARDED_ROCKSDB_FLUSH_PERIOD;
+	bool SHARDED_ROCKSDB_DETAILED_STATS;
 
 	// Leader election
 	int MAX_NOTIFICATIONS;
diff --git a/fdbserver/DDTeamCollection.actor.cpp b/fdbserver/DDTeamCollection.actor.cpp
index b4af2ee5d0a..47aaa550f4a 100644
--- a/fdbserver/DDTeamCollection.actor.cpp
+++ b/fdbserver/DDTeamCollection.actor.cpp
@@ -3639,6 +3639,8 @@ class DDTeamCollectionImpl {
 					}
 				}
 
+				self->shardsAffectedByTeamFailure->traceTeamShardMapping();
+
 				// TODO: re-enable the following logging or remove them.
 				// TraceEvent("LocalityRecordKeyName", self->getDistributorId())
 				//     .detail("Size", internedLocalityRecordKeyNameStrings.size())
@@ -4942,6 +4944,7 @@ void DDTeamCollection::traceAllInfo(bool shouldPrint) const {
 	traceMachineTeamInfo();
 	traceLocalityArrayIndexName();
 	traceMachineLocalityMap();
+	shardsAffectedByTeamFailure->traceTeamShardMapping();
 }
 
 void DDTeamCollection::rebuildMachineLocalityMap() {
diff --git a/fdbserver/KeyValueStoreShardedRocksDB.actor.cpp b/fdbserver/KeyValueStoreShardedRocksDB.actor.cpp
index 1451da24ca2..fa704c7c0e3 100644
--- a/fdbserver/KeyValueStoreShardedRocksDB.actor.cpp
+++ b/fdbserver/KeyValueStoreShardedRocksDB.actor.cpp
@@ -1298,12 +1298,18 @@ class ShardManager {
 			int numLevels = 0;
 			for (auto it = cfMetadata.levels.begin(); it != cfMetadata.levels.end(); ++it) {
 				std::string propValue = "";
-				ASSERT(shard->db->GetProperty(shard->cf,
-				                              rocksdb::DB::Properties::kCompressionRatioAtLevelPrefix +
-				                                  std::to_string(it->level),
-				                              &propValue));
-				e.detail("Level" + std::to_string(it->level),
-				         std::to_string(it->size) + " " + propValue + " " + std::to_string(it->files.size()));
+				if (SERVER_KNOBS->SHARDED_ROCKSDB_DETAILED_STATS) {
+					ASSERT(shard->db->GetProperty(shard->cf,
+					                              rocksdb::DB::Properties::kCompressionRatioAtLevelPrefix +
+					                                  std::to_string(it->level),
+					                              &propValue));
+					e.detail("Level" + std::to_string(it->level),
+					         std::to_string(it->size) + " " + propValue + " " +
+					             std::to_string(it->files.size()));
+				}
+				if (it->level == 0) {
+					e.detail("Level0Files", it->files.size());
+				}
 				if (it->size > 0) {
 					++numLevels;
 				}
diff --git a/fdbserver/ShardsAffectedByTeamFailure.cpp b/fdbserver/ShardsAffectedByTeamFailure.cpp
index 015effc566b..22e389bd7a4 100644
--- a/fdbserver/ShardsAffectedByTeamFailure.cpp
+++ b/fdbserver/ShardsAffectedByTeamFailure.cpp
@@ -196,6 +196,41 @@ void ShardsAffectedByTeamFailure::setCheckMode(CheckMode mode) {
 	checkMode = mode;
 }
 
+// Emits one "DDTeamShardCount" trace event per run of consecutive team_shards entries that share
+// a team, reporting that team and the length of the run, then one "DDTeamShardStats" summary event.
+// Assumes entries for the same team are adjacent when iterating team_shards -- TODO confirm.
+void ShardsAffectedByTeamFailure::traceTeamShardMapping() const {
+	Team prevTeam;
+	int count = 0;
+	// Must start at zero: it is incremented once per distinct team found below.
+	int teamCount = 0;
+	for (auto it = team_shards.begin(); it != team_shards.end(); ++it) {
+		// count == 0 only on the first entry; treating it as a new team keeps teamCount correct even
+		// when that team compares equal to the default-constructed prevTeam.
+		if (count == 0 || it->first != prevTeam) {
+			if (count > 0) {
+				TraceEvent("DDTeamShardCount")
+				    .detail("IsPrimary", prevTeam.primary)
+				    .detail("Team", prevTeam.toString())
+				    .detail("Shards", count);
+			}
+			count = 1;
+			prevTeam = it->first;
+			++teamCount;
+		} else {
+			++count;
+		}
+	}
+	// Flush the final run, which the loop above never emits.
+	if (count > 0) {
+		TraceEvent("DDTeamShardCount")
+		    .detail("IsPrimary", prevTeam.primary)
+		    .detail("Team", prevTeam.toString())
+		    .detail("Shards", count);
+	}
+	TraceEvent("DDTeamShardStats").detail("TotalShards", team_shards.size()).detail("TotalTeams", teamCount);
+}
+
 void ShardsAffectedByTeamFailure::check() const {
 	if (checkMode == CheckMode::ForceNoCheck)
 		return;
diff --git a/fdbserver/include/fdbserver/ShardsAffectedByTeamFailure.h b/fdbserver/include/fdbserver/ShardsAffectedByTeamFailure.h
index 406a0cc76c0..cc2fbdda0c6 100644
--- a/fdbserver/include/fdbserver/ShardsAffectedByTeamFailure.h
+++ b/fdbserver/include/fdbserver/ShardsAffectedByTeamFailure.h
@@ -106,6 +106,8 @@ class ShardsAffectedByTeamFailure : public ReferenceCounted& destinationTeam);
 	void check() const;
 	void setCheckMode(CheckMode);
+	// Prints the team shard mapping as trace events.
+	void traceTeamShardMapping() const;
 
 	PromiseStream restartShardTracker;