forked from mongodb/genny
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
PERF-1747: Workload with operations spread across collections (mongod…
- Loading branch information
David
authored
Nov 13, 2018
1 parent
a164ec6
commit 2ea24ee
Showing
9 changed files
with
461 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
SchemaVersion: 2018-07-01

Actors:
# Loader: drops the database, then fills CollectionCount collections
# (Collection0, Collection1, ...) with DocumentCount generated documents each,
# inserted in batches of BatchSize, and builds every index listed under Indexes.
- Name: Loader
  Type: Loader
  Threads: 1
  Database: &DB test
  CollectionCount: &CollectionCount 10
  DocumentCount: 10000
  BatchSize: 1000
  Document:
    x: 0
    y: {$randomint: {min: 0, max: 1000}}
    z: {$randomint: {min: 0, max: 2000}}
    string: {$randomstring: {length: {$randomint: {min: 5, max: 15}}}}
    string2: {$randomstring: {length: {$randomint: {min: 7, max: 20}}}}
  Indexes:
  - {y: 1}
  - {string: 1}
  Phases:
  # Phase 0 performs the load; the Loader is idle in phase 1.
  - Repeat: 1
  - Repeat: 0

# MultiCollectionUpdate: idle while the Loader runs (phase 0), then updates
# documents matched on the indexed field y in the loaded collections.
- Name: MultiCollectionUpdate
  Type: MultiCollectionUpdate
  Threads: 10
  Database: *DB
  CollectionCount: *CollectionCount
  Phases:
  - UpdateFilter: {}
    Update: {}
    Repeat: 0
  - UpdateFilter: {y: {$randomint: {min: 0, max: 1000}}}
    Update: {$inc: {x: 1}}
    Duration: 60000  # TODO: TIG-1154
    MinDelay: 1000  # This should be replaced with Rate with TIG-1155.

# MultiCollectionQuery: idle while the Loader runs (phase 0), then queries a
# randomly chosen collection on the indexed field y.
- Name: MultiCollectionQuery
  Type: MultiCollectionQuery
  Threads: 10
  Database: *DB
  CollectionCount: *CollectionCount
  Phases:
  - Filter: {}
    Repeat: 0
  - Filter: {y: {$randomint: {min: 0, max: 1000}}}
    Limit: 20
    Duration: 60000  # TODO: TIG-1154
    MinDelay: 1000  # This should be replaced with Rate with TIG-1155.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#ifndef HEADER_1E8F3397_B82B_4814_9BB1_6C6D2E046E3A | ||
#define HEADER_1E8F3397_B82B_4814_9BB1_6C6D2E046E3A | ||
|
||
#include <gennylib/Actor.hpp> | ||
#include <gennylib/PhaseLoop.hpp> | ||
#include <gennylib/context.hpp> | ||
|
||
namespace genny::actor { | ||
|
||
/** | ||
* Prepares a database for testing. For use with {@code MultiCollectionUpdate} and {@code | ||
* MultiCollectionQuery} actors. It loads a set of documents into multiple collections with | ||
* indexes. Each collection is identically configured. The document shape, number of documents, | ||
* number of collections, and list of indexes are all adjustable from the yaml configuration. | ||
*/ | ||
class Loader : public Actor { | ||
|
||
public: | ||
explicit Loader(ActorContext& context, const unsigned int thread); | ||
~Loader() = default; | ||
|
||
void run() override; | ||
|
||
static ActorVector producer(ActorContext& context); | ||
|
||
private: | ||
struct PhaseConfig; | ||
std::mt19937_64 _rng; | ||
metrics::Timer _totalBulkLoadTimer; | ||
metrics::Timer _individualBulkLoadTimer; | ||
metrics::Timer _indexBuildTimer; | ||
mongocxx::pool::entry _client; | ||
PhaseLoop<PhaseConfig> _loop; | ||
}; | ||
|
||
} // namespace genny::actor | ||
|
||
#endif // HEADER_1E8F3397_B82B_4814_9BB1_6C6D2E046E3A |
37 changes: 37 additions & 0 deletions
37
src/gennylib/include/gennylib/actors/MultiCollectionQuery.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#ifndef HEADER_F86B8CA3_F0C0_4973_9FC8_3875A76D7610 | ||
#define HEADER_F86B8CA3_F0C0_4973_9FC8_3875A76D7610 | ||
|
||
#include <gennylib/Actor.hpp> | ||
#include <gennylib/PhaseLoop.hpp> | ||
#include <gennylib/context.hpp> | ||
|
||
namespace genny::actor { | ||
|
||
/** | ||
* MultiCollectionQuery is an actor that performs updates across parameterizable number of | ||
* collections. Updates are performed in a loop using {@code PhaseLoop} and each iteration picks a | ||
* random collection to update. The actor records the latency of each update, and the total number | ||
* of documents updated. | ||
*/ | ||
class MultiCollectionQuery : public Actor { | ||
|
||
public: | ||
explicit MultiCollectionQuery(ActorContext& context, const unsigned int thread); | ||
~MultiCollectionQuery() = default; | ||
|
||
void run() override; | ||
|
||
static ActorVector producer(ActorContext& context); | ||
|
||
private: | ||
struct PhaseConfig; | ||
std::mt19937_64 _rng; | ||
metrics::Timer _queryTimer; | ||
metrics::Counter _documentCount; | ||
mongocxx::pool::entry _client; | ||
PhaseLoop<PhaseConfig> _loop; | ||
}; | ||
|
||
} // namespace genny::actor | ||
|
||
#endif // HEADER_F86B8CA3_F0C0_4973_9FC8_3875A76D7610 |
38 changes: 38 additions & 0 deletions
38
src/gennylib/include/gennylib/actors/MultiCollectionUpdate.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#ifndef HEADER_D112CCC3_DF60_434E_A038_5A7AADED0E46 | ||
#define HEADER_D112CCC3_DF60_434E_A038_5A7AADED0E46 | ||
|
||
#include <gennylib/Actor.hpp> | ||
#include <gennylib/PhaseLoop.hpp> | ||
#include <gennylib/context.hpp> | ||
|
||
namespace genny::actor { | ||
|
||
/** | ||
* MultiCollectionUpdate is an actor that performs updates across parameterizable number of | ||
* collections. Updates are performed in a loop using {@code PhaseLoop} and each iteration picks a | ||
* random collection to update. The actor records the latency of each update, and the total number | ||
* of documents updated. | ||
*/ | ||
class MultiCollectionUpdate : public Actor { | ||
|
||
public: | ||
explicit MultiCollectionUpdate(ActorContext& context, const unsigned int thread); | ||
~MultiCollectionUpdate() = default; | ||
|
||
void run() override; | ||
|
||
static ActorVector producer(ActorContext& context); | ||
|
||
private: | ||
struct PhaseConfig; | ||
std::mt19937_64 _rng; | ||
|
||
metrics::Timer _updateTimer; | ||
metrics::Counter _updateCount; | ||
mongocxx::pool::entry _client; | ||
PhaseLoop<PhaseConfig> _loop; | ||
}; | ||
|
||
} // namespace genny::actor | ||
|
||
#endif // HEADER_D112CCC3_DF60_434E_A038_5A7AADED0E46 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
#include <gennylib/actors/Loader.hpp> | ||
|
||
#include <algorithm> | ||
#include <memory> | ||
|
||
#include <bsoncxx/json.hpp> | ||
#include <mongocxx/client.hpp> | ||
#include <mongocxx/pool.hpp> | ||
#include <yaml-cpp/yaml.h> | ||
|
||
#include "log.hh" | ||
#include <gennylib/context.hpp> | ||
#include <gennylib/value_generators.hpp> | ||
|
||
|
||
namespace {} // namespace | ||
|
||
/**
 * Per-phase settings of the Loader, read once from the phase's yaml block when the phase loop is
 * built.
 */
struct genny::actor::Loader::PhaseConfig {
    /**
     * @param context source of the "Database", "CollectionCount", "DocumentCount", "BatchSize",
     *                "Document" and "Indexes" keys.
     * @param rng     random number generator handed to the document generators.
     * @param client  pooled client used to obtain the database handle.
     */
    PhaseConfig(PhaseContext& context, std::mt19937_64& rng, mongocxx::pool::entry& client)
        : database{(*client)[context.get<std::string>("Database")]},
          numCollections{context.get<uint>("CollectionCount")},
          numDocuments{context.get<uint>("DocumentCount")},
          batchSize{context.get<uint>("BatchSize")},
          documentTemplate{value_generators::makeDoc(context.get("Document"), rng)} {
        // One key-pattern generator per entry of the "Indexes" list.
        for (auto indexSpec : context.get<std::vector<YAML::Node>>("Indexes")) {
            indexes.emplace_back(value_generators::makeDoc(indexSpec, rng));
        }
    }

    // Handle to the database that is dropped and reloaded.
    mongocxx::database database;
    // Number of collections to create.
    uint numCollections;
    // Number of documents to insert into each collection.
    uint numDocuments;
    // Number of documents per insert_many call.
    uint batchSize;
    // Generator for the documents to insert.
    std::unique_ptr<value_generators::DocumentGenerator> documentTemplate;
    // Generators for the index key patterns, in configuration order.
    std::vector<std::unique_ptr<value_generators::DocumentGenerator>> indexes;
};
|
||
void genny::actor::Loader::run() { | ||
for (auto&& [phase, config] : _loop) { | ||
for (auto&& _ : config) { | ||
config->database.drop(); | ||
for (uint i = 0; i < config->numCollections; i++) { | ||
auto collectionName = "Collection" + std::to_string(i); | ||
auto collection = config->database[collectionName]; | ||
// Insert the documents | ||
uint remainingInserts = config->numDocuments; | ||
{ | ||
auto totalOp = _totalBulkLoadTimer.raii(); | ||
while (remainingInserts > 0) { | ||
// insert the next batch | ||
uint numberToInsert = std::max(config->batchSize, remainingInserts); | ||
std::vector<bsoncxx::builder::stream::document> docs(numberToInsert); | ||
std::vector<bsoncxx::document::view> views; | ||
auto newDoc = docs.begin(); | ||
for (uint j = 0; j < numberToInsert; j++, newDoc++) { | ||
views.push_back(config->documentTemplate->view(*newDoc)); | ||
} | ||
{ | ||
auto individualOp = _individualBulkLoadTimer.raii(); | ||
auto result = collection.insert_many(views); | ||
remainingInserts -= result->inserted_count(); | ||
} | ||
} | ||
} | ||
// For each index | ||
for (auto& index : config->indexes) { | ||
// Make the index | ||
bsoncxx::builder::stream::document keys; | ||
auto keyView = index->view(keys); | ||
// BOOST_LOG_TRIVIAL(info) << "Building index " << bsoncxx::to_json(keyView); | ||
{ | ||
auto op = _indexBuildTimer.raii(); | ||
collection.create_index(keyView); | ||
} | ||
} | ||
} | ||
BOOST_LOG_TRIVIAL(info) << "Done with load phase. All documents loaded"; | ||
} | ||
} | ||
} | ||
|
||
genny::actor::Loader::Loader(genny::ActorContext& context, const unsigned int thread) | ||
: _rng{context.workload().createRNG()}, | ||
_totalBulkLoadTimer{context.timer("totalBulkInsertTime", thread)}, | ||
_individualBulkLoadTimer{context.timer("individualBulkInsertTime", thread)}, | ||
_indexBuildTimer{context.timer("indexBuildTime", thread)}, | ||
_client{context.client()}, | ||
_loop{context, _rng, _client} {} | ||
|
||
/**
 * Actor factory for the Loader.
 *
 * @param context the actor block being instantiated.
 * @return a vector holding one Loader (thread index 0) when the block's "Type" is "Loader",
 *         otherwise an empty vector.
 */
genny::ActorVector genny::actor::Loader::producer(genny::ActorContext& context) {
    std::vector<std::unique_ptr<genny::Actor>> actors;
    const bool isLoaderBlock = context.get<std::string>("Type") == "Loader";
    if (isLoaderBlock) {
        // Loader is single threaded for now
        actors.emplace_back(std::make_unique<genny::actor::Loader>(context, 0));
    }
    return actors;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
#include <gennylib/actors/MultiCollectionQuery.hpp> | ||
|
||
#include <chrono> | ||
#include <memory> | ||
#include <string> | ||
#include <thread> | ||
|
||
#include <bsoncxx/json.hpp> | ||
#include <mongocxx/client.hpp> | ||
#include <mongocxx/pool.hpp> | ||
#include <yaml-cpp/yaml.h> | ||
|
||
#include "log.hh" | ||
#include <gennylib/context.hpp> | ||
#include <gennylib/value_generators.hpp> | ||
|
||
namespace {} // namespace | ||
|
||
/**
 * Per-phase settings of the MultiCollectionQuery actor, read once from the phase's yaml block
 * when the phase loop is built.
 */
struct genny::actor::MultiCollectionQuery::PhaseConfig {
    /**
     * @param context source of the "Database", "CollectionCount" and "Filter" keys and of the
     *                optional "MinDelay" (defaults to 0 ms) and "Limit" keys.
     * @param rng     random number generator handed to the filter document generator.
     * @param client  pooled client used to obtain the database handle.
     * @param thread  thread index; accepted because the phase loop forwards it, not used here.
     */
    PhaseConfig(PhaseContext& context,
                std::mt19937_64& rng,
                mongocxx::pool::entry& client,
                int thread)
        : database{(*client)[context.get<std::string>("Database")]},
          numCollections{context.get<uint>("CollectionCount")},
          filterDocument{value_generators::makeDoc(context.get("Filter"), rng)},
          // std::uniform_int_distribution draws from the closed interval [a, b]. The collections
          // are numbered 0 .. numCollections - 1, so the upper bound has to be
          // numCollections - 1; using numCollections would occasionally select a collection that
          // was never loaded. The guard keeps the unsigned subtraction from wrapping when
          // numCollections is 0.
          uniformDistribution{0u, numCollections > 0 ? numCollections - 1 : 0u},
          minDelay{context.get<std::chrono::milliseconds, false>("MinDelay")
                       .value_or(std::chrono::milliseconds(0))} {
        // Apply the optional "Limit" key to the find options; without this the limit given in
        // the workload was silently ignored and every query returned all matching documents.
        if (const auto limit = context.get<std::int64_t, false>("Limit")) {
            options.limit(*limit);
        }
    }

    // Handle to the database holding the collections.
    mongocxx::database database;
    // Number of collections to spread the queries over.
    uint numCollections;
    // Generator for the query filter.
    std::unique_ptr<value_generators::DocumentGenerator> filterDocument;
    // uniform distribution random int for selecting collection
    std::uniform_int_distribution<uint> uniformDistribution;
    // Minimum wall-clock time one iteration should take.
    std::chrono::milliseconds minDelay;
    // Options passed to every find call.
    mongocxx::options::find options;
};
|
||
void genny::actor::MultiCollectionQuery::run() { | ||
for (auto&& [phase, config] : _loop) { | ||
for (auto&& _ : config) { | ||
// Take a timestamp -- remove after TIG-1155 | ||
auto startTime = std::chrono::steady_clock::now(); | ||
|
||
// Select a collection | ||
// This area is ripe for defining a collection generator, based off a string generator. | ||
// It could look like: collection: {@concat: [Collection, @randomint: {min: 0, max: | ||
// *CollectionCount]} Requires a string concat generator, and a translation of a string | ||
// to a collection | ||
auto collectionNumber = config->uniformDistribution(_rng); | ||
auto collectionName = "Collection" + std::to_string(collectionNumber); | ||
auto collection = config->database[collectionName]; | ||
|
||
// Perform a query | ||
bsoncxx::builder::stream::document filter{}; | ||
auto filterView = config->filterDocument->view(filter); | ||
// BOOST_LOG_TRIVIAL(info) << "Filter is " << bsoncxx::to_json(filterView); | ||
// BOOST_LOG_TRIVIAL(info) << "Collection Name is " << collectionName; | ||
{ | ||
// Only time the actual update, not the setup of arguments | ||
auto op = _queryTimer.raii(); | ||
auto cursor = collection.find(filterView, config->options); | ||
// exhaust the cursor | ||
uint count = 0; | ||
for (auto&& doc : cursor) { | ||
doc.length(); | ||
count++; | ||
} | ||
_documentCount.incr(count); | ||
} | ||
// make sure enough time has passed. Sleep if needed -- remove after TIG-1155 | ||
auto elapsedTime = std::chrono::steady_clock::now() - startTime; | ||
if (elapsedTime < config->minDelay) | ||
std::this_thread::sleep_for(config->minDelay - elapsedTime); | ||
} | ||
} | ||
} | ||
|
||
genny::actor::MultiCollectionQuery::MultiCollectionQuery(genny::ActorContext& context, | ||
const unsigned int thread) | ||
: _rng{context.workload().createRNG()}, | ||
_queryTimer{context.timer("queryTime", thread)}, | ||
_documentCount{context.counter("returnedDocuments", thread)}, | ||
_client{context.client()}, | ||
_loop{context, _rng, _client, thread} {} | ||
|
||
/**
 * Actor factory for MultiCollectionQuery.
 *
 * @param context the actor block being instantiated.
 * @return one actor per configured thread ("Threads" key, indices 0 .. Threads - 1) when the
 *         block's "Type" is "MultiCollectionQuery", otherwise an empty vector.
 */
genny::ActorVector genny::actor::MultiCollectionQuery::producer(genny::ActorContext& context) {
    std::vector<std::unique_ptr<genny::Actor>> actors;
    if (context.get<std::string>("Type") == "MultiCollectionQuery") {
        const int threadCount = context.get<int>("Threads");
        for (int threadIndex = 0; threadIndex < threadCount; ++threadIndex) {
            actors.emplace_back(
                std::make_unique<genny::actor::MultiCollectionQuery>(context, threadIndex));
        }
    }
    return actors;
}
Oops, something went wrong.