-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjson-chunker.cpp
62 lines (55 loc) · 2.24 KB
/
json-chunker.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#include <cerrno>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include "jsonbuffet.h"
#include "utils.h"
/// Returns the output file name for chunk number `i`, i.e. "chunk_<i>.json".
std::string fileName(int i) {
    std::string name("chunk_");
    name += std::to_string(i);
    name += ".json";
    return name;
}
int main(int argc, char *argv[]) {
std::vector<std::string> path;
size_t chunkSizeThreshold = 104857600;
if(argc == 2) {
path = Utils::split(argv[1], '/');
} else if (argc == 3) {
chunkSizeThreshold = atoi(argv[1]);
path = Utils::split(argv[2], '/');
} else {
std::cerr << "Usage: " << argv[0] << "[chunksize_threshold=104857600] /path/to/item/of/interest < /path/to/json" << std::endl;
std::cerr << "Note: You can use empty path components to represent an element of an array." << std::endl;
std::cerr << R"(eg. item//key/ on { "item": [ {"key": "value1"}, {"key": "value2"} ] } creates chunks containing value1 value2)" << std::endl;
return -1;
}
int currentChunk = 0;
std::ofstream file;
bool isFirstElement = true;
file.open(fileName(currentChunk));
file << "[ ";
rapidjson::IStreamWrapper isw(std::cin);
JsonBuffet buffet([&](rapidjson::SizeType, rapidjson::SizeType, const std::string&, const RapidJsonValue& value) -> bool {
if (file.tellp() > chunkSizeThreshold) {
std::cout << "Done writing chunk " << currentChunk << std::endl;
file << " ]";
file << std::flush;
file.close();
currentChunk++;
file.open(fileName(currentChunk));
file << "[ ";
isFirstElement = true;
}
std::stringstream buffer;
if (!isFirstElement) {
buffer << ", ";
}
buffer << value;
file << buffer.rdbuf();
isFirstElement = false;
return true;
},
path);
buffet.Consume(isw);
std::cout << "Done writing chunk " << currentChunk << std::endl;
file << "]";
file << std::flush;
file.close();
return 0;
}