forked from benhoyt/countwords
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathoptimized.cpp
64 lines (60 loc) · 2.25 KB
/
optimized.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#include <algorithm>
#include <cinttypes>
#include <cassert>
#include <iostream>
#include <iterator>
#include <string_view>
template <std::size_t hash_size, std::size_t words_buf_size>
class table {
enum : std::uint64_t { hash_init = 0xcbf29ce484222325ULL };
enum : std::uint64_t { hash_next = 0x100000001b3ULL };
struct range { int place; std::uint16_t sz; }; // { offset into words, size }
struct cell { int count = 0; range word; }; // a hash entry
std::uint64_t hash = hash_init;
char* tail = words; char* back = words;
std::size_t entries = 0;
cell buckets[hash_size];
char words[words_buf_size];
auto word_at(range w) { return std::string_view{words + w.place, w.sz}; }
public:
void push(char c) {
assert(std::size_t(back - words) < sizeof(words));
*back++ = c, hash = hash * hash_next ^ c;
}
void count_one() {
if (back != tail) {
std::string_view key{tail, std::size_t(back-tail)};
auto match = [key,this](range word) {
return word.sz == key.size() && word_at(word) == key; };
auto* bucket = &buckets[hash % hash_size]; // op& if hash_size is 2^n
for (; bucket->count && !match(bucket->word);
bucket = (bucket == buckets) ? buckets+hash_size-1 : bucket-1) {}
if (! bucket->count) {
assert(++entries < hash_size/2);
range word = {int(tail - words), std::uint16_t(back - tail)};
*bucket = {1, word }, tail = back; // keep new word
} else ++bucket->count, back = tail; // discard word
hash = hash_init;
}
}
void dump(std::ostream& out) {
auto end = std::partition(buckets, buckets + hash_size, [](auto& b){
return b.count != 0; });
std::sort(buckets, end, [](cell& a, cell& b) {
return a.count > b.count; });
std::for_each(buckets, end, [&](cell& b) {
out << word_at(b.word) << ' ' << b.count << '\n'; });
}
};
table<(1<<16), (1<<19)> counts;
int main() {
std::ios::sync_with_stdio(false);
auto to_lower = [](unsigned char c) { return c | (-(c-'A' < 26) & '\x20'); };
for (std::istreambuf_iterator<char> it(std::cin), end; it != end; ++it) {
if (*it <= ' ') {
counts.count_one();
} else counts.push(to_lower(*it));
}
counts.count_one(); // in case file ends without whitespace
counts.dump(std::cout);
}