diff --git a/src/snmalloc/backend_helpers/statsrange.h b/src/snmalloc/backend_helpers/statsrange.h
index d1e213777..8fe676fb6 100644
--- a/src/snmalloc/backend_helpers/statsrange.h
+++ b/src/snmalloc/backend_helpers/statsrange.h
@@ -16,8 +16,7 @@ namespace snmalloc
   {
     using ContainsParent<ParentRange>::parent;

-    static inline stl::Atomic<size_t> current_usage{};
-    static inline stl::Atomic<size_t> peak_usage{};
+    static inline Stat usage{};

   public:
     static constexpr bool Aligned = ParentRange::Aligned;
@@ -30,34 +29,26 @@ namespace snmalloc

     CapPtr<void, ChunkBounds> alloc_range(size_t size)
     {
-      auto result = parent.alloc_range(size);
-      if (result != nullptr)
-      {
-        auto prev = current_usage.fetch_add(size);
-        auto curr = peak_usage.load();
-        while (curr < prev + size)
-        {
-          if (peak_usage.compare_exchange_weak(curr, prev + size))
-            break;
-        }
-      }
-      return result;
+      auto r = parent.alloc_range(size);
+      if (r != nullptr)
+        usage += size;
+      return r;
     }

     void dealloc_range(CapPtr<void, ChunkBounds> base, size_t size)
     {
-      current_usage -= size;
+      usage -= size;
       parent.dealloc_range(base, size);
     }

     size_t get_current_usage()
     {
-      return current_usage.load();
+      return usage.get_curr();
     }

     size_t get_peak_usage()
     {
-      return peak_usage.load();
+      return usage.get_peak();
     }
   };
 };
diff --git a/src/snmalloc/ds_aal/ds_aal.h b/src/snmalloc/ds_aal/ds_aal.h
index 21eeb8dd6..e0b4ac202 100644
--- a/src/snmalloc/ds_aal/ds_aal.h
+++ b/src/snmalloc/ds_aal/ds_aal.h
@@ -7,4 +7,5 @@
 #include "../aal/aal.h"
 #include "flaglock.h"
 #include "prevent_fork.h"
+#include "seqset.h"
 #include "singleton.h"
\ No newline at end of file
diff --git a/src/snmalloc/ds_core/seqset.h b/src/snmalloc/ds_aal/seqset.h
similarity index 99%
rename from src/snmalloc/ds_core/seqset.h
rename to src/snmalloc/ds_aal/seqset.h
index 6046bca70..0ad18fb4d 100644
--- a/src/snmalloc/ds_core/seqset.h
+++ b/src/snmalloc/ds_aal/seqset.h
@@ -1,6 +1,5 @@
 #pragma once

-#include "../aal/aal.h"
 #include "../ds_core/ds_core.h"
 #include "snmalloc/stl/type_traits.h"
 #include "snmalloc/stl/utility.h"
diff --git a/src/snmalloc/ds_core/ds_core.h b/src/snmalloc/ds_core/ds_core.h
index 38e99dce2..2292b8118 100644
--- a/src/snmalloc/ds_core/ds_core.h
+++ b/src/snmalloc/ds_core/ds_core.h
@@ -15,5 +15,5 @@
 #include "mitigations.h"
 #include "ptrwrap.h"
 #include "redblacktree.h"
-#include "seqset.h"
-#include "tid.h"
\ No newline at end of file
+#include "stats.h"
+#include "tid.h"
diff --git a/src/snmalloc/ds_core/stats.h b/src/snmalloc/ds_core/stats.h
new file mode 100644
index 000000000..29d4c9a42
--- /dev/null
+++ b/src/snmalloc/ds_core/stats.h
@@ -0,0 +1,101 @@
+#pragma once
+
+#include "defines.h"
+#include "snmalloc/stl/atomic.h"
+#include "stddef.h"
+
+namespace snmalloc
+{
+  /**
+   * Very basic statistic that tracks current and peak values.
+   */
+  class Stat
+  {
+  private:
+    stl::Atomic<size_t> curr{0};
+    stl::Atomic<size_t> peak{0};
+
+  public:
+    void increase(size_t amount)
+    {
+      size_t old = curr.fetch_add(amount);
+      size_t c = old + amount;
+      size_t p = peak.load(stl::memory_order_relaxed);
+      while (c > p)
+      {
+        if (peak.compare_exchange_strong(p, c))
+          break;
+      }
+    }
+
+    void decrease(size_t amount)
+    {
+      size_t prev = curr.fetch_sub(amount);
+      SNMALLOC_ASSERT_MSG(
+        prev >= amount, "prev = {}, amount = {}", prev, amount);
+      UNUSED(prev);
+    }
+
+    size_t get_curr()
+    {
+      return curr.load(stl::memory_order_relaxed);
+    }
+
+    size_t get_peak()
+    {
+      return peak.load(stl::memory_order_relaxed);
+    }
+
+    void operator+=(size_t amount)
+    {
+      increase(amount);
+    }
+
+    void operator-=(size_t amount)
+    {
+      decrease(amount);
+    }
+
+    void operator++()
+    {
+      increase(1);
+    }
+
+    void operator--()
+    {
+      decrease(1);
+    }
+  };
+
+  /**
+   * Very basic statistic that can only grow. Not thread-safe: updates are
+   * relaxed load/store pairs rather than atomic read-modify-writes, so each
+   * instance should only be updated by its owning thread.
+   */
+  class MonotoneLocalStat
+  {
+    stl::Atomic<size_t> value{0};
+
+  public:
+    void operator++(int)
+    {
+      auto old = value.load(stl::memory_order_relaxed);
+      value.store(old + 1, stl::memory_order_relaxed);
+    }
+
+    void operator+=(const MonotoneLocalStat& other)
+    {
+      auto v = other.value.load(stl::memory_order_relaxed);
+      value.fetch_add(v, stl::memory_order_relaxed);
+    }
+
+    void operator+=(size_t v)
+    {
+      auto old = value.load(stl::memory_order_relaxed);
+      value.store(old + v, stl::memory_order_relaxed);
+    }
+
+    size_t operator*()
+    {
+      return value.load(stl::memory_order_relaxed);
+    }
+  };
+} // namespace snmalloc
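
As a reading aid, a minimal sketch of how the new Stat counter behaves (illustrative only, not part of the patch; it assumes the internal header is reachable through snmalloc/snmalloc.h):

  // Illustrative use of snmalloc::Stat (defined in ds_core/stats.h above).
  #include <snmalloc/snmalloc.h>
  #include <cassert>

  int main()
  {
    snmalloc::Stat s;
    s += 100; // current = 100, peak = 100
    s -= 40;  // current = 60, peak unchanged
    s += 30;  // current = 90, peak stays 100 since 90 < 100
    assert(s.get_curr() == 90);
    assert(s.get_peak() == 100);
  }
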
diff --git a/src/snmalloc/global/globalalloc.h b/src/snmalloc/global/globalalloc.h
index fa769e3e4..e9c9ccc7b 100644
--- a/src/snmalloc/global/globalalloc.h
+++ b/src/snmalloc/global/globalalloc.h
@@ -84,6 +84,15 @@ namespace snmalloc
       }
     }

+    if (
+      result == nullptr &&
+      RemoteDeallocCache<Config>::remote_inflight.get_curr() != 0)
+    {
+      report_fatal_error(
+        "debug_check_empty: remote inflight deallocations left {}",
+        RemoteDeallocCache<Config>::remote_inflight.get_curr());
+    }
+
     if (result != nullptr)
     {
       *result = okay;
@@ -128,6 +137,81 @@ namespace snmalloc
     }
   }

+  template<SNMALLOC_CONCEPT(IsConfig) Config>
+  inline static void get_stats(AllocStats& stats)
+  {
+    auto alloc = AllocPool<Config>::iterate();
+    while (alloc != nullptr)
+    {
+      stats += alloc->get_stats();
+      alloc = AllocPool<Config>::iterate(alloc);
+    }
+  }
+
+  template<SNMALLOC_CONCEPT(IsConfig) Config>
+  inline static void print_alloc_stats()
+  {
+    static stl::Atomic<size_t> dump{0};
+
+    auto l_dump = dump++;
+    if (l_dump == 0)
+    {
+      message<1024>(
+        "snmalloc_allocs,dumpid,sizeclass,size,allocated,deallocated,in_use,"
+        "bytes,slabs allocated,slabs deallocated,slabs in_use,slabs bytes");
+      message<1024>(
+        "snmalloc_totals,dumpid,backend bytes,peak backend "
+        "bytes,requested,slabs requested bytes,remote inflight bytes,allocator "
+        "count");
+    }
+
+    AllocStats stats;
+    snmalloc::get_stats<Config>(stats);
+    size_t total_live{0};
+    size_t total_live_slabs{0};
+    for (size_t i = 0; i < snmalloc::SIZECLASS_REP_SIZE; i++)
+    {
+      auto sc = snmalloc::sizeclass_t::from_raw(i);
+      auto allocated = *stats[sc].objects_allocated;
+      auto deallocated = *stats[sc].objects_deallocated;
+      auto slabs_allocated = *stats[sc].slabs_allocated;
+      auto slabs_deallocated = *stats[sc].slabs_deallocated;
+      if (allocated == 0 && deallocated == 0)
+        continue;
+      auto size = snmalloc::sizeclass_full_to_size(sc);
+      auto slab_size = snmalloc::sizeclass_full_to_slab_size(sc);
+      auto in_use = allocated - deallocated;
+      auto amount = in_use * size;
+      total_live += amount;
+      auto in_use_slabs = slabs_allocated - slabs_deallocated;
+      auto amount_slabs = in_use_slabs * slab_size;
+      total_live_slabs += amount_slabs;
+
+      snmalloc::message<1024>(
+        "snmalloc_allocs,{},{},{},{},{},{},{},{},{},{},{}",
+        l_dump,
+        i,
+        size,
+        allocated,
+        deallocated,
+        in_use,
+        amount,
+        slabs_allocated,
+        slabs_deallocated,
+        in_use_slabs,
+        amount_slabs);
+    }
+    snmalloc::message<1024>(
+      "snmalloc_totals,{},{},{},{},{},{},{}",
+      l_dump,
+      Config::Backend::get_current_usage(),
+      Config::Backend::get_peak_usage(),
+      total_live,
+      total_live_slabs,
+      RemoteDeallocCache<Config>::remote_inflight.get_curr(),
+      Config::pool().get_count());
+  }
+
   /**
    * Returns the number of remaining bytes in an object.
    *
diff --git a/src/snmalloc/mem/corealloc.h b/src/snmalloc/mem/alloc.h
similarity index 94%
rename from src/snmalloc/mem/corealloc.h
rename to src/snmalloc/mem/alloc.h
index 1b7f7f5b5..d2ef84113 100644
--- a/src/snmalloc/mem/corealloc.h
+++ b/src/snmalloc/mem/alloc.h
@@ -1,6 +1,7 @@
 #pragma once

 #include "../ds/ds.h"
+#include "allocstats.h"
 #include "check_init.h"
 #include "freelist.h"
 #include "metadata.h"
@@ -32,7 +33,7 @@ namespace snmalloc
   }

   template<ZeroMem zero_mem, typename Config>
-  inline static SNMALLOC_FAST_PATH capptr::Alloc<void>
+  inline static SNMALLOC_FAST_PATH void*
   finish_alloc(freelist::HeadPtr p, smallsizeclass_t sizeclass)
   {
     auto r = finish_alloc_no_zero(p, sizeclass);

     // TODO: Should this be zeroing the free Object state, in the non-zeroing
     // case?

-    return r;
+    return capptr_reveal(r);
   }

   struct FastFreeLists
@@ -156,6 +157,11 @@
      */
     Ticker<typename Config::Pal> ticker;

+    /**
+     * Tracks this allocator's memory usage
+     */
+    AllocStats stats;
+
     /**
      * The message queue needs to be accessible from other threads
      *
@@ -437,6 +443,9 @@
         post();
       }

+      // Remove the freed bytes from the global inflight statistics.
+      RemoteDeallocCache<Config>::remote_inflight -= bytes_freed;
+
       return action(args...);
     }
@@ -488,16 +497,19 @@
           freelist::Object::key_root,
           entry.get_slab_metadata()->as_key_tweak(),
           domesticate);
-        if (!need_post && !remote_dealloc_cache.reserve_space(entry, nelem))
-        {
-          need_post = true;
-        }
+
+        // Need to account for forwarded bytes.
+        size_t size = nelem * sizeclass_full_to_size(entry.get_sizeclass());
+        bytes_returned += size;
+
+        need_post |= !remote_dealloc_cache.reserve_space(entry, nelem);
+
         remote_dealloc_cache.template forward<sizeof(Allocator)>(
           entry.get_remote()->trunc_id(), msg);
       }

     template<typename Domesticator_queue>
-    SNMALLOC_FAST_PATH static auto dealloc_local_objects_fast(
+    SNMALLOC_FAST_PATH auto dealloc_local_objects_fast(
       capptr::Alloc<RemoteMessage> msg,
       const PagemapEntry& entry,
       BackendSlabMetadata* meta,
@@ -523,6 +535,9 @@ namespace snmalloc

       bytes_freed += objsize * length;

+      stats[entry.get_sizeclass()].objects_deallocated +=
+        static_cast<size_t>(length);
+
       // Update the head and the next pointer in the free list.
       meta->free_queue.append_segment(
         curr,
@@ -546,7 +561,7 @@
      * - alloc(size_t)
      *   - small_alloc(size_t)
      *     - gets allocation from a fast free list and is done.
-     *     - if no fast free list,
+     *     - otherwise, no fast free list, so calls small_alloc_slow
      *       - check for message queue
      *       - small_refill(size_t)
      *       - If another free list is available, use it.
@@ -583,17 +598,17 @@
       {
         // Small allocations are more likely. Improve
         // branch prediction by placing this case first.
-        return capptr_reveal(small_alloc<zero_mem>(size));
+        return small_alloc<zero_mem>(size);
       }

-      return capptr_reveal(alloc_not_small<zero_mem>(size, this));
+      return alloc_not_small<zero_mem>(size, this);
     }

     /**
      * Fast allocation for small objects.
      */
     template<ZeroMem zero_mem>
-    SNMALLOC_FAST_PATH capptr::Alloc<void> small_alloc(size_t size)
+    SNMALLOC_FAST_PATH void* small_alloc(size_t size)
     {
       auto domesticate =
         [this](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA {
@@ -606,12 +621,20 @@ namespace snmalloc
       if (SNMALLOC_LIKELY(!fl->empty()))
       {
         auto p = fl->take(key, domesticate);
+        stats[sizeclass].objects_allocated++;
         return finish_alloc<zero_mem, Config>(p, sizeclass);
       }

+      return small_alloc_slow<zero_mem>(sizeclass, fl);
+    }
+
+    template<ZeroMem zero_mem>
+    SNMALLOC_SLOW_PATH void*
+    small_alloc_slow(smallsizeclass_t sizeclass, freelist::Iter<>* fl)
+    {
       return handle_message_queue(
         [](Allocator* alloc, smallsizeclass_t sizeclass, freelist::Iter<>* fl)
-          -> capptr::Alloc<void> {
+          -> void* {
           return alloc->small_refill<zero_mem>(sizeclass, *fl);
         },
         this,
@@ -629,7 +652,7 @@ namespace snmalloc
      * register.
      */
     template<ZeroMem zero_mem>
-    static SNMALLOC_SLOW_PATH capptr::Alloc<void>
+    static SNMALLOC_SLOW_PATH void*
     alloc_not_small(size_t size, Allocator* self)
     {
       if (size == 0)
@@ -641,15 +664,15 @@ namespace snmalloc
       }

       return self->handle_message_queue(
-        [](Allocator* self, size_t size) -> capptr::Alloc<void> {
+        [](Allocator* self, size_t size) -> void* {
           return CheckInit::check_init(
-            [self, size]() {
+            [self, size]() -> void* {
               if (size > bits::one_at_bit(bits::BITS - 1))
               {
                 // Cannot allocate something that is more than half the size of
                 // the address space
                 errno = ENOMEM;
-                return capptr::Alloc<void>{nullptr};
+                return nullptr;
               }

               // Check if secondary allocator wants to offer the memory
+
+              {
                 if constexpr (zero_mem == YesZero)
                   Config::Pal::zero(result, size);
-                return capptr::Alloc<void>::unsafe_from(result);
+                return result;
               }

               // Grab slab of correct size
@@ -694,10 +717,17 @@ namespace snmalloc
                 chunk.unsafe_ptr(), bits::next_pow2(size));
             }

-            return capptr_chunk_is_alloc(
-              capptr_to_user_address_control(chunk));
+            if (chunk.unsafe_ptr() != nullptr)
+            {
+              auto sc = size_to_sizeclass_full(size);
+              self->stats[sc].objects_allocated++;
+              self->stats[sc].slabs_allocated++;
+            }
+
+            return capptr_reveal(
+              capptr_chunk_is_alloc(capptr_to_user_address_control(chunk)));
           },
-          [](Allocator* a, size_t size) {
+          [](Allocator* a, size_t size) -> void* {
             return alloc_not_small<zero_mem>(size, a);
           },
           size);
@@ -707,7 +737,7 @@ namespace snmalloc
     }

     template<ZeroMem zero_mem>
-    SNMALLOC_FAST_PATH capptr::Alloc<void>
+    SNMALLOC_FAST_PATH void*
     small_refill(smallsizeclass_t sizeclass, freelist::Iter<>& fast_free_list)
     {
       void* result = SecondaryAllocator::allocate(
@@ -727,10 +757,8 @@ namespace snmalloc
         // deallocated, before snmalloc is initialised, then it will fail
         // to access the pagemap.
         return CheckInit::check_init(
-          [result]() { return capptr::Alloc<void>::unsafe_from(result); },
-          [](Allocator*, void* result) {
-            return capptr::Alloc<void>::unsafe_from(result);
-          },
+          [result]() { return result; },
+          [](Allocator*, void* result) { return result; },
           result);
       }

@@ -773,6 +801,7 @@ namespace snmalloc
           laden.insert(meta);
         }

+        stats[sizeclass].objects_allocated++;
         auto r = finish_alloc<zero_mem, Config>(p, sizeclass);
         return ticker.check_tick(r);
       }
@@ -780,11 +809,11 @@ namespace snmalloc
     }

     template<ZeroMem zero_mem>
-    SNMALLOC_SLOW_PATH capptr::Alloc<void> small_refill_slow(
+    SNMALLOC_SLOW_PATH void* small_refill_slow(
       smallsizeclass_t sizeclass, freelist::Iter<>& fast_free_list)
     {
       return CheckInit::check_init(
-        [this, sizeclass, &fast_free_list]() -> capptr::Alloc<void> {
+        [this, sizeclass, &fast_free_list]() -> void* {
           size_t rsize = sizeclass_to_size(sizeclass);

           // No existing free list; get a new slab.
@@ -831,6 +860,9 @@ namespace snmalloc
             laden.insert(meta);
           }

+          stats[sizeclass].slabs_allocated++;
+          stats[sizeclass].objects_allocated++;
+
           auto r = finish_alloc<zero_mem, Config>(p, sizeclass);
           return ticker.check_tick(r);
         },
@@ -1006,6 +1038,7 @@ namespace snmalloc
        */
       if (SNMALLOC_LIKELY(public_state() == entry.get_remote()))
       {
+        stats[entry.get_sizeclass()].objects_deallocated++;
         dealloc_cheri_checks(p_tame.unsafe_ptr());
         dealloc_local_object(p_tame, entry);
         return;
@@ -1074,6 +1107,8 @@ namespace snmalloc
       // Remove from set of fully used slabs.
       meta->node.remove();

+      stats[entry.get_sizeclass()].slabs_deallocated++;
+
       Config::Backend::dealloc_chunk(
         get_backend_local_state(), *meta, p, size, entry.get_sizeclass());

@@ -1170,6 +1205,8 @@ namespace snmalloc
       // don't touch the cache lines at this point in snmalloc_check_client.
       auto start = clear_slab(meta, sizeclass);

+      stats[sizeclass].slabs_deallocated++;
+
       Config::Backend::dealloc_chunk(
         get_backend_local_state(),
         *meta,
@@ -1336,10 +1373,10 @@ namespace snmalloc
         return capptr_domesticate(local_state, p);
       };

-      size_t bytes_flushed = 0; // Not currently used.
-
       if (destroy_queue)
       {
+        size_t bytes_flushed = 0;
+
         auto cb = [this, domesticate, &bytes_flushed](
                     capptr::Alloc<RemoteMessage> m) {
           bool need_post = true; // Always going to post, so ignore.
         };

         message_queue().destroy_and_iterate(domesticate, cb);
+
+        RemoteDeallocCache<Config>::remote_inflight -= bytes_flushed;
       }
       else
       {
@@ -1397,8 +1436,9 @@ namespace snmalloc
         }
       });

-      // Set the remote_dealloc_cache to immediately slow path.
-      remote_dealloc_cache.capacity = 0;
+      // TODO: I don't think this is needed.
+      // // Set the remote_dealloc_cache to immediately slow path.
+      // remote_dealloc_cache.cache_bytes = REMOTE_CACHE;

       return posted;
     }
@@ -1467,6 +1507,11 @@ namespace snmalloc
 #endif
       return sent_something;
     }
+
+    const AllocStats& get_stats()
+    {
+      return stats;
+    }
   };

   template
diff --git a/src/snmalloc/mem/allocstats.h b/src/snmalloc/mem/allocstats.h
new file mode 100644
index 000000000..bfa789c36
--- /dev/null
+++ b/src/snmalloc/mem/allocstats.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include "../ds_core/ds_core.h"
+#include "sizeclasstable.h"
+
+#include <array>
+
+namespace snmalloc
+{
+  struct AllocStat
+  {
+    MonotoneLocalStat objects_allocated{};
+    MonotoneLocalStat objects_deallocated{};
+    MonotoneLocalStat slabs_allocated{};
+    MonotoneLocalStat slabs_deallocated{};
+  };
+
+  class AllocStats
+  {
+    std::array<AllocStat, SIZECLASS_REP_SIZE> sizeclass{};
+
+  public:
+    AllocStat& operator[](sizeclass_t index)
+    {
+      auto i = index.raw();
+      return sizeclass[i];
+    }
+
+    AllocStat& operator[](smallsizeclass_t index)
+    {
+      return sizeclass[sizeclass_t::from_small_class(index).raw()];
+    }
+
+    void operator+=(const AllocStats& other)
+    {
+      for (size_t i = 0; i < SIZECLASS_REP_SIZE; i++)
+      {
+        sizeclass[i].objects_allocated += other.sizeclass[i].objects_allocated;
+        sizeclass[i].objects_deallocated +=
+          other.sizeclass[i].objects_deallocated;
+        sizeclass[i].slabs_allocated += other.sizeclass[i].slabs_allocated;
+        sizeclass[i].slabs_deallocated += other.sizeclass[i].slabs_deallocated;
+      }
+    }
+  };
+} // namespace snmalloc
\ No newline at end of file
diff --git a/src/snmalloc/mem/mem.h b/src/snmalloc/mem/mem.h
index fc5e59965..e9c80765c 100644
--- a/src/snmalloc/mem/mem.h
+++ b/src/snmalloc/mem/mem.h
@@ -1,7 +1,7 @@
+#include "alloc.h"
 #include "backend_concept.h"
 #include "backend_wrappers.h"
 #include "check_init.h"
-#include "corealloc.h"
 #include "entropy.h"
 #include "freelist.h"
 #include "metadata.h"
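
A small sketch of the aggregation semantics of the AllocStats and MonotoneLocalStat types added above (illustrative only, not part of the patch; the size-class index is an arbitrary example):

  #include <snmalloc/snmalloc.h>

  int main()
  {
    snmalloc::AllocStats a, b;
    auto sc = snmalloc::sizeclass_t::from_raw(5); // arbitrary size class

    a[sc].objects_allocated++; // per-allocator counters use relaxed
    a[sc].objects_allocated++; // load + store, not atomic RMW
    b[sc].objects_deallocated++;

    a += b; // aggregation, as get_stats() does over the allocator pool

    // operator* reads the underlying relaxed atomic
    return (*a[sc].objects_allocated == 2 &&
            *a[sc].objects_deallocated == 1) ? 0 : 1;
  }
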
diff --git a/src/snmalloc/mem/pool.h b/src/snmalloc/mem/pool.h
index 9b6294d67..6bce43f06 100644
--- a/src/snmalloc/mem/pool.h
+++ b/src/snmalloc/mem/pool.h
@@ -32,9 +32,15 @@ namespace snmalloc
     FlagWord lock{};
     capptr::Alloc<T> list{nullptr};
+    stl::Atomic<size_t> count{0};

   public:
     constexpr PoolState() = default;
+
+    size_t get_count()
+    {
+      return count.load(stl::memory_order_relaxed);
+    }
   };

   /**
@@ -81,7 +87,7 @@ namespace snmalloc
    * The third template argument is a method to retrieve the actual PoolState.
    *
    * For the pool of allocators, refer to the AllocPool alias defined in
-   * corealloc.h.
+   * alloc.h.
    *
    * For a pool of another type, it is recommended to leave the
    * third template argument with its default value. The SingletonPoolState
@@ -124,6 +130,8 @@ namespace snmalloc
         p->list_next = pool.list;
         pool.list = p;

+        pool.count++;
+
         p->set_in_use();
       });
     return p.unsafe_ptr();
diff --git a/src/snmalloc/mem/remotecache.h b/src/snmalloc/mem/remotecache.h
index 3d5ed70b8..ec60839f4 100644
--- a/src/snmalloc/mem/remotecache.h
+++ b/src/snmalloc/mem/remotecache.h
@@ -194,18 +194,19 @@ namespace snmalloc
     RemoteDeallocCacheBatchingImpl<Config> batching;

+    static inline Stat remote_inflight;
+
     /**
-     * The total amount of memory we are waiting for before we will dispatch
-     * to other allocators. Zero can mean we have not initialised the allocator
-     * yet. This is initialised to the 0 so that we always hit a slow path to
-     * start with, when we hit the slow path and need to dispatch everything, we
-     * can check if we are a real allocator and lazily provide a real allocator.
+     * The total number of bytes of memory in the cache.
+     *
+     * REMOTE_CACHE is used as the initial value so that we always hit a slow
+     * path to start with; when we hit the slow path and need to dispatch
+     * everything, we can check whether we are a real allocator and lazily
+     * provide a real one.
      */
-    int64_t capacity{0};
+    size_t cache_bytes{REMOTE_CACHE};

-#ifndef NDEBUG
     bool initialised = false;
-#endif

     /// Used to find the index into the array of queues for remote
     /// deallocation
@@ -233,13 +234,23 @@ namespace snmalloc
     {
       static_assert(sizeof(n) * 8 > MAX_CAPACITY_BITS);

-      auto size =
-        n * static_cast<int64_t>(sizeclass_full_to_size(entry.get_sizeclass()));
+      size_t size = n * sizeclass_full_to_size(entry.get_sizeclass());
+
+      size_t new_cache_bytes = cache_bytes + size;
+      if (SNMALLOC_UNLIKELY(new_cache_bytes > REMOTE_CACHE))
+      {
+        // Check if this is the default allocator, and if not, we
+        // can update the state.
+        if (initialised)
+        {
+          cache_bytes = new_cache_bytes;
+        }

-      bool result = capacity > size;
-      if (result)
-        capacity -= size;
-      return result;
+        return false;
+      }
+
+      cache_bytes = new_cache_bytes;
+      return true;
     }

     template
@@ -288,6 +299,9 @@ namespace snmalloc
         return capptr_domesticate(local_state, p);
       };

+      // We are about to post cache_bytes bytes to other allocators.
+      remote_inflight += cache_bytes;
+
       batching.close_all([this](
                            RemoteAllocator::alloc_id_t target_id,
                            capptr::Alloc<RemoteMessage> msg) {
@@ -356,8 +370,8 @@ namespace snmalloc
         }
       }

-      // Reset capacity as we have emptied everything
-      capacity = REMOTE_CACHE;
+      // Reset the cache as we have emptied everything
+      cache_bytes = 0;

       return sent_something;
     }
@@ -373,18 +387,16 @@ namespace snmalloc
      */
     void init()
     {
-#ifndef NDEBUG
       initialised = true;
-#endif
+
       for (auto& l : list)
       {
         // We do not need to initialise with a particular slab, so pass
         // a null address.
         l.init(0, RemoteAllocator::key_global, NO_KEY_TWEAK);
       }

-      capacity = REMOTE_CACHE;
-
       batching.init();
+      cache_bytes = 0;
     }
   };
 } // namespace snmalloc
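
To see why cache_bytes starts at REMOTE_CACHE, here is a standalone model of the reserve_space logic above (illustrative only, not part of the patch; the REMOTE_CACHE value is made up):

  #include <cstddef>
  #include <cstdio>

  constexpr size_t REMOTE_CACHE = 1 << 20; // illustrative value

  struct CacheModel
  {
    size_t cache_bytes{REMOTE_CACHE}; // starts full => first call is slow path
    bool initialised{false};

    bool reserve_space(size_t size)
    {
      size_t new_cache_bytes = cache_bytes + size;
      if (new_cache_bytes > REMOTE_CACHE)
      {
        if (initialised) // a real allocator records the overshoot...
          cache_bytes = new_cache_bytes;
        return false;    // ...and the caller posts, then resets to 0
      }
      cache_bytes = new_cache_bytes;
      return true;
    }
  };

  int main()
  {
    CacheModel c;
    printf("%d\n", c.reserve_space(64)); // 0: forced slow path, lazy init runs
    c.initialised = true;
    c.cache_bytes = 0;                   // what init() / post_all() leave behind
    printf("%d\n", c.reserve_space(64)); // 1: fast path from now on
  }
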
diff --git a/src/snmalloc/stl/gnu/atomic.h b/src/snmalloc/stl/gnu/atomic.h
index 7a193972e..bb7145f65 100644
--- a/src/snmalloc/stl/gnu/atomic.h
+++ b/src/snmalloc/stl/gnu/atomic.h
@@ -63,6 +63,11 @@ namespace snmalloc
       return __builtin_addressof(ref);
     }

+    SNMALLOC_FAST_PATH static const T* addressof(const T& ref)
+    {
+      return __builtin_addressof(ref);
+    }
+
     // From libc++:
     // require types that are 1, 2, 4, 8, or 16 bytes in length to be aligned
     // to at least their size to be potentially
@@ -89,7 +94,8 @@ namespace snmalloc
       return load();
     }

-    SNMALLOC_FAST_PATH T load(MemoryOrder mem_ord = MemoryOrder::SEQ_CST)
+    SNMALLOC_FAST_PATH T
+    load(MemoryOrder mem_ord = MemoryOrder::SEQ_CST) const
     {
       T res;
       __atomic_load(addressof(val), addressof(res), order(mem_ord));
diff --git a/src/test/func/alloc_churn/alloc_churn.cc b/src/test/func/alloc_churn/alloc_churn.cc
new file mode 100644
index 000000000..ebfe87774
--- /dev/null
+++ b/src/test/func/alloc_churn/alloc_churn.cc
@@ -0,0 +1,34 @@
+#include "snmalloc/snmalloc.h"
+
+#include <iostream>
+
+void test_step()
+{
+  auto b = snmalloc::get_scoped_allocator();
+  auto a = snmalloc::get_scoped_allocator();
+
+  for (size_t j = 0; j < 32; j++)
+    for (size_t i = 0; i < 20; i++)
+    {
+      auto p = a->alloc(snmalloc::bits::one_at_bit(i));
+      if (p != nullptr)
+        b->dealloc(p);
+      p = b->alloc(snmalloc::bits::one_at_bit(i));
+      if (p != nullptr)
+        a->dealloc(p);
+    }
+}
+
+int main()
+{
+  for (size_t i = 0; i < 1000; i++)
+  {
+    if (i % 100 == 0)
+    {
+      std::cout << "Step " << i << std::endl;
+      snmalloc::print_alloc_stats();
+      snmalloc::debug_check_empty();
+    }
+    test_step();
+  }
+}
\ No newline at end of file
diff --git a/src/test/func/cleanup/cleanup.cc b/src/test/func/cleanup/cleanup.cc
new file mode 100644
index 000000000..5e3666dc8
--- /dev/null
+++ b/src/test/func/cleanup/cleanup.cc
@@ -0,0 +1,61 @@
+#include <cstring>
+#include <iostream>
+#include <snmalloc/snmalloc.h>
+#include <thread>
+#include <vector>
+
+void ecall()
+{
+  auto a = snmalloc::get_scoped_allocator();
+  std::vector<void*> allocs;
+  for (size_t j = 0; j < 1000; j++)
+  {
+    allocs.push_back(a->alloc(j % 1024));
+  }
+  auto p = a->alloc(1 * 1024 * 1024);
+  memset(p, 0, 1 * 1024 * 1024);
+
+  for (size_t j = 0; j < allocs.size(); j++)
+    a->dealloc(allocs[j]);
+
+  a->dealloc(p);
+}
+
+void thread_body()
+{
+  for (int i = 0; i < 1000; i++)
+  {
+    ecall();
+    std::this_thread::sleep_for(std::chrono::milliseconds(10));
+  }
+}
+
+void monitor_body()
+{
+  for (int i = 0; i < 60; i++)
+  {
+    std::cout << "Current: "
+              << snmalloc::Alloc::Config::Backend::get_current_usage()
+              << std::endl;
+    std::cout << "Peak   : "
+              << snmalloc::Alloc::Config::Backend::get_peak_usage()
+              << std::endl;
+    std::cout << "Allocs : " << snmalloc::Alloc::Config::pool().get_count()
+              << std::endl;
+    std::cout << "--------------------------------------------" << std::endl;
+    std::this_thread::sleep_for(std::chrono::seconds(1));
+  }
+}
+
+int main()
+{
+  std::vector<std::thread> threads;
+  for (int i = 0; i < 8; i++)
+  {
+    threads.push_back(std::thread(thread_body));
+  }
+  threads.push_back(std::thread(monitor_body));
+
+  for (auto& t : threads)
+    t.join();
+  return 0;
+}
\ No newline at end of file
diff --git a/src/test/func/memory/memory.cc b/src/test/func/memory/memory.cc
index 891737843..9f876d459 100644
--- a/src/test/func/memory/memory.cc
+++ b/src/test/func/memory/memory.cc
@@ -558,7 +558,7 @@ int main(int argc, char** argv)
 #endif
 #define TEST(testname) \
   std::cout << "Running " #testname << std::endl; \
-  for (size_t i = 0; i < 100; i++) \
+  for (size_t i = 0; i < 50; i++) \
   testname();

 TEST(test_alloc_dealloc_64k);
diff --git a/src/test/func/statistics/stats.cc b/src/test/func/statistics/stats.cc
index d66f060a1..3bedcd55b 100644
--- a/src/test/func/statistics/stats.cc
+++ b/src/test/func/statistics/stats.cc
@@ -11,6 +11,7 @@ void debug_check_empty_1()
   auto r = snmalloc::alloc(size);

   snmalloc::debug_check_empty(&result);
+  snmalloc::print_alloc_stats();
   if (result != false)
   {
     std::cout << "debug_check_empty failed to detect leaked memory:" << size
@@ -18,8 +19,12 @@ void debug_check_empty_1()
     abort();
   }

+  snmalloc::print_alloc_stats();
+
   snmalloc::dealloc(r);

+  snmalloc::print_alloc_stats();
+
   snmalloc::debug_check_empty(&result);
   if (result != true)
   {
@@ -27,8 +32,12 @@ void debug_check_empty_1()
     abort();
   }

+  snmalloc::print_alloc_stats();
+
   r = snmalloc::alloc(size);

+  snmalloc::print_alloc_stats();
+
   snmalloc::debug_check_empty(&result);
   if (result != false)
   {
@@ -37,14 +46,20 @@ void debug_check_empty_1()
     abort();
   }

+  snmalloc::print_alloc_stats();
+
   snmalloc::dealloc(r);

+  snmalloc::print_alloc_stats();
+
   snmalloc::debug_check_empty(&result);
   if (result != true)
   {
     std::cout << "debug_check_empty failed to say empty:" << size << std::endl;
     abort();
   }
+
+  snmalloc::print_alloc_stats();
 }

 template
diff --git a/src/test/perf/batchblitz/batchblitz.cc b/src/test/perf/batchblitz/batchblitz.cc
new file mode 100644
index 000000000..3dce75353
--- /dev/null
+++ b/src/test/perf/batchblitz/batchblitz.cc
@@ -0,0 +1,92 @@
+#include <atomic>
+#include <cstdio>
+#include <snmalloc/snmalloc.h>
+#include <thread>
+#include <vector>
+
+size_t threads{0};
+size_t memory{0};
+size_t iterations{0};
+
+// Global barrier for synchronising threads.
+std::atomic<size_t> barrier{0};
+std::atomic<size_t> incarnation{0};
+
+std::atomic<bool> stop{false};
+
+std::vector<std::vector<void*>> allocations;
+
+NOINLINE bool wait()
+{
+  auto old_incarnation = incarnation.load();
+  // Register we have arrived at the barrier.
+  if (--barrier == 0)
+  {
+    printf(".");
+    fflush(stdout);
+    barrier = threads;
+    incarnation++;
+    return stop;
+  }
+
+  while (incarnation.load() == old_incarnation)
+  {
+    if (stop)
+      return true;
+    snmalloc::Aal::pause();
+  }
+
+  return stop;
+}
+
+void thread_func(size_t tid)
+{
+  size_t size = 4097;
+  size_t mem = memory / size;
+  for (size_t j = 0; j < iterations; j++)
+  {
+    if (wait())
+      return;
+    std::vector<void*>& allocs = allocations[tid];
+    for (size_t i = 0; i < mem; i++)
+    {
+      allocs.push_back(snmalloc::alloc(4097));
+    }
+    if (wait())
+      return;
+    std::vector<void*>& deallocs = allocations[(tid + 1) % threads];
+    for (auto p : deallocs)
+    {
+      snmalloc::dealloc(p);
+    }
+    deallocs.clear();
+  }
+}
+
+int main()
+{
+  threads = std::thread::hardware_concurrency();
+  barrier = threads;
+
+  if (snmalloc::DefaultPal::address_bits == 32)
+    memory = snmalloc::bits::one_at_bit(30) / threads;
+  else
+    memory = snmalloc::bits::one_at_bit(32) / threads;
+  iterations = 1000;
+
+  for (size_t i = 0; i < threads; i++)
+    allocations.emplace_back();
+
+  std::vector<std::thread> thread_pool;
+  for (size_t i = 0; i < threads; i++)
+    thread_pool.emplace_back(thread_func, i);
+
+  for (size_t i = 0; i < 30; i++)
+  {
+    std::this_thread::sleep_for(std::chrono::seconds(1));
+    snmalloc::print_alloc_stats();
+  }
+  stop = true;
+
+  for (auto& t : thread_pool)
+    t.join();
+}
diff --git a/src/test/perf/churn/churn.cc b/src/test/perf/churn/churn.cc
new file mode 100644
index 000000000..910204a1d
--- /dev/null
+++ b/src/test/perf/churn/churn.cc
@@ -0,0 +1,97 @@
+#include <atomic>
+#include <cstdlib>
+#include <iostream>
+#include <queue>
+#include <snmalloc/snmalloc.h>
+#include <thread>
+#include <vector>
+
+int main()
+{
+  std::vector<std::thread> threads;
+  std::atomic<size_t> running;
+  snmalloc::Stat requests;
+  std::atomic<bool> done{false};
+
+  for (size_t i = 0; i < 16; i++)
+  {
+    threads.push_back(std::thread([&running, &requests, &done]() {
+      std::queue<size_t*> q;
+      while (!done)
+      {
+        snmalloc::ScopedAllocator alloc;
+        running++;
+
+        if (rand() % 1000 == 0)
+        {
+          // Deallocate everything in the queue
+          while (q.size() > 0)
+          {
+            auto p = q.front();
+            requests -= *p;
+            alloc->dealloc(p);
+            q.pop();
+          }
+        }
+
+        for (size_t j = 0; j < 1000; j++)
+        {
+          if (q.size() >= 20000 || (q.size() > 0 && (rand() % 10 == 0)))
+          {
+            auto p = q.front();
+            requests -= *p;
+            alloc->dealloc(p);
+            q.pop();
+          }
+          else
+          {
+            size_t size =
+              (rand() % 1024 == 0) ? 16 * 1024 * (1 << (rand() % 3)) : 48;
+            requests += size;
+            auto p = (size_t*)alloc->alloc(size);
+            *p = size;
+            q.push(p);
+          }
+        }
+
+        running--;
+        std::this_thread::sleep_for(std::chrono::microseconds(rand() % 2000));
+      }
+    }));
+  }
+
+  std::thread([&requests]() {
+    size_t count = 0;
+    while (count < 60)
+    {
+      count++;
+      std::this_thread::sleep_for(std::chrono::seconds(1));
+      // std::cout << "Inflight: "
+      //   << snmalloc::RemoteDeallocCache<snmalloc::Alloc::Config>::remote_inflight
+      //   << std::endl;
+      // std::cout << "Current reservation: "
+      //   << snmalloc::Globals::get_current_usage() << std::endl;
+      // std::cout << "Peak reservation: "
+      //   << snmalloc::Globals::get_peak_usage() << std::endl;
+      // std::cout << "Allocator count: "
+      //   << snmalloc::Globals::pool().get_count() << std::endl;
+      // std::cout << "Running threads: " << running << std::endl;
+      // std::cout << "Index: " << count << std::endl;
+      // std::cout << "------------------------------------------" << std::endl;
+      std::cout
+        << count << "," << snmalloc::Alloc::Config::Backend::get_peak_usage()
+        << "," << snmalloc::Alloc::Config::Backend::get_current_usage() << ","
+        << requests.get_curr() << "," << requests.get_peak() << ","
+        << snmalloc::RemoteDeallocCache<snmalloc::Alloc::Config>::remote_inflight
+             .get_peak()
+        << ","
+        << snmalloc::RemoteDeallocCache<snmalloc::Alloc::Config>::remote_inflight
+             .get_curr()
+        << std::endl;
+      snmalloc::print_alloc_stats();
+    }
+  }).join();
+
+  done = true;
+
+  for (auto& t : threads)
+    t.join();
+
+  return 0;
+}
\ No newline at end of file
diff --git a/src/test/perf/combininglock/combininglock.cc b/src/test/perf/combininglock/combininglock.cc
new file mode 100644
index 000000000..6a9437c70
--- /dev/null
+++ b/src/test/perf/combininglock/combininglock.cc
@@ -0,0 +1,37 @@
+#include <atomic>
+#include <snmalloc/snmalloc.h>
+#include <thread>
+#include <vector>
+
+snmalloc::CombiningLock cl;
+
+std::atomic<bool> run{true};
+
+void loop()
+{
+  size_t j = 0;
+  size_t i = 0;
+  while (run)
+  {
+    i++;
+    snmalloc::with(cl, [&]() { j++; });
+    if (i != j)
+      snmalloc::error("i != j");
+  }
+}
+
+int main()
+{
+  std::vector<std::thread> threads;
+  for (size_t i = 0; i < 8; i++)
+  {
+    threads.emplace_back(std::thread(loop));
+  }
+
+  std::this_thread::sleep_for(std::chrono::seconds(100));
+  run = false;
+
+  for (auto& t : threads)
+  {
+    t.join();
+  }
+}
\ No newline at end of file
diff --git a/src/test/perf/realloc/realloc.cc b/src/test/perf/realloc/realloc.cc
new file mode 100644
index 000000000..5efcfbaeb
--- /dev/null
+++ b/src/test/perf/realloc/realloc.cc
@@ -0,0 +1,46 @@
+#include "test/opt.h"
+#include "test/setup.h"
+#include "test/usage.h"
+#include "test/xoroshiro.h"
+
+#include <iostream>
+#include <snmalloc/snmalloc.h>
+
+using namespace snmalloc;
+
+NOINLINE
+void* myrealloc(void* p, size_t size)
+{
+  return snmalloc::libc::realloc(p, size);
+}
+
+void grow()
+{
+  void* base = nullptr;
+  for (size_t i = 1; i < 1000; i++)
+  {
+    base = myrealloc(base, i * 8);
+  }
+  snmalloc::libc::free(base);
+}
+
+int main()
+{
+  auto start = Aal::tick();
+
+  for (size_t i = 0; i < 10000; i++)
+  {
+    grow();
+    if (i % 10 == 0)
+    {
+      std::cout << "." << std::flush;
+    }
+  }
+
+  auto end = Aal::tick();
+
+  std::cout << "Taken: " << end - start << std::endl;
+}
\ No newline at end of file
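
Finally, a minimal sketch of driving the new statistics entry points from application code, following the pattern the tests above use (illustrative only, not part of the patch; output goes through snmalloc's message mechanism):

  #include <snmalloc/snmalloc.h>

  int main()
  {
    // Do a little work so the counters are non-trivial.
    auto p = snmalloc::alloc(128);
    snmalloc::dealloc(p);

    // Emits one "snmalloc_allocs" CSV row per active size class
    // (dumpid, sizeclass, size, allocated, deallocated, in_use, bytes,
    // slab counters) and one "snmalloc_totals" row (backend bytes, peak
    // backend bytes, requested, slab bytes, remote inflight, allocator count).
    snmalloc::print_alloc_stats();
  }
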