diff --git a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt index c0cf0a1ff4176b..3a04abb06b64cb 100644 --- a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt +++ b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt @@ -175,12 +175,14 @@ if(TARGET coreclr) $ ${CLRJIT_STATIC} ${CLRINTERPRETER_STATIC} + vm_gc_wks + $ cee_wks_core cee_wks ${FOUNDATION}) endif() -target_link_libraries(coreclr_static PUBLIC ${CORECLR_LIBRARIES} ${CORECLR_STATIC_CLRJIT_STATIC} ${CORECLR_STATIC_CLRINTERPRETER_STATIC} cee_wks_core ${CEE_WKS_STATIC} ${FOUNDATION}) +target_link_libraries(coreclr_static PUBLIC ${CORECLR_LIBRARIES} ${CORECLR_STATIC_CLRJIT_STATIC} ${CORECLR_STATIC_CLRINTERPRETER_STATIC} vm_gc_wks $ cee_wks_core ${CEE_WKS_STATIC} ${FOUNDATION}) target_compile_definitions(coreclr_static PUBLIC CORECLR_EMBEDDED) if (CLR_CMAKE_HOST_ANDROID) diff --git a/src/coreclr/gc/CMakeLists.txt b/src/coreclr/gc/CMakeLists.txt index 30ac2bb7e91ae8..1e88b637af6e0d 100644 --- a/src/coreclr/gc/CMakeLists.txt +++ b/src/coreclr/gc/CMakeLists.txt @@ -11,16 +11,12 @@ set(GC_SOURCES gcconfig.cpp gccommon.cpp gcscan.cpp - gcsvr.cpp - gcwks.cpp handletable.cpp handletablecore.cpp handletablescan.cpp objecthandle.cpp softwarewritewatch.cpp gchandletable.cpp - gceesvr.cpp - gceewks.cpp gcload.cpp gcbridge.cpp handletablecache.cpp) @@ -56,6 +52,7 @@ if (CLR_CMAKE_TARGET_WIN32) env/gcenv.windows.inl env/volatile.h gc.h + gcinternal.h gcconfig.h gcbridge.h gcdesc.h @@ -104,6 +101,29 @@ list(APPEND GC_SOURCES ${GC_HEADERS}) convert_to_absolute_path(GC_SOURCES ${GC_SOURCES}) +set(GC_WKS_SVR_SOURCES + gcee.cpp + gc.cpp + init.cpp + no_gc.cpp + finalization.cpp + dynamic_tuning.cpp + region_free_list.cpp + region_allocator.cpp + memory.cpp + sweep.cpp + collect.cpp + diagnostics.cpp + dynamic_heap_count.cpp + card_table.cpp + relocate_compact.cpp + mark_phase.cpp + background.cpp + interface.cpp + allocation.cpp + plan_phase.cpp + regions_segments.cpp) + 
if(FEATURE_STANDALONE_GC) if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) set(BUILD_EXP_GC 1) @@ -111,7 +131,20 @@ if(FEATURE_STANDALONE_GC) # clrgcexp is build with standalone+regions if (BUILD_EXP_GC) - add_library_clr(clrgcexp SHARED ${GC_SOURCES}) + add_library_clr(clrgcexp_gc_wks OBJECT ${GC_WKS_SVR_SOURCES}) + add_dependencies(clrgcexp_gc_wks eventing_headers) + target_compile_definitions(clrgcexp_gc_wks PRIVATE USE_REGIONS) + set(CLRGGCEXP_OBJECTS + $<TARGET_OBJECTS:clrgcexp_gc_wks>) + if (FEATURE_SVR_GC) + add_library_clr(clrgcexp_gc_svr OBJECT ${GC_WKS_SVR_SOURCES}) + add_dependencies(clrgcexp_gc_svr eventing_headers) + target_compile_definitions(clrgcexp_gc_svr PRIVATE SERVER_GC USE_REGIONS) + list(APPEND CLRGGCEXP_OBJECTS + $<TARGET_OBJECTS:clrgcexp_gc_svr>) + endif() + + add_library_clr(clrgcexp SHARED ${GC_SOURCES} ${CLRGGCEXP_OBJECTS}) add_dependencies(clrgcexp eventing_headers) target_link_libraries(clrgcexp PRIVATE ${GC_LINK_LIBRARIES}) target_link_libraries(clrgcexp PRIVATE gcexp_dll_wks_descriptor) @@ -122,8 +155,20 @@ if(FEATURE_STANDALONE_GC) install_clr(TARGETS clrgcexp DESTINATIONS . 
COMPONENT runtime) endif (BUILD_EXP_GC) + add_library_clr(clrgc_gc_wks OBJECT ${GC_WKS_SVR_SOURCES}) + add_dependencies(clrgc_gc_wks eventing_headers) + set(CLRGC_OBJECTS + $<TARGET_OBJECTS:clrgc_gc_wks>) + if(FEATURE_SVR_GC) + add_library_clr(clrgc_gc_svr OBJECT ${GC_WKS_SVR_SOURCES}) + add_dependencies(clrgc_gc_svr eventing_headers) + target_compile_definitions(clrgc_gc_svr PRIVATE SERVER_GC) + list(APPEND CLRGC_OBJECTS + $<TARGET_OBJECTS:clrgc_gc_svr>) + endif() + # clrgc is build with standalone+segments - add_library_clr(clrgc SHARED ${GC_SOURCES}) + add_library_clr(clrgc SHARED ${GC_SOURCES} ${CLRGC_OBJECTS}) add_dependencies(clrgc eventing_headers) target_link_libraries(clrgc PRIVATE ${GC_LINK_LIBRARIES}) target_link_libraries(clrgc PRIVATE gc_dll_wks_descriptor) diff --git a/src/coreclr/gc/allocation.cpp b/src/coreclr/gc/allocation.cpp index 947f2330de5a57..171a8286820c29 100644 --- a/src/coreclr/gc/allocation.cpp +++ b/src/coreclr/gc/allocation.cpp @@ -1,6 +1,15 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC allocator::allocator (unsigned int num_b, int fbb, alloc_list* b, int gen) { @@ -1321,27 +1330,6 @@ bool gc_heap::new_allocation_allowed (int gen_number) return TRUE; } -inline -ptrdiff_t gc_heap::get_desired_allocation (int gen_number) -{ - return dd_desired_allocation (dynamic_data_of (gen_number)); -} - -inline -ptrdiff_t gc_heap::get_new_allocation (int gen_number) -{ - return dd_new_allocation (dynamic_data_of (gen_number)); -} - -//return the amount allocated so far in gen_number -inline -ptrdiff_t gc_heap::get_allocation (int gen_number) -{ - dynamic_data* dd = dynamic_data_of (gen_number); - - return dd_desired_allocation (dd) - dd_new_allocation (dd); -} - #ifdef SHORT_PLUGS inline void set_padding_in_expand (uint8_t* old_loc, @@ -3253,29 +3241,6 @@ allocation_state gc_heap::allocate_soh (int gen_number, return soh_alloc_state; } -#ifdef BACKGROUND_GC -inline -void gc_heap::bgc_track_uoh_alloc() -{ - if (current_c_gc_state == c_gc_state_planning) - { - Interlocked::Increment (&uoh_alloc_thread_count); - dprintf (3, ("h%d: inc lc: %d", heap_number, (int32_t)uoh_alloc_thread_count)); - } -} - -inline -void gc_heap::bgc_untrack_uoh_alloc() -{ - if (current_c_gc_state == c_gc_state_planning) - { - Interlocked::Decrement (&uoh_alloc_thread_count); - dprintf (3, ("h%d: dec lc: %d", heap_number, (int32_t)uoh_alloc_thread_count)); - } -} - -#endif //BACKGROUND_GC - size_t gc_heap::get_uoh_seg_size (size_t size) { size_t default_seg_size = @@ -4551,41 +4516,6 @@ BOOL gc_heap::allocate_more_space(alloc_context* acontext, size_t size, return (status == a_state_can_allocate); } -inline -CObjectHeader* gc_heap::allocate (size_t jsize, alloc_context* acontext, uint32_t flags) -{ - size_t size = Align (jsize); - assert (size >= Align (min_obj_size)); - { - retry: - uint8_t* result = acontext->alloc_ptr; - acontext->alloc_ptr+=size; - if 
(acontext->alloc_ptr <= acontext->alloc_limit) - { - CObjectHeader* obj = (CObjectHeader*)result; - assert (obj != 0); - return obj; - } - else - { - acontext->alloc_ptr -= size; - -#ifdef _MSC_VER -#pragma inline_depth(0) -#endif //_MSC_VER - - if (! allocate_more_space (acontext, size, flags, 0)) - return 0; - -#ifdef _MSC_VER -#pragma inline_depth(20) -#endif //_MSC_VER - - goto retry; - } - } -} - void gc_heap::leave_allocation_segment (generation* gen) { adjust_limit (0, 0, gen); @@ -5417,15 +5347,6 @@ generation* gc_heap::ensure_ephemeral_heap_segment (generation* consing_gen) #endif //!USE_REGIONS -inline -void gc_heap::init_alloc_info (generation* gen, heap_segment* seg) -{ - generation_allocation_segment (gen) = seg; - generation_allocation_pointer (gen) = heap_segment_mem (seg); - generation_allocation_limit (gen) = generation_allocation_pointer (gen); - generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen); -} - inline heap_segment* gc_heap::get_next_alloc_seg (generation* gen) { @@ -5880,3 +5801,5 @@ CObjectHeader* gc_heap::allocate_uoh_object (size_t jsize, uint32_t flags, int g return obj; } + +} // namespace SVR/WKS diff --git a/src/coreclr/gc/background.cpp b/src/coreclr/gc/background.cpp index 3fc82188afa489..f151926d9309e7 100644 --- a/src/coreclr/gc/background.cpp +++ b/src/coreclr/gc/background.cpp @@ -1,6 +1,15 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC // static @@ -221,47 +230,6 @@ void gc_heap::concurrent_print_time_delta (const char* msg) } #ifdef BACKGROUND_GC -inline -BOOL gc_heap::background_marked (uint8_t* o) -{ - return mark_array_marked (o); -} - -inline -BOOL gc_heap::background_mark1 (uint8_t* o) -{ - BOOL to_mark = !mark_array_marked (o); - - dprintf (3, ("b*%zx*b(%d)", (size_t)o, (to_mark ? 1 : 0))); - if (to_mark) - { - mark_array_set_marked (o); - dprintf (4, ("n*%zx*n", (size_t)o)); - return TRUE; - } - else - return FALSE; -} - -// TODO: we could consider filtering out NULL's here instead of going to -// look for it on other heaps -inline -BOOL gc_heap::background_mark (uint8_t* o, uint8_t* low, uint8_t* high) -{ - BOOL marked = FALSE; - if ((o >= low) && (o < high)) - marked = background_mark1 (o); -#ifdef MULTIPLE_HEAPS - else if (o) - { - gc_heap* hp = heap_of (o); - assert (hp); - if ((o >= hp->background_saved_lowest_address) && (o < hp->background_saved_highest_address)) - marked = background_mark1 (o); - } -#endif //MULTIPLE_HEAPS - return marked; -} #ifdef USE_REGIONS void gc_heap::set_background_overflow_p (uint8_t* oo) @@ -3453,7 +3421,7 @@ void gc_heap::process_background_segment_end (heap_segment* seg, bgc_verify_mark_array_cleared (seg); } -inline +//inline BOOL gc_heap::fgc_should_consider_object (uint8_t* o, heap_segment* seg, BOOL consider_bgc_mark_p, @@ -4599,3 +4567,5 @@ size_t gc_heap::get_mark_array_size (heap_segment* seg) } #endif //USE_REGIONS + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/card_table.cpp b/src/coreclr/gc/card_table.cpp index 685438a68cbc35..e7e04e501eaa15 100644 --- a/src/coreclr/gc/card_table.cpp +++ b/src/coreclr/gc/card_table.cpp @@ -1,6 +1,16 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + #ifdef CARD_BUNDLE // Clear the specified card bundle @@ -130,18 +140,6 @@ BOOL gc_heap::card_bundles_enabled () #endif //CARD_BUNDLE -inline -size_t gc_heap::brick_of (uint8_t* add) -{ - return (size_t)(add - lowest_address) / brick_size; -} - -inline -uint8_t* gc_heap::brick_address (size_t brick) -{ - return lowest_address + (brick_size * brick); -} - void gc_heap::clear_brick_table (uint8_t* from, uint8_t* end) { size_t from_brick = brick_of (from); @@ -149,79 +147,6 @@ void gc_heap::clear_brick_table (uint8_t* from, uint8_t* end) memset (&brick_table[from_brick], 0, sizeof(brick_table[from_brick])*(end_brick-from_brick)); } -//codes for the brick entries: -//entry == 0 -> not assigned -//entry >0 offset is entry-1 -//entry <0 jump back entry bricks -inline -void gc_heap::set_brick (size_t index, ptrdiff_t val) -{ - if (val < -32767) - { - val = -32767; - } - assert (val < 32767); - if (val >= 0) - brick_table [index] = (short)val+1; - else - brick_table [index] = (short)val; - - dprintf (3, ("set brick[%zx] to %d\n", index, (short)val)); -} - -inline -int gc_heap::get_brick_entry (size_t index) -{ -#ifdef MULTIPLE_HEAPS - return VolatileLoadWithoutBarrier(&brick_table [index]); -#else - return brick_table[index]; -#endif -} - -inline -uint8_t* gc_heap::card_address (size_t card) -{ - return (uint8_t*) (card_size * card); -} - -inline -size_t gc_heap::card_of ( uint8_t* object) -{ - return (size_t)(object) / card_size; -} - -inline -void gc_heap::clear_card (size_t card) -{ - card_table [card_word (card)] = - (card_table [card_word (card)] & ~(1 << card_bit (card))); - dprintf (3,("Cleared card %zx [%zx, %zx[", card, (size_t)card_address (card), - (size_t)card_address (card+1))); -} - -inline -void gc_heap::set_card (size_t card) -{ - size_t word = card_word (card); - card_table[word] = (card_table [word] | (1 << card_bit (card))); - 
-#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - // Also set the card bundle that corresponds to the card - size_t bundle_to_set = cardw_card_bundle(word); - - card_bundle_set(bundle_to_set); - - dprintf (3,("Set card %zx [%zx, %zx[ and bundle %zx", card, (size_t)card_address (card), (size_t)card_address (card+1), bundle_to_set)); -#endif -} - -inline -BOOL gc_heap::card_set_p (size_t card) -{ - return ( card_table [ card_word (card) ] & (1 << card_bit (card))); -} - void gc_heap::destroy_card_table_helper (uint32_t* c_table) { uint8_t* lowest = card_table_lowest_address (c_table); @@ -1260,8 +1185,7 @@ inline void gc_heap::verify_card_bundle_bits_set(size_t first_card_word, size_t #endif } -// Verifies that any bundles that are not set represent only cards that are not set. -inline void gc_heap::verify_card_bundles() +void gc_heap::verify_card_bundles() { #ifdef _DEBUG size_t lowest_card = card_word (card_of (lowest_address)); @@ -1276,23 +1200,22 @@ inline void gc_heap::verify_card_bundles() while (cardb < end_cardb) { uint32_t* card_word = &card_table[max(card_bundle_cardw (cardb), lowest_card)]; - uint32_t* card_word_end = &card_table[min(card_bundle_cardw (cardb+1), highest_card)]; + uint32_t* card_word_end = &card_table[min(card_bundle_cardw (cardb + 1), highest_card)]; if (card_bundle_set_p (cardb) == 0) { - // Verify that no card is set while (card_word < card_word_end) { if (*card_word != 0) { dprintf (3, ("gc: %zd, Card word %zx for address %zx set, card_bundle %zx clear", dd_collection_count (dynamic_data_of (0)), - (size_t)(card_word-&card_table[0]), - (size_t)(card_address ((size_t)(card_word-&card_table[0]) * card_word_width)), + (size_t)(card_word - &card_table[0]), + (size_t)(card_address ((size_t)(card_word - &card_table[0]) * card_word_width)), cardb)); } - assert((*card_word)==0); + assert((*card_word) == 0); card_word++; } } @@ -1369,19 +1292,6 @@ void gc_heap::update_card_table_bundle() #endif //CARD_BUNDLE #endif //WRITE_WATCH -#ifdef 
COLLECTIBLE_CLASS -// We don't want to burn another ptr size space for pinned plugs to record this so just -// set the card unconditionally for collectible objects if we are demoting. -inline void -gc_heap::unconditional_set_card_collectible (uint8_t* obj) -{ - if (settings.demotion) - { - set_card (card_of (obj)); - } -} - -#endif //COLLECTIBLE_CLASS //Clear the cards [start_card, end_card[ void gc_heap::clear_cards (size_t start_card, size_t end_card) @@ -2004,3 +1914,5 @@ bool gc_heap::find_next_chunk(card_marking_enumerator& card_mark_enumerator, hea } #endif //FEATURE_CARD_MARKING_STEALING + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/collect.cpp b/src/coreclr/gc/collect.cpp index ee258ac3141b8d..a74d878f1e73b3 100644 --- a/src/coreclr/gc/collect.cpp +++ b/src/coreclr/gc/collect.cpp @@ -1,6 +1,16 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + wait_full_gc_status gc_heap::full_gc_wait (GCEvent *event, int time_out_ms) { #ifdef MULTIPLE_HEAPS @@ -1722,3 +1732,5 @@ void gc_heap::do_post_gc() mark_list_overflow = false; } } + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/diagnostics.cpp b/src/coreclr/gc/diagnostics.cpp index 17f485d6d6af62..78d3cd7304144c 100644 --- a/src/coreclr/gc/diagnostics.cpp +++ b/src/coreclr/gc/diagnostics.cpp @@ -1,6 +1,16 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + void gc_heap::add_to_history_per_heap() { #if defined(GC_HISTORY) && defined(BACKGROUND_GC) @@ -1785,3 +1795,5 @@ void gc_heap::walk_read_only_segment(heap_segment *seg, void *pvContext, object_ } #endif //FEATURE_BASICFREEZE + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/dynamic_heap_count.cpp b/src/coreclr/gc/dynamic_heap_count.cpp index 35599e1441f126..853a1c5ffe58cb 100644 --- a/src/coreclr/gc/dynamic_heap_count.cpp +++ b/src/coreclr/gc/dynamic_heap_count.cpp @@ -1,6 +1,16 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + #ifdef USE_REGIONS #ifdef DYNAMIC_HEAP_COUNT void gc_heap::check_decommissioned_heap() @@ -1535,3 +1545,5 @@ void gc_heap::add_to_bgc_hc_history (hc_record_stage stage) #endif //DYNAMIC_HEAP_COUNT #endif //USE_REGIONS + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/dynamic_tuning.cpp b/src/coreclr/gc/dynamic_tuning.cpp index d43357677984ff..9e315b5e5ca6cb 100644 --- a/src/coreclr/gc/dynamic_tuning.cpp +++ b/src/coreclr/gc/dynamic_tuning.cpp @@ -1,6 +1,14 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR { +#else // SERVER_GC +namespace WKS { +#endif // SERVER_GC + // Things we need to manually initialize: // gen0 min_size - based on cache @@ -42,48 +50,6 @@ static static_data static_data_table[latency_level_last - latency_level_first + }, }; -inline BOOL -gc_heap::dt_low_ephemeral_space_p (gc_tuning_point tp) -{ - BOOL ret = FALSE; - - switch (tp) - { - case tuning_deciding_condemned_gen: -#ifndef USE_REGIONS - case tuning_deciding_compaction: - case tuning_deciding_expansion: -#endif //USE_REGIONS - case tuning_deciding_full_gc: - { - ret = (!ephemeral_gen_fit_p (tp)); - break; - } -#ifndef USE_REGIONS - case tuning_deciding_promote_ephemeral: - { - size_t new_gen0size = approximate_new_allocation(); - ptrdiff_t plan_ephemeral_size = total_ephemeral_size; - - dprintf (GTC_LOG, ("h%d: plan eph size is %zd, new gen0 is %zd", - heap_number, plan_ephemeral_size, new_gen0size)); - // If we were in no_gc_region we could have allocated a larger than normal segment, - // and the next seg we allocate will be a normal sized seg so if we can't fit the new - // ephemeral generations there, do an ephemeral promotion. 
- ret = ((soh_segment_size - segment_info_size) < (plan_ephemeral_size + new_gen0size)); - break; - } -#endif //USE_REGIONS - default: - { - assert (!"invalid tuning reason"); - break; - } - } - - return ret; -} - BOOL gc_heap::dt_high_frag_p (gc_tuning_point tp, int gen_number, @@ -146,132 +112,6 @@ gc_heap::dt_high_frag_p (gc_tuning_point tp, return ret; } -inline BOOL -gc_heap::dt_estimate_reclaim_space_p (gc_tuning_point tp, int gen_number) -{ - BOOL ret = FALSE; - - switch (tp) - { - case tuning_deciding_condemned_gen: - { - if (gen_number == max_generation) - { - size_t est_maxgen_free = estimated_reclaim (gen_number); - - uint32_t num_heaps = 1; -#ifdef MULTIPLE_HEAPS - num_heaps = gc_heap::n_heaps; -#endif //MULTIPLE_HEAPS - - size_t min_frag_th = min_reclaim_fragmentation_threshold (num_heaps); - dprintf (GTC_LOG, ("h%d, min frag is %zd", heap_number, min_frag_th)); - ret = (est_maxgen_free >= min_frag_th); - } - else - { - assert (0); - } - break; - } - - default: - break; - } - - return ret; -} - -// DTREVIEW: Right now we only estimate gen2 fragmentation. 
-// on 64-bit though we should consider gen1 or even gen0 fragmentation as -// well -inline BOOL -gc_heap::dt_estimate_high_frag_p (gc_tuning_point tp, int gen_number, uint64_t available_mem) -{ - BOOL ret = FALSE; - - switch (tp) - { - case tuning_deciding_condemned_gen: - { - if (gen_number == max_generation) - { - dynamic_data* dd = dynamic_data_of (gen_number); - float est_frag_ratio = 0; - if (dd_current_size (dd) == 0) - { - est_frag_ratio = 1; - } - else if ((dd_fragmentation (dd) == 0) || (dd_fragmentation (dd) + dd_current_size (dd) == 0)) - { - est_frag_ratio = 0; - } - else - { - est_frag_ratio = (float)dd_fragmentation (dd) / (float)(dd_fragmentation (dd) + dd_current_size (dd)); - } - - size_t est_frag = (dd_fragmentation (dd) + (size_t)((dd_desired_allocation (dd) - dd_new_allocation (dd)) * est_frag_ratio)); - dprintf (GTC_LOG, ("h%d: gen%d: current_size is %zd, frag is %zd, est_frag_ratio is %d%%, estimated frag is %zd", - heap_number, - gen_number, - dd_current_size (dd), - dd_fragmentation (dd), - (int)(est_frag_ratio * 100), - est_frag)); - - uint32_t num_heaps = 1; - -#ifdef MULTIPLE_HEAPS - num_heaps = gc_heap::n_heaps; -#endif //MULTIPLE_HEAPS - uint64_t min_frag_th = min_high_fragmentation_threshold(available_mem, num_heaps); - //dprintf (GTC_LOG, ("h%d, min frag is %zd", heap_number, min_frag_th)); - ret = (est_frag >= min_frag_th); - } - else - { - assert (0); - } - break; - } - - default: - break; - } - - return ret; -} - -inline BOOL -gc_heap::dt_low_card_table_efficiency_p (gc_tuning_point tp) -{ - BOOL ret = FALSE; - - switch (tp) - { - case tuning_deciding_condemned_gen: - { - /* promote into max-generation if the card table has too many - * generation faults besides the n -> 0 - */ - ret = (generation_skip_ratio < generation_skip_ratio_threshold); - break; - } - - default: - break; - } - - return ret; -} - -inline BOOL -gc_heap::dt_high_memory_load_p() -{ - return ((settings.entry_memory_load >= high_memory_load_th) || 
g_low_memory_status); -} - #if defined(USE_REGIONS) bool gc_heap::near_heap_hard_limit_p() { @@ -2217,26 +2057,6 @@ size_t gc_heap::joined_youngest_desired (size_t new_allocation) #endif //HOST_64BIT -inline -gc_history_global* gc_heap::get_gc_data_global() -{ -#ifdef BACKGROUND_GC - return (settings.concurrent ? &bgc_data_global : &gc_data_global); -#else - return &gc_data_global; -#endif //BACKGROUND_GC -} - -inline -gc_history_per_heap* gc_heap::get_gc_data_per_heap() -{ -#ifdef BACKGROUND_GC - return (settings.concurrent ? &bgc_data_per_heap : &gc_data_per_heap); -#else - return &gc_data_per_heap; -#endif //BACKGROUND_GC -} - void gc_heap::compute_new_dynamic_data (int gen_number) { _ASSERTE(gen_number >= 0); @@ -2854,3 +2674,5 @@ void gc_heap::accumulate_committed_bytes(heap_segment* seg, size_t& committed_by seg = heap_segment_next_rw (seg); } } + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/finalization.cpp b/src/coreclr/gc/finalization.cpp index 46d8a187b066fc..e85ec827f48bef 100644 --- a/src/coreclr/gc/finalization.cpp +++ b/src/coreclr/gc/finalization.cpp @@ -1,6 +1,14 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR { +#else // SERVER_GC +namespace WKS { +#endif // SERVER_GC + void gc_heap::schedule_finalizer_work (FinalizerWorkItem* callback) { FinalizerWorkItem* prev; @@ -700,3 +708,5 @@ void gc_heap::walk_finalize_queue (fq_walk_fn fn) finalize_queue->WalkFReachableObjects (fn); #endif //FEATURE_PREMORTEM_FINALIZATION } + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index ca84b2c8599de5..24839af5b0d42e 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -16,47 +16,7 @@ // allocation helpers in gcscan.cpp // -#include "common.h" -#include "gcenv.h" - -#include "gc.h" -#include "gcscan.h" -#include "gcdesc.h" -#include "softwarewritewatch.h" -#include "handletable.h" -#include "handletable.inl" -#include "gcenv.inl" -#include "gceventstatus.h" -#include - -// If FEATURE_INTERPRETER is set, always enable the GC side of FEATURE_CONSERVATIVE_GC -#ifdef FEATURE_INTERPRETER -#ifndef FEATURE_CONSERVATIVE_GC -#define FEATURE_CONSERVATIVE_GC -#endif -#endif // FEATURE_INTERPRETER - -#ifdef __INTELLISENSE__ -#if defined(FEATURE_SVR_GC) - -#define SERVER_GC 1 - -#else // defined(FEATURE_SVR_GC) - -#ifdef SERVER_GC -#undef SERVER_GC -#endif - -#endif // defined(FEATURE_SVR_GC) -#endif // __INTELLISENSE__ - -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) -#include "vxsort/do_vxsort.h" -#define USE_VXSORT -#else -#define USE_INTROSORT -#endif // TARGET_AMD64 || TARGET_ARM64 -#include "introsort.h" +#include "gcinternal.h" #ifdef SERVER_GC namespace SVR { @@ -64,77 +24,12 @@ namespace SVR { namespace WKS { #endif // SERVER_GC -#include "gcimpl.h" -#include "gcpriv.h" - -#ifdef DACCESS_COMPILE -#error this source file should not be compiled with DACCESS_COMPILE! -#endif //DACCESS_COMPILE - -// We just needed a simple random number generator for testing. 
-class gc_rand -{ -public: - static uint64_t x; - - static uint64_t get_rand() - { - x = (314159269*x+278281) & 0x7FFFFFFF; - return x; - } - - // obtain random number in the range 0 .. r-1 - static uint64_t get_rand(uint64_t r) - { - // require r >= 0 - uint64_t x = (uint64_t)((get_rand() * r) >> 31); - return x; - } -}; - uint64_t gc_rand::x = 0; #if defined(BACKGROUND_GC) && defined(FEATURE_EVENT_TRACE) BOOL bgc_heap_walk_for_etw_p = FALSE; #endif //BACKGROUND_GC && FEATURE_EVENT_TRACE -#define MAX_PTR ((uint8_t*)(~(ptrdiff_t)0)) -#define commit_min_th (16*OS_PAGE_SIZE) - -#define MIN_SOH_CROSS_GEN_REFS (400) -#define MIN_LOH_CROSS_GEN_REFS (800) - -#ifdef SERVER_GC -#define partial_size_th 100 -#define num_partial_refs 64 -#else //SERVER_GC -#define partial_size_th 100 -#define num_partial_refs 32 -#endif //SERVER_GC - -#define demotion_plug_len_th (6*1024*1024) - -#ifdef USE_REGIONS -// If the survived / region_size is 90+%, we don't compact this region. -#define sip_surv_ratio_th (90) -// If the survived due to cards from old generations / region_size is 90+%, -// we don't compact this region, also we immediately promote it to gen2. 
-#define sip_old_card_surv_ratio_th (90) -#endif //USE_REGIONS - -#ifdef HOST_64BIT -#define MARK_STACK_INITIAL_LENGTH 1024 -#else -#define MARK_STACK_INITIAL_LENGTH 128 -#endif // HOST_64BIT - -#define LOH_PIN_QUEUE_LENGTH 100 -#define LOH_PIN_DECAY 10 - -#define UOH_ALLOCATION_RETRY_MAX_COUNT 2 - -#define MAX_YP_SPIN_COUNT_UNIT 32768 - uint32_t yp_spin_count_unit = 0; uint32_t original_spin_count_unit = 0; size_t loh_size_threshold = LARGE_OBJECT_SIZE; @@ -160,7 +55,7 @@ uint8_t g_build_variant = 2; VOLATILE(int32_t) g_no_gc_lock = -1; #ifdef TRACE_GC -const char * const allocation_state_str[] = { +extern const char * const allocation_state_str[] = { "start", "can_allocate", "cant_allocate", @@ -230,7 +125,7 @@ static const char* const str_gc_pause_modes[] = "no_gc" }; -static const char* const str_root_kinds[] = { +const char* const str_root_kinds[] = { "Stack", "FinalizeQueue", "Handles", @@ -244,20 +139,6 @@ static const char* const str_root_kinds[] = { }; #endif //DT_LOG || TRACE_GC -inline -BOOL is_induced (gc_reason reason) -{ - return ((reason == reason_induced) || - (reason == reason_induced_noforce) || - (reason == reason_lowmemory) || - (reason == reason_lowmemory_blocking) || - (reason == reason_induced_compacting) || - (reason == reason_induced_aggressive) || - (reason == reason_lowmemory_host) || - (reason == reason_lowmemory_host_blocking)); -} - - gc_oh_num gen_to_oh(int gen) { switch (gen) @@ -334,78 +215,6 @@ double gc_heap::bgc_tuning::ratio_correction_step = 0.0; int gc_heap::saved_bgc_tuning_reason = -1; #endif //BGC_SERVO_TUNING -inline -size_t round_up_power2 (size_t size) -{ - // Get the 0-based index of the most-significant bit in size-1. - // If the call failed (because size-1 is zero), size must be 1, - // so return 1 (because 1 rounds up to itself). 
- DWORD highest_set_bit_index; - if (0 == -#ifdef HOST_64BIT - BitScanReverse64( -#else - BitScanReverse( -#endif - &highest_set_bit_index, size - 1)) { return 1; } - - // The size == 0 case (which would have overflowed to SIZE_MAX when decremented) - // is handled below by relying on the fact that highest_set_bit_index is the maximum value - // (31 or 63, depending on sizeof(size_t)) and left-shifting a value >= 2 by that - // number of bits shifts in zeros from the right, resulting in an output of zero. - return static_cast(2) << highest_set_bit_index; -} - -inline -size_t round_down_power2 (size_t size) -{ - // Get the 0-based index of the most-significant bit in size. - // If the call failed, size must be zero so return zero. - DWORD highest_set_bit_index; - if (0 == -#ifdef HOST_64BIT - BitScanReverse64( -#else - BitScanReverse( -#endif - &highest_set_bit_index, size)) { return 0; } - - // Left-shift 1 by highest_set_bit_index to get back a value containing only - // the most-significant set bit of size, i.e. size rounded down - // to the next power-of-two value. - return static_cast(1) << highest_set_bit_index; -} - -// Get the 0-based index of the most-significant bit in the value. -// Returns -1 if the input value is zero (i.e. has no set bits). -inline -int index_of_highest_set_bit (size_t value) -{ - // Get the 0-based index of the most-significant bit in the value. - // If the call failed (because value is zero), return -1. - DWORD highest_set_bit_index; - return (0 == -#ifdef HOST_64BIT - BitScanReverse64( -#else - BitScanReverse( -#endif - &highest_set_bit_index, value)) ? 
-1 : static_cast(highest_set_bit_index); -} - - -inline -float mb (size_t num) -{ - return (float)((float)num / 1000.0 / 1000.0); -} - -inline -size_t gib (size_t num) -{ - return (num / 1024 / 1024 / 1024); -} - #ifdef BACKGROUND_GC uint32_t bgc_alloc_spin_count = 140; uint32_t bgc_alloc_spin = 2; @@ -428,15 +237,6 @@ float bgc_uoh_inc_ratio_alloc_wait = 2.0f; float bgc_uoh_inc_ratio_alloc_wait = 1.0f; #endif //USE_REGIONS -inline -void c_write (uint32_t& place, uint32_t value) -{ - Interlocked::Exchange (&place, value); -} - -// If every heap's gen2 or gen3 size is less than this threshold we will do a blocking GC. -const size_t bgc_min_per_heap = 4*1024*1024; - int gc_heap::gchist_index = 0; gc_mechanisms_store gc_heap::gchist[max_history_count]; @@ -453,7 +253,6 @@ BOOL gc_config_log_on = FALSE; FILE* gc_config_log = NULL; // we keep this much in a buffer and only flush when the buffer is full -#define gc_config_log_buffer_size (1*1024) // TEMP uint8_t* gc_config_log_buffer = 0; size_t gc_config_log_buffer_offset = 0; @@ -541,5795 +340,2582 @@ process_sync_log_stats() #ifdef MULTIPLE_HEAPS uint32_t g_num_active_processors = 0; -// Note that when a join is no longer used we still keep the values here because -// tooling already recognized them as having the meaning they were assigned originally. -// It doesn't break tooling if we stop using them but does if we assign a new meaning -// to them. 
-enum gc_join_stage -{ - gc_join_init_cpu_mapping = 0, - gc_join_done = 1, - gc_join_generation_determined = 2, - gc_join_begin_mark_phase = 3, - gc_join_scan_dependent_handles = 4, - gc_join_rescan_dependent_handles = 5, - gc_join_scan_sizedref_done = 6, - gc_join_null_dead_short_weak = 7, - gc_join_scan_finalization = 8, - gc_join_null_dead_long_weak = 9, - gc_join_null_dead_syncblk = 10, - gc_join_decide_on_compaction = 11, - gc_join_rearrange_segs_compaction = 12, - gc_join_adjust_handle_age_compact = 13, - gc_join_adjust_handle_age_sweep = 14, - gc_join_begin_relocate_phase = 15, - gc_join_relocate_phase_done = 16, - gc_join_verify_objects_done = 17, - gc_join_start_bgc = 18, - gc_join_restart_ee = 19, - gc_join_concurrent_overflow = 20, - gc_join_suspend_ee = 21, - gc_join_bgc_after_ephemeral = 22, - gc_join_allow_fgc = 23, - gc_join_bgc_sweep = 24, - gc_join_suspend_ee_verify = 25, - gc_join_restart_ee_verify = 26, - gc_join_set_state_free = 27, - gc_r_join_update_card_bundle = 28, - gc_join_after_absorb = 29, - gc_join_verify_copy_table = 30, - gc_join_after_reset = 31, - gc_join_after_ephemeral_sweep = 32, - gc_join_after_profiler_heap_walk = 33, - gc_join_minimal_gc = 34, - gc_join_after_commit_soh_no_gc = 35, - gc_join_expand_loh_no_gc = 36, - gc_join_final_no_gc = 37, - // No longer in use but do not remove, see comments for this enum. 
- gc_join_disable_software_write_watch = 38, - gc_join_merge_temp_fl = 39, - gc_join_bridge_processing = 40, - gc_join_max = 41 -}; +t_join gc_t_join; -enum gc_join_flavor -{ - join_flavor_server_gc = 0, - join_flavor_bgc = 1 -}; +#ifdef BACKGROUND_GC +t_join bgc_t_join; +#endif //BACKGROUND_GC -#define first_thread_arrived 2 -#pragma warning(push) -#pragma warning(disable:4324) // don't complain if DECLSPEC_ALIGN actually pads -struct DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) join_structure -{ - // Shared non volatile keep on separate line to prevent eviction - int n_threads; +#endif //MULTIPLE_HEAPS - // Keep polling/wait structures on separate line write once per join - DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) - GCEvent joined_event[3]; // the last event in the array is only used for first_thread_arrived. - Volatile lock_color; - VOLATILE(BOOL) wait_done; - VOLATILE(BOOL) joined_p; +void reset_memory (uint8_t* o, size_t sizeo); - // Keep volatile counted locks on separate cache line write many per join - DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) - VOLATILE(int) join_lock; - VOLATILE(int) r_join_lock; +#ifdef WRITE_WATCH -}; -#pragma warning(pop) +#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +bool virtual_alloc_hardware_write_watch = false; +#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -enum join_type -{ - type_last_join = 0, - type_join = 1, - type_restart = 2, - type_first_r_join = 3, - type_r_join = 4 -}; +bool hardware_write_watch_capability = false; -enum join_time -{ - time_start = 0, - time_end = 1 -}; +#else //WRITE_WATCH +#define mem_reserve (MEM_RESERVE) +#endif //WRITE_WATCH -enum join_heap_index +void WaitLongerNoInstru (int i) { - join_heap_restart = 100, - join_heap_r_restart = 200 -}; + // every 8th attempt: + bool bToggleGC = GCToEEInterface::EnablePreemptiveGC(); -class t_join -{ - join_structure join_struct; - - int id; - gc_join_flavor flavor; - -#ifdef JOIN_STATS - uint64_t start[MAX_SUPPORTED_CPUS], end[MAX_SUPPORTED_CPUS], start_seq; - // 
remember join id and last thread to arrive so restart can use these - int thd; - // we want to print statistics every 10 seconds - this is to remember the start of the 10 sec interval - uint64_t start_tick; - // counters for joins, in 1000's of clock cycles - uint64_t elapsed_total[gc_join_max], wake_total[gc_join_max], seq_loss_total[gc_join_max], par_loss_total[gc_join_max], in_join_total[gc_join_max]; -#endif //JOIN_STATS - -public: - BOOL init (int n_th, gc_join_flavor f) - { - dprintf (JOIN_LOG, ("Initializing join structure")); - join_struct.n_threads = n_th; - join_struct.lock_color = 0; - for (int i = 0; i < 3; i++) + // if we're waiting for gc to finish, we should block immediately + if (g_fSuspensionPending == 0) + { + if (g_num_processors > 1) { - if (!join_struct.joined_event[i].IsValid()) - { - join_struct.joined_p = FALSE; - dprintf (JOIN_LOG, ("Creating join event %d", i)); - // TODO - changing this to a non OS event - // because this is also used by BGC threads which are - // managed threads and WaitEx does not allow you to wait - // for an OS event on a managed thread. - // But we are not sure if this plays well in the hosting - // environment. 
- //join_struct.joined_event[i].CreateOSManualEventNoThrow(FALSE); - if (!join_struct.joined_event[i].CreateManualEventNoThrow(FALSE)) - return FALSE; - } + YieldProcessor(); // indicate to the processor that we are spinning + if (i & 0x01f) + GCToOSInterface::YieldThread (0); + else + GCToOSInterface::Sleep (5); } - join_struct.join_lock = join_struct.n_threads; - join_struct.r_join_lock = join_struct.n_threads; - join_struct.wait_done = FALSE; - flavor = f; - -#ifdef JOIN_STATS - start_tick = GCToOSInterface::GetLowPrecisionTimeStamp(); -#endif //JOIN_STATS - - return TRUE; + else + GCToOSInterface::Sleep (5); } - void update_n_threads(int n_th) + // If CLR is hosted, a thread may reach here while it is in preemptive GC mode, + // or it has no Thread object, in order to force a task to yield, or to triger a GC. + // It is important that the thread is going to wait for GC. Otherwise the thread + // is in a tight loop. If the thread has high priority, the perf is going to be very BAD. + if (bToggleGC) { - join_struct.n_threads = n_th; - join_struct.join_lock = n_th; - join_struct.r_join_lock = n_th; +#ifdef _DEBUG + // In debug builds, all enter_spin_lock operations go through this code. If a GC has + // started, it is important to block until the GC thread calls set_gc_done (since it is + // guaranteed to have cleared g_TrapReturningThreads by this point). This avoids livelock + // conditions which can otherwise occur if threads are allowed to spin in this function + // (and therefore starve the GC thread) between the point when the GC thread sets the + // WaitForGC event and the point when the GC thread clears g_TrapReturningThreads. + if (gc_heap::gc_started) + { + gc_heap::wait_for_gc_done(); + } +#endif // _DEBUG + GCToEEInterface::DisablePreemptiveGC(); } - - int get_num_threads() + else if (g_fSuspensionPending > 0) { - return join_struct.n_threads; + g_theGCHeap->WaitUntilGCComplete(); } +} - // This is for instrumentation only. 
- int get_join_lock() - { - return VolatileLoadWithoutBarrier (&join_struct.join_lock); - } +const int32_t lock_free = -1; +const int32_t lock_taken = 0; +const int32_t lock_decommissioned = 1; - void destroy () - { - dprintf (JOIN_LOG, ("Destroying join structure")); - for (int i = 0; i < 3; i++) - { - if (join_struct.joined_event[i].IsValid()) - join_struct.joined_event[i].CloseEvent(); - } - } - inline void fire_event (int heap, join_time time, join_type type, int join_id) +// If our heap got decommissioned, we need to try an existing heap. +//inline +bool gc_heap::should_move_heap (GCSpinLock* msl) +{ +#ifdef MULTIPLE_HEAPS + if (msl->lock == lock_decommissioned) { - FIRE_EVENT(GCJoin_V2, heap, time, type, join_id); + dprintf (5555, ("heap#%d got decommissioned! need to retry", heap_number)); } + return (msl->lock == lock_decommissioned); +#else //MULTIPLE_HEAPS + return false; +#endif //MULTIPLE_HEAPS +} - void join (gc_heap* gch, int join_id) +// All the places where we could be stopped because there was a suspension should call should_move_heap to check if we need to return +// so we can try another heap or we can continue the allocation on the same heap. 
+enter_msl_status gc_heap::enter_spin_lock_msl_helper (GCSpinLock* msl) +{ + do { -#ifdef JOIN_STATS - // parallel execution ends here - end[gch->heap_number] = get_ts(); -#endif //JOIN_STATS - - assert (!join_struct.joined_p); - int color = join_struct.lock_color.LoadWithoutBarrier(); +#ifdef DYNAMIC_HEAP_COUNT + uint64_t start = GetHighPrecisionTimeStamp(); +#endif //DYNAMIC_HEAP_COUNT - if (Interlocked::Decrement(&join_struct.join_lock) != 0) + unsigned int i = 0; + while (VolatileLoad (&msl->lock) != lock_free) { - dprintf (JOIN_LOG, ("join%d(%d): Join() Waiting...join_lock is now %d", - flavor, join_id, (int32_t)(join_struct.join_lock))); - - fire_event (gch->heap_number, time_start, type_join, join_id); - - //busy wait around the color - if (color == join_struct.lock_color.LoadWithoutBarrier()) + if (should_move_heap (msl)) { -respin: - int spin_count = 128 * yp_spin_count_unit; - for (int j = 0; j < spin_count; j++) + return msl_retry_different_heap; + } + if ((++i & 7) && !IsGCInProgress ()) + { + if (g_num_processors > 1) { - if (color != join_struct.lock_color.LoadWithoutBarrier()) +#ifndef MULTIPLE_HEAPS + int spin_count = 32 * yp_spin_count_unit; +#else //!MULTIPLE_HEAPS + int spin_count = yp_spin_count_unit; +#endif //!MULTIPLE_HEAPS + for (int j = 0; j < spin_count; j++) { - break; + if (VolatileLoad (&msl->lock) == lock_free || IsGCInProgress ()) + break; + // give the HT neighbor a chance to run + YieldProcessor (); } - YieldProcessor(); // indicate to the processor that we are spinning - } - - // we've spun, and if color still hasn't changed, fall into hard wait - if (color == join_struct.lock_color.LoadWithoutBarrier()) - { - dprintf (JOIN_LOG, ("join%d(%d): Join() hard wait on reset event %d, join_lock is now %d", - flavor, join_id, color, (int32_t)(join_struct.join_lock))); - - uint32_t dwJoinWait = join_struct.joined_event[color].Wait(INFINITE, FALSE); - - if (dwJoinWait != WAIT_OBJECT_0) + if (VolatileLoad (&msl->lock) != lock_free && 
!IsGCInProgress ()) { - STRESS_LOG1 (LF_GC, LL_FATALERROR, "joined event wait failed with code: %zx", dwJoinWait); - FATAL_GC_ERROR (); +#ifdef DYNAMIC_HEAP_COUNT + start -= GetHighPrecisionTimeStamp(); +#endif //DYNAMIC_HEAP_COUNT + safe_switch_to_thread (); +#ifdef DYNAMIC_HEAP_COUNT + start += GetHighPrecisionTimeStamp(); +#endif //DYNAMIC_HEAP_COUNT } } - - // avoid race due to the thread about to reset the event (occasionally) being preempted before ResetEvent() - if (color == join_struct.lock_color.LoadWithoutBarrier()) + else { - dprintf (9999, ("---h%d %d j%d %d - respin!!! (c:%d-%d)", - gch->heap_number, join_id, join_struct.n_threads, color, join_struct.lock_color.LoadWithoutBarrier())); - goto respin; + safe_switch_to_thread (); } - - dprintf (JOIN_LOG, ("join%d(%d): Join() done, join_lock is %d", - flavor, join_id, (int32_t)(join_struct.join_lock))); } - - fire_event (gch->heap_number, time_end, type_join, join_id); - -#ifdef JOIN_STATS - // parallel execution starts here - start[gch->heap_number] = get_ts(); - Interlocked::ExchangeAdd(&in_join_total[join_id], (start[gch->heap_number] - end[gch->heap_number])); -#endif //JOIN_STATS - } - else - { - fire_event (gch->heap_number, time_start, type_last_join, join_id); - - join_struct.joined_p = TRUE; - dprintf (JOIN_LOG, ("join%d(%d): Last thread to complete the join, setting id", flavor, join_id)); - join_struct.joined_event[!color].Reset(); - id = join_id; -#ifdef JOIN_STATS - // remember the join id, the last thread arriving, the start of the sequential phase, - // and keep track of the cycles spent waiting in the join - thd = gch->heap_number; - start_seq = get_ts(); - Interlocked::ExchangeAdd(&in_join_total[join_id], (start_seq - end[gch->heap_number])); -#endif //JOIN_STATS + else + { +#ifdef DYNAMIC_HEAP_COUNT + start -= GetHighPrecisionTimeStamp(); +#endif //DYNAMIC_HEAP_COUNT + WaitLongerNoInstru (i); +#ifdef DYNAMIC_HEAP_COUNT + start += GetHighPrecisionTimeStamp(); +#endif //DYNAMIC_HEAP_COUNT + 
} } +#ifdef DYNAMIC_HEAP_COUNT + uint64_t end = GetHighPrecisionTimeStamp(); + Interlocked::ExchangeAdd64 (&msl->msl_wait_time, end - start); + dprintf (3, ("h%d wait for msl lock wait time %zd, total wait time: %zd", heap_number, (end - start), msl->msl_wait_time)); +#endif //DYNAMIC_HEAP_COUNT } + while (Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) != lock_free); - // Reverse join - first thread gets here does the work; other threads will only proceed - // after the work is done. - // Note that you cannot call this twice in a row on the same thread. Plus there's no - // need to call it twice in row - you should just merge the work. - BOOL r_join (gc_heap* gch, int join_id) - { - - if (join_struct.n_threads == 1) - { - return TRUE; - } + return msl_entered; +} - if (Interlocked::CompareExchange(&join_struct.r_join_lock, 0, join_struct.n_threads) == 0) - { - fire_event (gch->heap_number, time_start, type_join, join_id); +#ifdef _DEBUG - dprintf (JOIN_LOG, ("r_join() Waiting...")); +#define ASSERT_NOT_HOLDING_SPIN_LOCK(pSpinLock) \ + _ASSERTE((pSpinLock)->holding_thread != GCToEEInterface::GetThread()); - //busy wait around the color -respin: - int spin_count = 256 * yp_spin_count_unit; - for (int j = 0; j < spin_count; j++) - { - if (join_struct.wait_done) - { - break; - } - YieldProcessor(); // indicate to the processor that we are spinning - } +#else //_DEBUG +#endif //_DEBUG - // we've spun, and if color still hasn't changed, fall into hard wait - if (!join_struct.wait_done) - { - dprintf (JOIN_LOG, ("Join() hard wait on reset event %d", first_thread_arrived)); - uint32_t dwJoinWait = join_struct.joined_event[first_thread_arrived].Wait(INFINITE, FALSE); - if (dwJoinWait != WAIT_OBJECT_0) - { - STRESS_LOG1 (LF_GC, LL_FATALERROR, "joined event wait failed with code: %zx", dwJoinWait); - FATAL_GC_ERROR (); - } - } +bool gc_heap::enable_preemptive () +{ + return GCToEEInterface::EnablePreemptiveGC(); +} - // avoid race due to the thread about to 
reset the event (occasionally) being preempted before ResetEvent() - if (!join_struct.wait_done) - { - goto respin; - } - - dprintf (JOIN_LOG, ("r_join() done")); - - fire_event (gch->heap_number, time_end, type_join, join_id); - - return FALSE; - } - else - { - fire_event (gch->heap_number, time_start, type_first_r_join, join_id); - return TRUE; - } - } - -#ifdef JOIN_STATS - uint64_t get_ts() +void gc_heap::disable_preemptive (bool restore_cooperative) +{ + if (restore_cooperative) { - return GCToOSInterface::QueryPerformanceCounter(); + GCToEEInterface::DisablePreemptiveGC(); } +} - void start_ts (gc_heap* gch) - { - // parallel execution ends here - start[gch->heap_number] = get_ts(); - } -#endif //JOIN_STATS +inline +ptrdiff_t round_down (ptrdiff_t add, int pitch) +{ + return ((add / pitch) * pitch); +} - void restart() - { -#ifdef JOIN_STATS - uint64_t elapsed_seq = get_ts() - start_seq; - uint64_t max = 0, sum = 0, wake = 0; - uint64_t min_ts = start[0]; - for (int i = 1; i < join_struct.n_threads; i++) - { - if(min_ts > start[i]) min_ts = start[i]; - } +#if defined(FEATURE_STRUCTALIGN) && defined(RESPECT_LARGE_ALIGNMENT) +// FEATURE_STRUCTALIGN allows the compiler to dictate the alignment, +// i.e, if a larger alignment matters or is beneficial, the compiler +// generated info tells us so. RESPECT_LARGE_ALIGNMENT is just the +// converse - it's a heuristic for the GC to use a larger alignment. 
+#error FEATURE_STRUCTALIGN should imply !RESPECT_LARGE_ALIGNMENT +#endif - for (int i = 0; i < join_struct.n_threads; i++) - { - uint64_t wake_delay = start[i] - min_ts; - uint64_t elapsed = end[i] - start[i]; - if (max < elapsed) - max = elapsed; - sum += elapsed; - wake += wake_delay; - } - uint64_t seq_loss = (join_struct.n_threads - 1)*elapsed_seq; - uint64_t par_loss = join_struct.n_threads*max - sum; - double efficiency = 0.0; - if (max > 0) - efficiency = sum*100.0/(join_struct.n_threads*max); +#if defined(FEATURE_STRUCTALIGN) && defined(FEATURE_LOH_COMPACTION) +#error FEATURE_STRUCTALIGN and FEATURE_LOH_COMPACTION are mutually exclusive +#endif - const double ts_scale = 1e-6; +// This is always power of 2. +#ifdef HOST_64BIT +const size_t min_segment_size_hard_limit = 1024*1024*16; +#else //HOST_64BIT +const size_t min_segment_size_hard_limit = 1024*1024*4; +#endif //HOST_64BIT - // enable this printf to get statistics on each individual join as it occurs - //printf("join #%3d seq_loss = %5g par_loss = %5g efficiency = %3.0f%%\n", join_id, ts_scale*seq_loss, ts_scale*par_loss, efficiency); +extern const size_t etw_allocation_tick = 100*1024; - elapsed_total[id] += sum; - wake_total[id] += wake; - seq_loss_total[id] += seq_loss; - par_loss_total[id] += par_loss; +extern const size_t low_latency_alloc = 256*1024; - // every 10 seconds, print a summary of the time spent in each type of join - if (GCToOSInterface::GetLowPrecisionTimeStamp() - start_tick > 10*1000) - { - printf("**** summary *****\n"); - for (int i = 0; i < 16; i++) - { - printf("join #%3d elapsed_total = %8g wake_loss = %8g seq_loss = %8g par_loss = %8g in_join_total = %8g\n", - i, - ts_scale*elapsed_total[i], - ts_scale*wake_total[i], - ts_scale*seq_loss_total[i], - ts_scale*par_loss_total[i], - ts_scale*in_join_total[i]); - elapsed_total[i] = wake_total[i] = seq_loss_total[i] = par_loss_total[i] = in_join_total[i] = 0; - } - start_tick = GCToOSInterface::GetLowPrecisionTimeStamp(); - } 
-#endif //JOIN_STATS +extern const size_t fgn_check_quantum = 2*1024*1024; - fire_event (join_heap_restart, time_start, type_restart, -1); - assert (join_struct.joined_p); - join_struct.joined_p = FALSE; - join_struct.join_lock = join_struct.n_threads; - dprintf (JOIN_LOG, ("join%d(%d): Restarting from join: join_lock is %d", flavor, id, (int32_t)(join_struct.join_lock))); - int color = join_struct.lock_color.LoadWithoutBarrier(); - join_struct.lock_color = !color; - join_struct.joined_event[color].Set(); +#ifdef MH_SC_MARK +const int max_snoop_level = 128; +#endif //MH_SC_MARK - fire_event (join_heap_restart, time_end, type_restart, -1); +#ifdef USE_REGIONS +void region_write_barrier_settings (WriteBarrierParameters* args, + gc_heap::region_info* map_region_to_generation_skewed, + uint8_t region_shr) +{ + switch (GCConfig::GetGCWriteBarrier()) + { + default: + case GCConfig::WRITE_BARRIER_DEFAULT: + case GCConfig::WRITE_BARRIER_REGION_BIT: + // bitwise region write barrier is the default now + args->region_to_generation_table = (uint8_t*)map_region_to_generation_skewed; + args->region_shr = region_shr; + args->region_use_bitwise_write_barrier = true; + break; -#ifdef JOIN_STATS - start[thd] = get_ts(); -#endif //JOIN_STATS - } + case GCConfig::WRITE_BARRIER_REGION_BYTE: + // bytewise region write barrier + args->region_to_generation_table = (uint8_t*)map_region_to_generation_skewed; + args->region_shr = region_shr; + assert (args->region_use_bitwise_write_barrier == false); + break; - BOOL joined() - { - dprintf (JOIN_LOG, ("join%d(%d): joined, join_lock is %d", flavor, id, (int32_t)(join_struct.join_lock))); - return join_struct.joined_p; + case GCConfig::WRITE_BARRIER_SERVER: + // server write barrier + // args should have been zero initialized + assert (args->region_use_bitwise_write_barrier == false); + assert (args->region_to_generation_table == nullptr); + assert (args->region_shr == 0); + break; } +} +#endif //USE_REGIONS - void r_restart() - { - if 
(join_struct.n_threads != 1) - { - fire_event (join_heap_r_restart, time_start, type_restart, -1); - join_struct.wait_done = TRUE; - join_struct.joined_event[first_thread_arrived].Set(); - fire_event (join_heap_r_restart, time_end, type_restart, -1); - } - } +void stomp_write_barrier_ephemeral (uint8_t* ephemeral_low, uint8_t* ephemeral_high +#ifdef USE_REGIONS + , gc_heap::region_info* map_region_to_generation_skewed + , uint8_t region_shr +#endif //USE_REGIONS + ) +{ +#ifndef USE_REGIONS + initGCShadow(); +#endif - void r_init() - { - if (join_struct.n_threads != 1) - { - join_struct.r_join_lock = join_struct.n_threads; - join_struct.wait_done = FALSE; - join_struct.joined_event[first_thread_arrived].Reset(); - } - } -}; + WriteBarrierParameters args = {}; + args.operation = WriteBarrierOp::StompEphemeral; + args.is_runtime_suspended = true; + args.ephemeral_low = ephemeral_low; + args.ephemeral_high = ephemeral_high; +#ifdef USE_REGIONS + region_write_barrier_settings (&args, map_region_to_generation_skewed, region_shr); +#endif //USE_REGIONS + GCToEEInterface::StompWriteBarrier(&args); +} -t_join gc_t_join; +void stomp_write_barrier_initialize(uint8_t* ephemeral_low, uint8_t* ephemeral_high +#ifdef USE_REGIONS + , gc_heap::region_info* map_region_to_generation_skewed + , uint8_t region_shr +#endif //USE_REGIONS + ) +{ + WriteBarrierParameters args = {}; + args.operation = WriteBarrierOp::Initialize; + args.is_runtime_suspended = true; + args.requires_upper_bounds_check = false; + args.card_table = g_gc_card_table; -#ifdef BACKGROUND_GC -t_join bgc_t_join; -#endif //BACKGROUND_GC +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + args.card_bundle_table = g_gc_card_bundle_table; +#endif -#endif //MULTIPLE_HEAPS + args.lowest_address = g_gc_lowest_address; + args.highest_address = g_gc_highest_address; + args.ephemeral_low = ephemeral_low; + args.ephemeral_high = ephemeral_high; -#define spin_and_switch(count_to_spin, expr) \ -{ \ - for (int j = 0; j < count_to_spin; 
j++) \ - { \ - if (expr) \ - { \ - break;\ - } \ - YieldProcessor(); \ - } \ - if (!(expr)) \ - { \ - GCToOSInterface::YieldThread(0); \ - } \ -} +#ifdef USE_REGIONS + region_write_barrier_settings (&args, map_region_to_generation_skewed, region_shr); +#endif //USE_REGIONS -#define spin_and_wait(count_to_spin, expr) \ -{ \ - while (!expr) \ - { \ - for (int j = 0; j < count_to_spin; j++) \ - { \ - if (expr) \ - { \ - break; \ - } \ - YieldProcessor (); \ - } \ - if (!(expr)) \ - { \ - GCToOSInterface::YieldThread (0); \ - } \ - } \ + GCToEEInterface::StompWriteBarrier(&args); } -#ifdef BACKGROUND_GC - -#define max_pending_allocs 64 +class mark; +class generation; +class heap_segment; +class CObjectHeader; +class dynamic_data; +class l_heap; +class sorted_table; +class c_synchronize; -class exclusive_sync -{ - VOLATILE(uint8_t*) rwp_object; - VOLATILE(int32_t) needs_checking; +#ifdef FEATURE_PREMORTEM_FINALIZATION +static +HRESULT AllocateCFinalize(CFinalize **pCFinalize); +#endif // FEATURE_PREMORTEM_FINALIZATION - int spin_count; +uint8_t* tree_search (uint8_t* tree, uint8_t* old_address); - uint8_t cache_separator[HS_CACHE_LINE_SIZE - (sizeof (spin_count) + sizeof (needs_checking) + sizeof (rwp_object))]; - // TODO - perhaps each object should be on its own cache line... 
- VOLATILE(uint8_t*) alloc_objects[max_pending_allocs]; +/* per heap static initialization */ +#if defined(BACKGROUND_GC) && !defined(MULTIPLE_HEAPS) +uint32_t* gc_heap::mark_array; +#endif //BACKGROUND_GC && !MULTIPLE_HEAPS - int find_free_index () - { - for (int i = 0; i < max_pending_allocs; i++) - { - if (alloc_objects [i] == (uint8_t*)0) - { - return i; - } - } +uint8_t** gc_heap::g_mark_list; +uint8_t** gc_heap::g_mark_list_copy; +size_t gc_heap::mark_list_size; +size_t gc_heap::g_mark_list_total_size; +bool gc_heap::mark_list_overflow; +#ifdef USE_REGIONS +uint8_t*** gc_heap::g_mark_list_piece; +size_t gc_heap::g_mark_list_piece_size; +size_t gc_heap::g_mark_list_piece_total_size; +#endif //USE_REGIONS - return -1; - } +seg_mapping* seg_mapping_table; -public: - void init() - { - spin_count = 32 * (g_num_processors - 1); - rwp_object = 0; - needs_checking = 0; - for (int i = 0; i < max_pending_allocs; i++) - { - alloc_objects [i] = (uint8_t*)0; - } - } +#ifdef FEATURE_BASICFREEZE +sorted_table* gc_heap::seg_table; +#endif //FEATURE_BASICFREEZE - void check() - { - for (int i = 0; i < max_pending_allocs; i++) - { - if (alloc_objects [i] != (uint8_t*)0) - { - FATAL_GC_ERROR(); - } - } - } +#ifdef MULTIPLE_HEAPS +GCEvent gc_heap::ee_suspend_event; +size_t gc_heap::min_gen0_balance_delta = 0; +size_t gc_heap::min_balance_threshold = 0; +#endif //MULTIPLE_HEAPS - void bgc_mark_set (uint8_t* obj) - { - dprintf (3, ("cm: probing %p", obj)); -retry: - if (Interlocked::CompareExchange(&needs_checking, 1, 0) == 0) - { - // If we spend too much time spending all the allocs, - // consider adding a high water mark and scan up - // to that; we'll need to interlock in done when - // we update the high watermark. 
- for (int i = 0; i < max_pending_allocs; i++) - { - if (obj == alloc_objects[i]) - { - needs_checking = 0; - dprintf (3, ("cm: will spin")); - spin_and_switch (spin_count, (obj != alloc_objects[i])); - goto retry; - } - } +VOLATILE(BOOL) gc_heap::gc_started; - rwp_object = obj; - needs_checking = 0; - dprintf (3, ("cm: set %p", obj)); - return; - } - else - { - spin_and_switch (spin_count, (needs_checking == 0)); - goto retry; - } - } +#ifdef MULTIPLE_HEAPS +GCEvent gc_heap::gc_start_event; +bool gc_heap::gc_thread_no_affinitize_p = false; +uintptr_t process_mask = 0; - int uoh_alloc_set (uint8_t* obj) - { - if (!gc_heap::cm_in_progress) - { - return -1; - } +int gc_heap::n_heaps; // current number of heaps +int gc_heap::n_max_heaps; // maximum number of heaps -retry: - dprintf (3, ("uoh alloc: probing %p", obj)); +gc_heap** gc_heap::g_heaps; - if (Interlocked::CompareExchange(&needs_checking, 1, 0) == 0) - { - if (obj == rwp_object) - { - needs_checking = 0; - spin_and_switch (spin_count, (obj != rwp_object)); - goto retry; - } - else - { - int cookie = find_free_index(); +#if !defined(USE_REGIONS) || defined(_DEBUG) +size_t* gc_heap::g_promoted; +#endif //!USE_REGIONS || _DEBUG - if (cookie != -1) - { - alloc_objects[cookie] = obj; - needs_checking = 0; - //if (cookie >= 4) - //{ - // GCToOSInterface::DebugBreak(); - //} - - dprintf (3, ("uoh alloc: set %p at %d", obj, cookie)); - return cookie; - } - else - { - needs_checking = 0; - dprintf (3, ("uoh alloc: setting %p will spin to acquire a free index", obj)); - spin_and_switch (spin_count, (find_free_index () != -1)); - goto retry; - } - } - } - else - { - dprintf (3, ("uoh alloc: will spin on checking %p", obj)); - spin_and_switch (spin_count, (needs_checking == 0)); - goto retry; - } - } +#ifdef MH_SC_MARK +int* gc_heap::g_mark_stack_busy; +#endif //MH_SC_MARK - void bgc_mark_done () - { - dprintf (3, ("cm: release lock on %p", (uint8_t *)rwp_object)); - rwp_object = 0; - } +#ifdef BACKGROUND_GC +size_t* 
gc_heap::g_bpromoted; +#endif //BACKGROUND_GC - void uoh_alloc_done_with_index (int index) - { - dprintf (3, ("uoh alloc: release lock on %p based on %d", (uint8_t *)alloc_objects[index], index)); - assert ((index >= 0) && (index < max_pending_allocs)); - alloc_objects[index] = (uint8_t*)0; - } +BOOL gc_heap::gradual_decommit_in_progress_p = FALSE; +size_t gc_heap::max_decommit_step_size = 0; +#else //MULTIPLE_HEAPS - void uoh_alloc_done (uint8_t* obj) - { - if (!gc_heap::cm_in_progress) - { - return; - } - - for (int i = 0; i < max_pending_allocs; i++) - { - if (alloc_objects [i] == obj) - { - uoh_alloc_done_with_index(i); - return; - } - } - dprintf (3, ("uoh alloc: could not release lock on %p", obj)); - } -}; +#if !defined(USE_REGIONS) || defined(_DEBUG) +size_t gc_heap::g_promoted; +#endif //!USE_REGIONS || _DEBUG +#ifdef BACKGROUND_GC +size_t gc_heap::g_bpromoted; #endif //BACKGROUND_GC -void reset_memory (uint8_t* o, size_t sizeo); - -#ifdef WRITE_WATCH +// this is just to have fewer #ifdefs in code shared between WKS and SVR +// for filling out ScanContext structs +extern const int n_heaps = 1; -#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -static bool virtual_alloc_hardware_write_watch = false; -#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +#endif //MULTIPLE_HEAPS -static bool hardware_write_watch_capability = false; +size_t gc_heap::card_table_element_layout[total_bookkeeping_elements + 1]; +uint8_t* gc_heap::bookkeeping_start = nullptr; +#ifdef USE_REGIONS +uint8_t* gc_heap::bookkeeping_covered_committed = nullptr; +size_t gc_heap::bookkeeping_sizes[total_bookkeeping_elements]; +#endif //USE_REGIONS +size_t gc_heap::reserved_memory = 0; +size_t gc_heap::reserved_memory_limit = 0; +BOOL gc_heap::g_low_memory_status; -inline bool can_use_hardware_write_watch() -{ - return hardware_write_watch_capability; -} +gc_reason gc_trigger_reason = reason_empty; +gc_latency_level gc_heap::latency_level = latency_level_default; -inline bool 
can_use_write_watch_for_card_table() -{ -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - return true; -#else - return can_use_hardware_write_watch(); -#endif -} +gc_mechanisms gc_heap::settings; -#else //WRITE_WATCH -#define mem_reserve (MEM_RESERVE) -#endif //WRITE_WATCH +gc_history_global gc_heap::gc_data_global; -void WaitLongerNoInstru (int i) -{ - // every 8th attempt: - bool bToggleGC = GCToEEInterface::EnablePreemptiveGC(); +uint64_t gc_heap::gc_last_ephemeral_decommit_time = 0; - // if we're waiting for gc to finish, we should block immediately - if (g_fSuspensionPending == 0) - { - if (g_num_processors > 1) - { - YieldProcessor(); // indicate to the processor that we are spinning - if (i & 0x01f) - GCToOSInterface::YieldThread (0); - else - GCToOSInterface::Sleep (5); - } - else - GCToOSInterface::Sleep (5); - } +CLRCriticalSection gc_heap::check_commit_cs; - // If CLR is hosted, a thread may reach here while it is in preemptive GC mode, - // or it has no Thread object, in order to force a task to yield, or to triger a GC. - // It is important that the thread is going to wait for GC. Otherwise the thread - // is in a tight loop. If the thread has high priority, the perf is going to be very BAD. - if (bToggleGC) - { -#ifdef _DEBUG - // In debug builds, all enter_spin_lock operations go through this code. If a GC has - // started, it is important to block until the GC thread calls set_gc_done (since it is - // guaranteed to have cleared g_TrapReturningThreads by this point). This avoids livelock - // conditions which can otherwise occur if threads are allowed to spin in this function - // (and therefore starve the GC thread) between the point when the GC thread sets the - // WaitForGC event and the point when the GC thread clears g_TrapReturningThreads. 
- if (gc_heap::gc_started) - { - gc_heap::wait_for_gc_done(); - } -#endif // _DEBUG - GCToEEInterface::DisablePreemptiveGC(); - } - else if (g_fSuspensionPending > 0) - { - g_theGCHeap->WaitUntilGCComplete(); - } -} +#ifdef COMMITTED_BYTES_SHADOW +CLRCriticalSection gc_heap::decommit_lock; +#endif //COMMITTED_BYTES_SHADOW -inline -static void safe_switch_to_thread() -{ - bool cooperative_mode = gc_heap::enable_preemptive(); +size_t gc_heap::current_total_committed = 0; - GCToOSInterface::YieldThread(0); +size_t gc_heap::committed_by_oh[recorded_committed_bucket_counts]; - gc_heap::disable_preemptive(cooperative_mode); -} +size_t gc_heap::current_total_committed_bookkeeping = 0; -#define check_msl_status(msg, size) if (msl_status == msl_retry_different_heap) \ - { \ - dprintf (5555, ("h%d RETRY %s(%Id)", heap_number, msg, size)); \ - return a_state_retry_allocate; \ - } +BOOL gc_heap::reset_mm_p = TRUE; -static const int32_t lock_free = -1; -static const int32_t lock_taken = 0; -static const int32_t lock_decommissioned = 1; +#ifdef FEATURE_EVENT_TRACE +bool gc_heap::informational_event_enabled_p = false; +uint64_t* gc_heap::gc_time_info = 0; -// If our heap got decommissioned, we need to try an existing heap. -//inline -bool gc_heap::should_move_heap (GCSpinLock* msl) -{ -#ifdef MULTIPLE_HEAPS - if (msl->lock == lock_decommissioned) - { - dprintf (5555, ("heap#%d got decommissioned! need to retry", heap_number)); - } - return (msl->lock == lock_decommissioned); -#else //MULTIPLE_HEAPS - return false; -#endif //MULTIPLE_HEAPS -} +#ifdef BACKGROUND_GC +uint64_t* gc_heap::bgc_time_info = 0; +#endif //BACKGROUND_GC -// All the places where we could be stopped because there was a suspension should call should_move_heap to check if we need to return -// so we can try another heap or we can continue the allocation on the same heap. 
-enter_msl_status gc_heap::enter_spin_lock_msl_helper (GCSpinLock* msl) -{ - do - { -#ifdef DYNAMIC_HEAP_COUNT - uint64_t start = GetHighPrecisionTimeStamp(); -#endif //DYNAMIC_HEAP_COUNT +size_t gc_heap::physical_memory_from_config = 0; - unsigned int i = 0; - while (VolatileLoad (&msl->lock) != lock_free) - { - if (should_move_heap (msl)) - { - return msl_retry_different_heap; - } - if ((++i & 7) && !IsGCInProgress ()) - { - if (g_num_processors > 1) - { -#ifndef MULTIPLE_HEAPS - int spin_count = 32 * yp_spin_count_unit; -#else //!MULTIPLE_HEAPS - int spin_count = yp_spin_count_unit; -#endif //!MULTIPLE_HEAPS - for (int j = 0; j < spin_count; j++) - { - if (VolatileLoad (&msl->lock) == lock_free || IsGCInProgress ()) - break; - // give the HT neighbor a chance to run - YieldProcessor (); - } - if (VolatileLoad (&msl->lock) != lock_free && !IsGCInProgress ()) - { -#ifdef DYNAMIC_HEAP_COUNT - start -= GetHighPrecisionTimeStamp(); -#endif //DYNAMIC_HEAP_COUNT - safe_switch_to_thread (); -#ifdef DYNAMIC_HEAP_COUNT - start += GetHighPrecisionTimeStamp(); -#endif //DYNAMIC_HEAP_COUNT - } - } - else - { - safe_switch_to_thread (); - } - } - else - { -#ifdef DYNAMIC_HEAP_COUNT - start -= GetHighPrecisionTimeStamp(); -#endif //DYNAMIC_HEAP_COUNT - WaitLongerNoInstru (i); -#ifdef DYNAMIC_HEAP_COUNT - start += GetHighPrecisionTimeStamp(); -#endif //DYNAMIC_HEAP_COUNT - } - } -#ifdef DYNAMIC_HEAP_COUNT - uint64_t end = GetHighPrecisionTimeStamp(); - Interlocked::ExchangeAdd64 (&msl->msl_wait_time, end - start); - dprintf (3, ("h%d wait for msl lock wait time %zd, total wait time: %zd", heap_number, (end - start), msl->msl_wait_time)); -#endif //DYNAMIC_HEAP_COUNT - } - while (Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) != lock_free); +size_t gc_heap::gen0_min_budget_from_config = 0; - return msl_entered; -} +size_t gc_heap::gen0_max_budget_from_config = 0; -inline -enter_msl_status gc_heap::enter_spin_lock_msl (GCSpinLock* msl) -{ - if 
(Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) == lock_free) - return msl_entered; +int gc_heap::high_mem_percent_from_config = 0; - return enter_spin_lock_msl_helper (msl); -} +bool gc_heap::use_frozen_segments_p = false; -// -// We need the following methods to have volatile arguments, so that they can accept -// raw pointers in addition to the results of the & operator on Volatile. -// this will never be used for the more_space_lock_xxx, which is why -// "lock_decommissioned" cannot happen. -inline -static void enter_spin_lock_noinstru (RAW_KEYWORD(volatile) int32_t* lock) -{ -retry: +#ifdef FEATURE_LOH_COMPACTION +gc_heap::etw_loh_compact_info* gc_heap::loh_compact_info; +#endif //FEATURE_LOH_COMPACTION +#endif //FEATURE_EVENT_TRACE - if (Interlocked::CompareExchange(lock, lock_taken, lock_free) != lock_free) - { - unsigned int i = 0; - while (VolatileLoad(lock) != lock_free) - { - // will never be used for more_space_lock_xxx - assert (VolatileLoad(lock) != lock_decommissioned); - if ((++i & 7) && !IsGCInProgress()) - { - if (g_num_processors > 1) - { -#ifndef MULTIPLE_HEAPS - int spin_count = 32 * yp_spin_count_unit; -#else //!MULTIPLE_HEAPS - int spin_count = yp_spin_count_unit; -#endif //!MULTIPLE_HEAPS - for (int j = 0; j < spin_count; j++) - { - if (VolatileLoad(lock) == lock_free || IsGCInProgress()) - break; - YieldProcessor(); // indicate to the processor that we are spinning - } - if (VolatileLoad(lock) != lock_free && !IsGCInProgress()) - { - safe_switch_to_thread(); - } - } - else - { - safe_switch_to_thread(); - } - } - else - { - WaitLongerNoInstru(i); - } - } - goto retry; - } -} +bool gc_heap::hard_limit_config_p = false; -inline -static BOOL try_enter_spin_lock_noinstru(RAW_KEYWORD(volatile) int32_t* lock) -{ - return (Interlocked::CompareExchange(&*lock, lock_taken, lock_free) == lock_free); -} +#if defined(SHORT_PLUGS) && !defined(USE_REGIONS) +double gc_heap::short_plugs_pad_ratio = 0; +#endif //SHORT_PLUGS && !USE_REGIONS 
-inline -static void leave_spin_lock_noinstru (RAW_KEYWORD(volatile) int32_t* lock) -{ - VolatileStore((int32_t*)lock, lock_free); -} +int gc_heap::generation_skip_ratio_threshold = 0; +int gc_heap::conserve_mem_setting = 0; +bool gc_heap::spin_count_unit_config_p = false; -#ifdef _DEBUG +uint64_t gc_heap::suspended_start_time = 0; +uint64_t gc_heap::end_gc_time = 0; +uint64_t gc_heap::total_suspended_time = 0; +uint64_t gc_heap::process_start_time = 0; +last_recorded_gc_info gc_heap::last_ephemeral_gc_info; +last_recorded_gc_info gc_heap::last_full_blocking_gc_info; -inline -static void enter_spin_lock (GCSpinLock *pSpinLock) -{ - enter_spin_lock_noinstru (&pSpinLock->lock); - assert (pSpinLock->holding_thread == (Thread*)-1); - pSpinLock->holding_thread = GCToEEInterface::GetThread(); -} +uint64_t gc_heap::last_alloc_reset_suspended_end_time = 0; +size_t gc_heap::max_peak_heap_size = 0; +VOLATILE(size_t) gc_heap::llc_size = 0; -inline -static BOOL try_enter_spin_lock(GCSpinLock *pSpinLock) -{ - BOOL ret = try_enter_spin_lock_noinstru(&pSpinLock->lock); - if (ret) - pSpinLock->holding_thread = GCToEEInterface::GetThread(); - return ret; -} +#ifdef BACKGROUND_GC +last_recorded_gc_info gc_heap::last_bgc_info[2]; +VOLATILE(bool) gc_heap::is_last_recorded_bgc = false; +VOLATILE(int) gc_heap::last_bgc_info_index = 0; +#endif //BACKGROUND_GC -inline -static void leave_spin_lock(GCSpinLock *pSpinLock) -{ - bool gc_thread_p = GCToEEInterface::WasCurrentThreadCreatedByGC(); - pSpinLock->released_by_gc_p = gc_thread_p; - pSpinLock->holding_thread = (Thread*) -1; - if (pSpinLock->lock != lock_free) - leave_spin_lock_noinstru(&pSpinLock->lock); -} +#ifdef DYNAMIC_HEAP_COUNT +size_t gc_heap::hc_change_cancelled_count_prep = 0; +#ifdef BACKGROUND_GC +int gc_heap::bgc_th_creation_hist_index = 0; +gc_heap::bgc_thread_creation_history gc_heap::bgc_th_creation_hist[max_bgc_thread_creation_count]; +size_t gc_heap::bgc_th_count_created = 0; +size_t 
gc_heap::bgc_th_count_created_th_existed = 0; +size_t gc_heap::bgc_th_count_creation_failed = 0; +size_t gc_heap::bgc_init_gc_index = 0; +VOLATILE(short) gc_heap::bgc_init_n_heaps = 0; +size_t gc_heap::hc_change_cancelled_count_bgc = 0; +#endif //BACKGROUND_GC +#endif //DYNAMIC_HEAP_COUNT -#define ASSERT_HOLDING_SPIN_LOCK(pSpinLock) \ - _ASSERTE((pSpinLock)->holding_thread == GCToEEInterface::GetThread()); +#if defined(HOST_64BIT) +// consider putting this in dynamic data - +// we may want different values for workstation +// and server GC. +size_t gc_heap::youngest_gen_desired_th; +#endif //HOST_64BIT -#define ASSERT_NOT_HOLDING_SPIN_LOCK(pSpinLock) \ - _ASSERTE((pSpinLock)->holding_thread != GCToEEInterface::GetThread()); +uint64_t gc_heap::mem_one_percent = 0; -#else //_DEBUG +uint32_t gc_heap::high_memory_load_th = 0; -//In the concurrent version, the Enable/DisablePreemptiveGC is optional because -//the gc thread call WaitLonger. -void WaitLonger (int i -#ifdef SYNCHRONIZATION_STATS - , GCSpinLock* spin_lock -#endif //SYNCHRONIZATION_STATS - ) -{ -#ifdef SYNCHRONIZATION_STATS - (spin_lock->num_wait_longer)++; -#endif //SYNCHRONIZATION_STATS +uint32_t gc_heap::m_high_memory_load_th; - // every 8th attempt: - bool bToggleGC = GCToEEInterface::EnablePreemptiveGC(); - assert (bToggleGC); +uint32_t gc_heap::v_high_memory_load_th; - // if we're waiting for gc to finish, we should block immediately - if (!gc_heap::gc_started) - { -#ifdef SYNCHRONIZATION_STATS - (spin_lock->num_switch_thread_w)++; -#endif //SYNCHRONIZATION_STATS - if (g_num_processors > 1) - { - YieldProcessor(); // indicate to the processor that we are spinning - if (i & 0x01f) - GCToOSInterface::YieldThread (0); - else - GCToOSInterface::Sleep (5); - } - else - GCToOSInterface::Sleep (5); - } +uint32_t gc_heap::almost_high_memory_load_th; - // If CLR is hosted, a thread may reach here while it is in preemptive GC mode, - // or it has no Thread object, in order to force a task to yield, or to triger 
a GC. - // It is important that the thread is going to wait for GC. Otherwise the thread - // is in a tight loop. If the thread has high priority, the perf is going to be very BAD. - if (gc_heap::gc_started) - { - gc_heap::wait_for_gc_done(); - } +bool gc_heap::is_restricted_physical_mem; - if (bToggleGC) - { -#ifdef SYNCHRONIZATION_STATS - (spin_lock->num_disable_preemptive_w)++; -#endif //SYNCHRONIZATION_STATS - GCToEEInterface::DisablePreemptiveGC(); - } -} +uint64_t gc_heap::total_physical_mem = 0; -inline -static void enter_spin_lock (GCSpinLock* spin_lock) -{ -retry: +uint64_t gc_heap::entry_available_physical_mem = 0; - if (Interlocked::CompareExchange(&spin_lock->lock, lock_taken, lock_free) != lock_free) - { - unsigned int i = 0; - while (spin_lock->lock != lock_free) - { - assert (spin_lock->lock != lock_decommissioned); - if ((++i & 7) && !gc_heap::gc_started) - { - if (g_num_processors > 1) - { -#ifndef MULTIPLE_HEAPS - int spin_count = 32 * yp_spin_count_unit; -#else //!MULTIPLE_HEAPS - int spin_count = yp_spin_count_unit; -#endif //!MULTIPLE_HEAPS - for (int j = 0; j < spin_count; j++) - { - if (spin_lock->lock == lock_free || gc_heap::gc_started) - break; - YieldProcessor(); // indicate to the processor that we are spinning - } - if (spin_lock->lock != lock_free && !gc_heap::gc_started) - { -#ifdef SYNCHRONIZATION_STATS - (spin_lock->num_switch_thread)++; -#endif //SYNCHRONIZATION_STATS - bool cooperative_mode = gc_heap::enable_preemptive (); +size_t gc_heap::heap_hard_limit = 0; - GCToOSInterface::YieldThread(0); +size_t gc_heap::heap_hard_limit_oh[total_oh_count]; - gc_heap::disable_preemptive (cooperative_mode); - } - } - else - GCToOSInterface::YieldThread(0); - } - else - { - WaitLonger(i -#ifdef SYNCHRONIZATION_STATS - , spin_lock -#endif //SYNCHRONIZATION_STATS - ); - } - } - goto retry; - } -} +#ifdef USE_REGIONS -inline -static BOOL try_enter_spin_lock(GCSpinLock* spin_lock) -{ - return (Interlocked::CompareExchange(&spin_lock->lock, 
lock_taken, lock_free) == lock_free); -} +size_t gc_heap::regions_range = 0; -inline -static void leave_spin_lock (GCSpinLock * spin_lock) -{ - spin_lock->lock = lock_free; -} +#endif //USE_REGIONS -#define ASSERT_HOLDING_SPIN_LOCK(pSpinLock) +bool affinity_config_specified_p = false; -#endif //_DEBUG +#ifdef USE_REGIONS +region_allocator global_region_allocator; +uint8_t*(*initial_regions)[total_generation_count][2] = nullptr; +size_t gc_heap::region_count = 0; -bool gc_heap::enable_preemptive () -{ - return GCToEEInterface::EnablePreemptiveGC(); -} +gc_heap::region_info* gc_heap::map_region_to_generation = nullptr; +gc_heap::region_info* gc_heap::map_region_to_generation_skewed = nullptr; -void gc_heap::disable_preemptive (bool restore_cooperative) -{ - if (restore_cooperative) - { - GCToEEInterface::DisablePreemptiveGC(); - } -} +#endif //USE_REGIONS -typedef void ** PTR_PTR; -inline -void memclr ( uint8_t* mem, size_t size) -{ - dprintf (3, ("MEMCLR: %p, %zd", mem, size)); - assert ((size & (sizeof(PTR_PTR)-1)) == 0); - assert (sizeof(PTR_PTR) == DATA_ALIGNMENT); - memset (mem, 0, size); -} +#ifdef BACKGROUND_GC +GCEvent gc_heap::bgc_start_event; +gc_mechanisms gc_heap::saved_bgc_settings; -inline -ptrdiff_t round_down (ptrdiff_t add, int pitch) -{ - return ((add / pitch) * pitch); -} +gc_history_global gc_heap::bgc_data_global; -#if defined(FEATURE_STRUCTALIGN) && defined(RESPECT_LARGE_ALIGNMENT) -// FEATURE_STRUCTALIGN allows the compiler to dictate the alignment, -// i.e, if a larger alignment matters or is beneficial, the compiler -// generated info tells us so. RESPECT_LARGE_ALIGNMENT is just the -// converse - it's a heuristic for the GC to use a larger alignment. 
-#error FEATURE_STRUCTALIGN should imply !RESPECT_LARGE_ALIGNMENT -#endif +GCEvent gc_heap::background_gc_done_event; -#if defined(FEATURE_STRUCTALIGN) && defined(FEATURE_LOH_COMPACTION) -#error FEATURE_STRUCTALIGN and FEATURE_LOH_COMPACTION are mutually exclusive -#endif +GCEvent gc_heap::ee_proceed_event; -// Returns true if two pointers have the same large (double than normal) alignment. -inline -BOOL same_large_alignment_p (uint8_t* p1, uint8_t* p2) -{ -#ifdef RESPECT_LARGE_ALIGNMENT - const size_t LARGE_ALIGNMENT_MASK = 2 * DATA_ALIGNMENT - 1; - return ((((size_t)p1 ^ (size_t)p2) & LARGE_ALIGNMENT_MASK) == 0); -#else - UNREFERENCED_PARAMETER(p1); - UNREFERENCED_PARAMETER(p2); - return TRUE; -#endif // RESPECT_LARGE_ALIGNMENT -} +bool gc_heap::gc_can_use_concurrent = false; -// Determines the padding size required to fix large alignment during relocation. -inline -size_t switch_alignment_size (BOOL already_padded_p) -{ -#ifndef RESPECT_LARGE_ALIGNMENT - assert (!"Should not be called"); -#endif // RESPECT_LARGE_ALIGNMENT +bool gc_heap::temp_disable_concurrent_p = false; - if (already_padded_p) - return DATA_ALIGNMENT; - else - return Align (min_obj_size) | DATA_ALIGNMENT; -} +uint32_t gc_heap::cm_in_progress = FALSE; -#ifdef FEATURE_STRUCTALIGN -void set_node_aligninfo (uint8_t *node, int requiredAlignment, ptrdiff_t pad); -void clear_node_aligninfo (uint8_t *node); -#else // FEATURE_STRUCTALIGN -#define node_realigned(node) (((plug_and_reloc*)(node))[-1].reloc & 1) -void set_node_realigned (uint8_t* node); -void clear_node_realigned(uint8_t* node); -#endif // FEATURE_STRUCTALIGN +BOOL gc_heap::dont_restart_ee_p = FALSE; -inline -size_t AlignQword (size_t nbytes) -{ -#ifdef FEATURE_STRUCTALIGN - // This function is used to align everything on the large object - // heap to an 8-byte boundary, to reduce the number of unaligned - // accesses to (say) arrays of doubles. 
With FEATURE_STRUCTALIGN, - // the compiler dictates the optimal alignment instead of having - // a heuristic in the GC. - return Align (nbytes); -#else // FEATURE_STRUCTALIGN - return (nbytes + 7) & ~7; -#endif // FEATURE_STRUCTALIGN -} +BOOL gc_heap::keep_bgc_threads_p = FALSE; -inline -BOOL Aligned (size_t n) -{ - return (n & ALIGNCONST) == 0; -} +GCEvent gc_heap::bgc_threads_sync_event; -#define OBJECT_ALIGNMENT_OFFSET (sizeof(MethodTable *)) +BOOL gc_heap::do_ephemeral_gc_p = FALSE; -#ifdef FEATURE_STRUCTALIGN -#define MAX_STRUCTALIGN OS_PAGE_SIZE -#else // FEATURE_STRUCTALIGN -#define MAX_STRUCTALIGN 0 -#endif // FEATURE_STRUCTALIGN +BOOL gc_heap::do_concurrent_p = FALSE; -#ifdef FEATURE_STRUCTALIGN -inline -ptrdiff_t AdjustmentForMinPadSize(ptrdiff_t pad, int requiredAlignment) -{ - // The resulting alignpad must be either 0 or at least min_obj_size. - // Note that by computing the following difference on unsigned types, - // we can do the range check 0 < alignpad < min_obj_size with a - // single conditional branch. 
- if ((size_t)(pad - DATA_ALIGNMENT) < Align (min_obj_size) - DATA_ALIGNMENT) - { - return requiredAlignment; - } - return 0; -} +size_t gc_heap::ephemeral_fgc_counts[max_generation]; +VOLATILE(c_gc_state) gc_heap::current_c_gc_state = c_gc_state_free; -inline -ptrdiff_t ComputeStructAlignPad (uint8_t* plug, int requiredAlignment, size_t alignmentOffset=OBJECT_ALIGNMENT_OFFSET) -{ - return StructAlign (plug, requiredAlignment, alignmentOffset) - plug; -} +VOLATILE(BOOL) gc_heap::gc_background_running = FALSE; +#endif //BACKGROUND_GC -BOOL IsStructAligned (uint8_t *ptr, int requiredAlignment) -{ - return StructAlign (ptr, requiredAlignment) == ptr; -} +#ifdef USE_REGIONS +#ifdef MULTIPLE_HEAPS +uint8_t* gc_heap::gc_low; +uint8_t* gc_heap::gc_high; +#endif //MULTIPLE_HEAPS +VOLATILE(uint8_t*) gc_heap::ephemeral_low; +VOLATILE(uint8_t*) gc_heap::ephemeral_high; +#endif //USE_REGIONS -inline -ptrdiff_t ComputeMaxStructAlignPad (int requiredAlignment) -{ - if (requiredAlignment == DATA_ALIGNMENT) - return 0; - // Since a non-zero alignment padding cannot be less than min_obj_size (so we can fit the - // alignment padding object), the worst-case alignment padding is correspondingly larger - // than the required alignment. - return requiredAlignment + Align (min_obj_size) - DATA_ALIGNMENT; -} +#ifndef MULTIPLE_HEAPS +#ifdef SPINLOCK_HISTORY +int gc_heap::spinlock_info_index = 0; +spinlock_info gc_heap::last_spinlock_info[max_saved_spinlock_info]; +allocation_state gc_heap::current_uoh_alloc_state = (allocation_state)-1; +#endif //SPINLOCK_HISTORY -inline -ptrdiff_t ComputeMaxStructAlignPadLarge (int requiredAlignment) -{ - if (requiredAlignment <= get_alignment_constant (TRUE)+1) - return 0; - // This is the same as ComputeMaxStructAlignPad, except that in addition to leaving space - // for padding before the actual object, it also leaves space for filling a gap after the - // actual object. 
This is needed on the large object heap, as the outer allocation functions - // don't operate on an allocation context (which would have left space for the final gap). - return requiredAlignment + Align (min_obj_size) * 2 - DATA_ALIGNMENT; -} +uint32_t gc_heap::fgn_maxgen_percent = 0; +size_t gc_heap::fgn_last_alloc = 0; -#else // FEATURE_STRUCTALIGN -#define ComputeMaxStructAlignPad(requiredAlignment) 0 -#define ComputeMaxStructAlignPadLarge(requiredAlignment) 0 -#endif // FEATURE_STRUCTALIGN +int gc_heap::generation_skip_ratio = 100; +#ifdef FEATURE_CARD_MARKING_STEALING +VOLATILE(size_t) gc_heap::n_eph_soh = 0; +VOLATILE(size_t) gc_heap::n_gen_soh = 0; +VOLATILE(size_t) gc_heap::n_eph_loh = 0; +VOLATILE(size_t) gc_heap::n_gen_loh = 0; +#endif //FEATURE_CARD_MARKING_STEALING -//CLR_SIZE is the max amount of bytes from gen0 that is set to 0 in one chunk -#ifdef SERVER_GC -#define CLR_SIZE ((size_t)(8*1024+32)) -#else //SERVER_GC -#define CLR_SIZE ((size_t)(8*1024+32)) -#endif //SERVER_GC +uint64_t gc_heap::loh_alloc_since_cg = 0; -#define END_SPACE_AFTER_GC (loh_size_threshold + MAX_STRUCTALIGN) -// When we fit into the free list we need an extra of a min obj -#define END_SPACE_AFTER_GC_FL (END_SPACE_AFTER_GC + Align (min_obj_size)) +BOOL gc_heap::elevation_requested = FALSE; -#if defined(BACKGROUND_GC) && !defined(USE_REGIONS) -#define SEGMENT_INITIAL_COMMIT (2*OS_PAGE_SIZE) -#else -#define SEGMENT_INITIAL_COMMIT (OS_PAGE_SIZE) -#endif //BACKGROUND_GC && !USE_REGIONS +BOOL gc_heap::last_gc_before_oom = FALSE; -// This is always power of 2. 
-#ifdef HOST_64BIT -const size_t min_segment_size_hard_limit = 1024*1024*16; -#else //HOST_64BIT -const size_t min_segment_size_hard_limit = 1024*1024*4; -#endif //HOST_64BIT +BOOL gc_heap::sufficient_gen0_space_p = FALSE; -#ifndef HOST_64BIT -// Max size of heap hard limit (2^31) to be able to be aligned and rounded up on power of 2 and not overflow -const size_t max_heap_hard_limit = (size_t)2 * (size_t)1024 * (size_t)1024 * (size_t)1024; -#endif //!HOST_64BIT +BOOL gc_heap::decide_promote_gen1_pins_p = TRUE; +#ifdef BACKGROUND_GC +uint8_t* gc_heap::background_saved_lowest_address = 0; +uint8_t* gc_heap::background_saved_highest_address = 0; +uint8_t* gc_heap::next_sweep_obj = 0; +uint8_t* gc_heap::current_sweep_pos = 0; +#ifdef DOUBLY_LINKED_FL +heap_segment* gc_heap::current_sweep_seg = 0; +#endif //DOUBLY_LINKED_FL +exclusive_sync* gc_heap::bgc_alloc_lock; +#endif //BACKGROUND_GC -#ifdef SERVER_GC +oom_history gc_heap::oom_info; -#ifdef HOST_64BIT +int gc_heap::oomhist_index_per_heap = 0; -#define INITIAL_ALLOC ((size_t)((size_t)4*1024*1024*1024)) -#define LHEAP_ALLOC ((size_t)(1024*1024*256)) +oom_history gc_heap::oomhist_per_heap[max_oom_history_count]; -#else +fgm_history gc_heap::fgm_result; -#define INITIAL_ALLOC ((size_t)(1024*1024*64)) -#define LHEAP_ALLOC ((size_t)(1024*1024*32)) +size_t gc_heap::allocated_since_last_gc[total_oh_count]; -#endif // HOST_64BIT +#ifndef USE_REGIONS +BOOL gc_heap::ro_segments_in_range = FALSE; +uint8_t* gc_heap::ephemeral_low; +uint8_t* gc_heap::ephemeral_high; +BOOL gc_heap::ephemeral_promotion; +uint8_t* gc_heap::saved_ephemeral_plan_start[ephemeral_generation_count]; +size_t gc_heap::saved_ephemeral_plan_start_size[ephemeral_generation_count]; +#endif //!USE_REGIONS -#else //SERVER_GC +uint8_t* gc_heap::lowest_address; -#ifdef HOST_64BIT +uint8_t* gc_heap::highest_address; -#define INITIAL_ALLOC ((size_t)(1024*1024*256)) -#define LHEAP_ALLOC ((size_t)(1024*1024*128)) +short* gc_heap::brick_table; -#else +uint32_t* 
gc_heap::card_table; -#define INITIAL_ALLOC ((size_t)(1024*1024*16)) -#define LHEAP_ALLOC ((size_t)(1024*1024*16)) +#ifdef CARD_BUNDLE +uint32_t* gc_heap::card_bundle_table; +#endif //CARD_BUNDLE -#endif // HOST_64BIT +uint8_t* gc_heap::gc_low = 0; -#endif //SERVER_GC +uint8_t* gc_heap::gc_high = 0; -const size_t etw_allocation_tick = 100*1024; +#ifndef USE_REGIONS +uint8_t* gc_heap::demotion_low; -const size_t low_latency_alloc = 256*1024; +uint8_t* gc_heap::demotion_high; -const size_t fgn_check_quantum = 2*1024*1024; +uint8_t* gc_heap::last_gen1_pin_end; +#endif //!USE_REGIONS -#ifdef MH_SC_MARK -const int max_snoop_level = 128; -#endif //MH_SC_MARK +gen_to_condemn_tuning gc_heap::gen_to_condemn_reasons; -#ifdef CARD_BUNDLE -//threshold of heap size to turn on card bundles. -#define SH_TH_CARD_BUNDLE (40*1024*1024) -#define MH_TH_CARD_BUNDLE (180*1024*1024) -#endif //CARD_BUNDLE +size_t gc_heap::etw_allocation_running_amount[total_oh_count]; -// min size to decommit to make the OS call worthwhile -#define MIN_DECOMMIT_SIZE (100*OS_PAGE_SIZE) +uint64_t gc_heap::total_alloc_bytes_soh = 0; -// max size to decommit per millisecond -#define DECOMMIT_SIZE_PER_MILLISECOND (160*1024) +uint64_t gc_heap::total_alloc_bytes_uoh = 0; -// time in milliseconds between decommit steps -#define DECOMMIT_TIME_STEP_MILLISECONDS (100) +int gc_heap::gc_policy = 0; -inline -size_t align_on_page (size_t add) -{ - return ((add + OS_PAGE_SIZE - 1) & ~((size_t)OS_PAGE_SIZE - 1)); -} +uint64_t gc_heap::allocation_running_time; -inline -uint8_t* align_on_page (uint8_t* add) -{ - return (uint8_t*)align_on_page ((size_t) add); -} +size_t gc_heap::allocation_running_amount; -inline -size_t align_lower_page (size_t add) -{ - return (add & ~((size_t)OS_PAGE_SIZE - 1)); -} +heap_segment* gc_heap::ephemeral_heap_segment = 0; -inline -uint8_t* align_lower_page (uint8_t* add) -{ - return (uint8_t*)align_lower_page ((size_t)add); -} +#ifdef USE_REGIONS +#ifdef STRESS_REGIONS +OBJECTHANDLE* 
gc_heap::pinning_handles_for_alloc = 0; +int gc_heap::ph_index_per_heap = 0; +int gc_heap::pinning_seg_interval = 2; +size_t gc_heap::num_gen0_regions = 0; +int gc_heap::sip_seg_interval = 0; +int gc_heap::sip_seg_maxgen_interval = 0; +size_t gc_heap::num_condemned_regions = 0; +#endif //STRESS_REGIONS -inline -size_t align_write_watch_lower_page (size_t add) -{ - return (add & ~(WRITE_WATCH_UNIT_SIZE - 1)); -} +region_free_list gc_heap::free_regions[count_free_region_kinds]; -inline -uint8_t* align_write_watch_lower_page (uint8_t* add) -{ - return (uint8_t*)align_lower_page ((size_t)add); -} +int gc_heap::num_regions_freed_in_sweep = 0; -inline -BOOL power_of_two_p (size_t integer) -{ - return !(integer & (integer-1)); -} +int gc_heap::regions_per_gen[max_generation + 1]; +int gc_heap::planned_regions_per_gen[max_generation + 1]; +int gc_heap::sip_maxgen_regions_per_gen[max_generation + 1]; +heap_segment* gc_heap::reserved_free_regions_sip[max_generation]; +int gc_heap::new_gen0_regions_in_plns = 0; +int gc_heap::new_regions_in_prr = 0; +int gc_heap::new_regions_in_threading = 0; -#ifdef USE_REGIONS -void region_write_barrier_settings (WriteBarrierParameters* args, - gc_heap::region_info* map_region_to_generation_skewed, - uint8_t region_shr) -{ - switch (GCConfig::GetGCWriteBarrier()) - { - default: - case GCConfig::WRITE_BARRIER_DEFAULT: - case GCConfig::WRITE_BARRIER_REGION_BIT: - // bitwise region write barrier is the default now - args->region_to_generation_table = (uint8_t*)map_region_to_generation_skewed; - args->region_shr = region_shr; - args->region_use_bitwise_write_barrier = true; - break; +size_t gc_heap::end_gen0_region_space = 0; - case GCConfig::WRITE_BARRIER_REGION_BYTE: - // bytewise region write barrier - args->region_to_generation_table = (uint8_t*)map_region_to_generation_skewed; - args->region_shr = region_shr; - assert (args->region_use_bitwise_write_barrier == false); - break; +size_t gc_heap::end_gen0_region_committed_space = 0; - case 
GCConfig::WRITE_BARRIER_SERVER: - // server write barrier - // args should have been zero initialized - assert (args->region_use_bitwise_write_barrier == false); - assert (args->region_to_generation_table == nullptr); - assert (args->region_shr == 0); - break; - } -} -#endif //USE_REGIONS +size_t gc_heap::gen0_pinned_free_space = 0; -void stomp_write_barrier_ephemeral (uint8_t* ephemeral_low, uint8_t* ephemeral_high -#ifdef USE_REGIONS - , gc_heap::region_info* map_region_to_generation_skewed - , uint8_t region_shr -#endif //USE_REGIONS - ) -{ -#ifndef USE_REGIONS - initGCShadow(); -#endif +bool gc_heap::gen0_large_chunk_found = false; - WriteBarrierParameters args = {}; - args.operation = WriteBarrierOp::StompEphemeral; - args.is_runtime_suspended = true; - args.ephemeral_low = ephemeral_low; - args.ephemeral_high = ephemeral_high; -#ifdef USE_REGIONS - region_write_barrier_settings (&args, map_region_to_generation_skewed, region_shr); -#endif //USE_REGIONS - GCToEEInterface::StompWriteBarrier(&args); -} +size_t* gc_heap::survived_per_region = nullptr; -void stomp_write_barrier_initialize(uint8_t* ephemeral_low, uint8_t* ephemeral_high -#ifdef USE_REGIONS - , gc_heap::region_info* map_region_to_generation_skewed - , uint8_t region_shr +size_t* gc_heap::old_card_survived_per_region = nullptr; #endif //USE_REGIONS - ) -{ - WriteBarrierParameters args = {}; - args.operation = WriteBarrierOp::Initialize; - args.is_runtime_suspended = true; - args.requires_upper_bounds_check = false; - args.card_table = g_gc_card_table; -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - args.card_bundle_table = g_gc_card_bundle_table; -#endif +BOOL gc_heap::blocking_collection = FALSE; - args.lowest_address = g_gc_lowest_address; - args.highest_address = g_gc_highest_address; - args.ephemeral_low = ephemeral_low; - args.ephemeral_high = ephemeral_high; +heap_segment* gc_heap::freeable_uoh_segment = 0; -#ifdef USE_REGIONS - region_write_barrier_settings (&args, 
map_region_to_generation_skewed, region_shr); -#endif //USE_REGIONS +uint64_t gc_heap::time_bgc_last = 0; - GCToEEInterface::StompWriteBarrier(&args); -} +size_t gc_heap::mark_stack_tos = 0; -//extract the low bits [0,low[ of a uint32_t -#define lowbits(wrd, bits) ((wrd) & ((1 << (bits))-1)) -//extract the high bits [high, 32] of a uint32_t -#define highbits(wrd, bits) ((wrd) & ~((1 << (bits))-1)) +size_t gc_heap::mark_stack_bos = 0; -class mark; -class generation; -class heap_segment; -class CObjectHeader; -class dynamic_data; -class l_heap; -class sorted_table; -class c_synchronize; +size_t gc_heap::mark_stack_array_length = 0; -#ifdef FEATURE_PREMORTEM_FINALIZATION -static -HRESULT AllocateCFinalize(CFinalize **pCFinalize); -#endif // FEATURE_PREMORTEM_FINALIZATION +mark* gc_heap::mark_stack_array = 0; -uint8_t* tree_search (uint8_t* tree, uint8_t* old_address); +#if defined (_DEBUG) && defined (VERIFY_HEAP) +BOOL gc_heap::verify_pinned_queue_p = FALSE; +#endif //_DEBUG && VERIFY_HEAP +uint8_t* gc_heap::oldest_pinned_plug = 0; -#ifdef USE_INTROSORT -#define _sort introsort::sort -#elif defined(USE_VXSORT) -// in this case we have do_vxsort which takes an additional range that -// all items to be sorted are contained in -// so do not #define _sort -#else //USE_INTROSORT -#define _sort qsort1 -void qsort1(uint8_t** low, uint8_t** high, unsigned int depth); -#endif //USE_INTROSORT +size_t gc_heap::num_pinned_objects = 0; -void* virtual_alloc (size_t size); -void* virtual_alloc (size_t size, bool use_large_pages_p, uint16_t numa_node = NUMA_NODE_UNDEFINED); +#ifdef FEATURE_LOH_COMPACTION +size_t gc_heap::loh_pinned_queue_tos = 0; -/* per heap static initialization */ -#if defined(BACKGROUND_GC) && !defined(MULTIPLE_HEAPS) -uint32_t* gc_heap::mark_array; -#endif //BACKGROUND_GC && !MULTIPLE_HEAPS +size_t gc_heap::loh_pinned_queue_bos = 0; -uint8_t** gc_heap::g_mark_list; -uint8_t** gc_heap::g_mark_list_copy; -size_t gc_heap::mark_list_size; -size_t 
gc_heap::g_mark_list_total_size; -bool gc_heap::mark_list_overflow; -#ifdef USE_REGIONS -uint8_t*** gc_heap::g_mark_list_piece; -size_t gc_heap::g_mark_list_piece_size; -size_t gc_heap::g_mark_list_piece_total_size; -#endif //USE_REGIONS +size_t gc_heap::loh_pinned_queue_length = 0; -seg_mapping* seg_mapping_table; +mark* gc_heap::loh_pinned_queue = 0; -#ifdef FEATURE_BASICFREEZE -sorted_table* gc_heap::seg_table; -#endif //FEATURE_BASICFREEZE +BOOL gc_heap::loh_compacted_p = FALSE; +#endif //FEATURE_LOH_COMPACTION -#ifdef MULTIPLE_HEAPS -GCEvent gc_heap::ee_suspend_event; -size_t gc_heap::min_gen0_balance_delta = 0; -size_t gc_heap::min_balance_threshold = 0; -#endif //MULTIPLE_HEAPS +#ifdef BACKGROUND_GC -VOLATILE(BOOL) gc_heap::gc_started; +EEThreadId gc_heap::bgc_thread_id; -#ifdef MULTIPLE_HEAPS -GCEvent gc_heap::gc_start_event; -bool gc_heap::gc_thread_no_affinitize_p = false; -uintptr_t process_mask = 0; +uint8_t* gc_heap::background_written_addresses [array_size+2]; -int gc_heap::n_heaps; // current number of heaps -int gc_heap::n_max_heaps; // maximum number of heaps +heap_segment* gc_heap::freeable_soh_segment = 0; -gc_heap** gc_heap::g_heaps; +size_t gc_heap::bgc_overflow_count = 0; -#if !defined(USE_REGIONS) || defined(_DEBUG) -size_t* gc_heap::g_promoted; -#endif //!USE_REGIONS || _DEBUG +size_t gc_heap::bgc_begin_uoh_size[uoh_generation_count] = {}; +size_t gc_heap::bgc_uoh_current_size[uoh_generation_count] = {}; +size_t gc_heap::end_uoh_size[uoh_generation_count] = {}; -#ifdef MH_SC_MARK -int* gc_heap::g_mark_stack_busy; -#endif //MH_SC_MARK +size_t gc_heap::uoh_a_no_bgc[uoh_generation_count] = {}; +size_t gc_heap::uoh_a_bgc_marking[uoh_generation_count] = {}; +size_t gc_heap::uoh_a_bgc_planning[uoh_generation_count] = {}; +#ifdef BGC_SERVO_TUNING +size_t gc_heap::bgc_maxgen_end_fl_size = 0; +#endif //BGC_SERVO_TUNING -#ifdef BACKGROUND_GC -size_t* gc_heap::g_bpromoted; -#endif //BACKGROUND_GC +size_t gc_heap::background_soh_size_end_mark = 0; -BOOL 
gc_heap::gradual_decommit_in_progress_p = FALSE; -size_t gc_heap::max_decommit_step_size = 0; -#else //MULTIPLE_HEAPS +size_t gc_heap::background_soh_alloc_count = 0; -#if !defined(USE_REGIONS) || defined(_DEBUG) -size_t gc_heap::g_promoted; -#endif //!USE_REGIONS || _DEBUG +uint8_t** gc_heap::background_mark_stack_tos = 0; -#ifdef BACKGROUND_GC -size_t gc_heap::g_bpromoted; -#endif //BACKGROUND_GC +uint8_t** gc_heap::background_mark_stack_array = 0; -// this is just to have fewer #ifdefs in code shared between WKS and SVR -// for filling out ScanContext structs -const int n_heaps = 1; +size_t gc_heap::background_mark_stack_array_length = 0; -#endif //MULTIPLE_HEAPS +BOOL gc_heap::processed_eph_overflow_p = FALSE; -size_t gc_heap::card_table_element_layout[total_bookkeeping_elements + 1]; -uint8_t* gc_heap::bookkeeping_start = nullptr; #ifdef USE_REGIONS -uint8_t* gc_heap::bookkeeping_covered_committed = nullptr; -size_t gc_heap::bookkeeping_sizes[total_bookkeeping_elements]; -#endif //USE_REGIONS - -size_t gc_heap::reserved_memory = 0; -size_t gc_heap::reserved_memory_limit = 0; -BOOL gc_heap::g_low_memory_status; +BOOL gc_heap::background_overflow_p = FALSE; +#else //USE_REGIONS +uint8_t* gc_heap::background_min_overflow_address =0; -static gc_reason gc_trigger_reason = reason_empty; +uint8_t* gc_heap::background_max_overflow_address =0; -gc_latency_level gc_heap::latency_level = latency_level_default; +uint8_t* gc_heap::background_min_soh_overflow_address =0; -gc_mechanisms gc_heap::settings; +uint8_t* gc_heap::background_max_soh_overflow_address =0; -gc_history_global gc_heap::gc_data_global; +heap_segment* gc_heap::saved_overflow_ephemeral_seg = 0; -uint64_t gc_heap::gc_last_ephemeral_decommit_time = 0; +heap_segment* gc_heap::saved_sweep_ephemeral_seg = 0; -CLRCriticalSection gc_heap::check_commit_cs; +uint8_t* gc_heap::saved_sweep_ephemeral_start = 0; +#endif //USE_REGIONS -#ifdef COMMITTED_BYTES_SHADOW -CLRCriticalSection gc_heap::decommit_lock; -#endif 
//COMMITTED_BYTES_SHADOW +Thread* gc_heap::bgc_thread = 0; -size_t gc_heap::current_total_committed = 0; +uint8_t** gc_heap::c_mark_list = 0; -size_t gc_heap::committed_by_oh[recorded_committed_bucket_counts]; +size_t gc_heap::c_mark_list_length = 0; -size_t gc_heap::current_total_committed_bookkeeping = 0; +size_t gc_heap::c_mark_list_index = 0; -BOOL gc_heap::reset_mm_p = TRUE; +gc_history_per_heap gc_heap::bgc_data_per_heap; -#ifdef FEATURE_EVENT_TRACE -bool gc_heap::informational_event_enabled_p = false; +BOOL gc_heap::bgc_thread_running; -uint64_t* gc_heap::gc_time_info = 0; +CLRCriticalSection gc_heap::bgc_threads_timeout_cs; -#ifdef BACKGROUND_GC -uint64_t* gc_heap::bgc_time_info = 0; #endif //BACKGROUND_GC -size_t gc_heap::physical_memory_from_config = 0; +uint8_t** gc_heap::mark_list; +uint8_t** gc_heap::mark_list_index; +uint8_t** gc_heap::mark_list_end; -size_t gc_heap::gen0_min_budget_from_config = 0; +#ifdef SNOOP_STATS +snoop_stats_data gc_heap::snoop_stat; +#endif //SNOOP_STATS -size_t gc_heap::gen0_max_budget_from_config = 0; +uint8_t* gc_heap::min_overflow_address = MAX_PTR; -int gc_heap::high_mem_percent_from_config = 0; +uint8_t* gc_heap::max_overflow_address = 0; -bool gc_heap::use_frozen_segments_p = false; +uint8_t* gc_heap::shigh = 0; -#ifdef FEATURE_LOH_COMPACTION -gc_heap::etw_loh_compact_info* gc_heap::loh_compact_info; -#endif //FEATURE_LOH_COMPACTION -#endif //FEATURE_EVENT_TRACE +uint8_t* gc_heap::slow = MAX_PTR; -bool gc_heap::hard_limit_config_p = false; +#ifndef USE_REGIONS +size_t gc_heap::ordered_free_space_indices[MAX_NUM_BUCKETS]; -#if defined(SHORT_PLUGS) && !defined(USE_REGIONS) -double gc_heap::short_plugs_pad_ratio = 0; -#endif //SHORT_PLUGS && !USE_REGIONS +size_t gc_heap::saved_ordered_free_space_indices[MAX_NUM_BUCKETS]; -int gc_heap::generation_skip_ratio_threshold = 0; -int gc_heap::conserve_mem_setting = 0; -bool gc_heap::spin_count_unit_config_p = false; +size_t gc_heap::ordered_plug_indices[MAX_NUM_BUCKETS]; -uint64_t 
gc_heap::suspended_start_time = 0; -uint64_t gc_heap::end_gc_time = 0; -uint64_t gc_heap::total_suspended_time = 0; -uint64_t gc_heap::process_start_time = 0; -last_recorded_gc_info gc_heap::last_ephemeral_gc_info; -last_recorded_gc_info gc_heap::last_full_blocking_gc_info; +size_t gc_heap::saved_ordered_plug_indices[MAX_NUM_BUCKETS]; -uint64_t gc_heap::last_alloc_reset_suspended_end_time = 0; -size_t gc_heap::max_peak_heap_size = 0; -VOLATILE(size_t) gc_heap::llc_size = 0; +BOOL gc_heap::ordered_plug_indices_init = FALSE; -#ifdef BACKGROUND_GC -last_recorded_gc_info gc_heap::last_bgc_info[2]; -VOLATILE(bool) gc_heap::is_last_recorded_bgc = false; -VOLATILE(int) gc_heap::last_bgc_info_index = 0; -#endif //BACKGROUND_GC +BOOL gc_heap::use_bestfit = FALSE; -#ifdef DYNAMIC_HEAP_COUNT -size_t gc_heap::hc_change_cancelled_count_prep = 0; -#ifdef BACKGROUND_GC -int gc_heap::bgc_th_creation_hist_index = 0; -gc_heap::bgc_thread_creation_history gc_heap::bgc_th_creation_hist[max_bgc_thread_creation_count]; -size_t gc_heap::bgc_th_count_created = 0; -size_t gc_heap::bgc_th_count_created_th_existed = 0; -size_t gc_heap::bgc_th_count_creation_failed = 0; -size_t gc_heap::bgc_init_gc_index = 0; -VOLATILE(short) gc_heap::bgc_init_n_heaps = 0; -size_t gc_heap::hc_change_cancelled_count_bgc = 0; -#endif //BACKGROUND_GC -#endif //DYNAMIC_HEAP_COUNT +uint8_t* gc_heap::bestfit_first_pin = 0; -#if defined(HOST_64BIT) -#define MAX_ALLOWED_MEM_LOAD 85 +BOOL gc_heap::commit_end_of_seg = FALSE; -// consider putting this in dynamic data - -// we may want different values for workstation -// and server GC. 
-#define MIN_YOUNGEST_GEN_DESIRED (16*1024*1024) +size_t gc_heap::max_free_space_items = 0; -size_t gc_heap::youngest_gen_desired_th; -#endif //HOST_64BIT +size_t gc_heap::free_space_buckets = 0; -uint64_t gc_heap::mem_one_percent = 0; +size_t gc_heap::free_space_items = 0; -uint32_t gc_heap::high_memory_load_th = 0; +int gc_heap::trimmed_free_space_index = 0; -uint32_t gc_heap::m_high_memory_load_th; +size_t gc_heap::total_ephemeral_plugs = 0; -uint32_t gc_heap::v_high_memory_load_th; +seg_free_spaces* gc_heap::bestfit_seg = 0; -uint32_t gc_heap::almost_high_memory_load_th; +size_t gc_heap::total_ephemeral_size = 0; +#endif //!USE_REGIONS -bool gc_heap::is_restricted_physical_mem; +#ifdef HEAP_ANALYZE -uint64_t gc_heap::total_physical_mem = 0; +size_t gc_heap::internal_root_array_length = initial_internal_roots; -uint64_t gc_heap::entry_available_physical_mem = 0; +uint8_t** gc_heap::internal_root_array = 0; -size_t gc_heap::heap_hard_limit = 0; +size_t gc_heap::internal_root_array_index = 0; -size_t gc_heap::heap_hard_limit_oh[total_oh_count]; +BOOL gc_heap::heap_analyze_success = TRUE; -#ifdef USE_REGIONS +uint8_t* gc_heap::current_obj = 0; +size_t gc_heap::current_obj_size = 0; -size_t gc_heap::regions_range = 0; +#endif //HEAP_ANALYZE -#endif //USE_REGIONS +#ifdef GC_CONFIG_DRIVEN +size_t gc_heap::interesting_data_per_gc[max_idp_count]; +//size_t gc_heap::interesting_data_per_heap[max_idp_count]; +//size_t gc_heap::interesting_mechanisms_per_heap[max_im_count]; +#endif //GC_CONFIG_DRIVEN +#endif //MULTIPLE_HEAPS -bool affinity_config_specified_p = false; +no_gc_region_info gc_heap::current_no_gc_region_info; +FinalizerWorkItem* gc_heap::finalizer_work; +BOOL gc_heap::proceed_with_gc_p = FALSE; +GCSpinLock gc_heap::gc_lock; -#ifdef USE_REGIONS -region_allocator global_region_allocator; -uint8_t*(*initial_regions)[total_generation_count][2] = nullptr; -size_t gc_heap::region_count = 0; +#ifdef FEATURE_JAVAMARSHAL +uint8_t** gc_heap::global_bridge_list; +size_t 
gc_heap::num_global_bridge_objs; +#endif //FEATURE_JAVAMARSHAL -gc_heap::region_info* gc_heap::map_region_to_generation = nullptr; -gc_heap::region_info* gc_heap::map_region_to_generation_skewed = nullptr; +#ifdef BACKGROUND_GC +uint64_t gc_heap::total_uoh_a_last_bgc = 0; +#endif //BACKGROUND_GC +#ifdef USE_REGIONS +region_free_list gc_heap::global_regions_to_decommit[count_free_region_kinds]; +region_free_list gc_heap::global_free_huge_regions; +#else //USE_REGIONS +size_t gc_heap::eph_gen_starts_size = 0; +heap_segment* gc_heap::segment_standby_list; #endif //USE_REGIONS +bool gc_heap::use_large_pages_p = 0; +#ifdef HEAP_BALANCE_INSTRUMENTATION +size_t gc_heap::last_gc_end_time_us = 0; +#endif //HEAP_BALANCE_INSTRUMENTATION +#ifdef USE_REGIONS +bool gc_heap::enable_special_regions_p = false; +#else //USE_REGIONS +size_t gc_heap::min_segment_size = 0; +size_t gc_heap::min_uoh_segment_size = 0; +#endif //!USE_REGIONS +size_t gc_heap::min_segment_size_shr = 0; +size_t gc_heap::soh_segment_size = 0; +size_t gc_heap::segment_info_size = 0; -#ifdef BACKGROUND_GC -GCEvent gc_heap::bgc_start_event; - -gc_mechanisms gc_heap::saved_bgc_settings; +#ifdef GC_CONFIG_DRIVEN +size_t gc_heap::compact_or_sweep_gcs[2]; +#endif //GC_CONFIG_DRIVEN -gc_history_global gc_heap::bgc_data_global; +#ifdef FEATURE_LOH_COMPACTION +BOOL gc_heap::loh_compaction_always_p = FALSE; +gc_loh_compaction_mode gc_heap::loh_compaction_mode = loh_compaction_default; +#endif //FEATURE_LOH_COMPACTION -GCEvent gc_heap::background_gc_done_event; +GCEvent gc_heap::full_gc_approach_event; -GCEvent gc_heap::ee_proceed_event; +GCEvent gc_heap::full_gc_end_event; -bool gc_heap::gc_can_use_concurrent = false; +uint32_t gc_heap::fgn_loh_percent = 0; -bool gc_heap::temp_disable_concurrent_p = false; +#ifdef BACKGROUND_GC +BOOL gc_heap::fgn_last_gc_was_concurrent = FALSE; +#endif //BACKGROUND_GC -uint32_t gc_heap::cm_in_progress = FALSE; +VOLATILE(bool) gc_heap::full_gc_approach_event_set; -BOOL 
gc_heap::dont_restart_ee_p = FALSE; +size_t gc_heap::full_gc_counts[gc_type_max]; -BOOL gc_heap::keep_bgc_threads_p = FALSE; +bool gc_heap::maxgen_size_inc_p = false; -GCEvent gc_heap::bgc_threads_sync_event; +#ifndef USE_REGIONS +BOOL gc_heap::should_expand_in_full_gc = FALSE; +#endif //!USE_REGIONS -BOOL gc_heap::do_ephemeral_gc_p = FALSE; +#ifdef DYNAMIC_HEAP_COUNT +int gc_heap::dynamic_adaptation_mode = dynamic_adaptation_default; +gc_heap::dynamic_heap_count_data_t SVR::gc_heap::dynamic_heap_count_data; +size_t gc_heap::current_total_soh_stable_size = 0; +uint64_t gc_heap::last_suspended_end_time = 0; +uint64_t gc_heap::change_heap_count_time = 0; +uint64_t gc_heap::total_change_heap_count = 0; +uint64_t gc_heap::total_change_heap_count_time = 0; +size_t gc_heap::gc_index_full_gc_end = 0; +uint64_t gc_heap::before_distribute_free_regions_time = 0; +bool gc_heap::trigger_initial_gen2_p = false; -BOOL gc_heap::do_concurrent_p = FALSE; - -size_t gc_heap::ephemeral_fgc_counts[max_generation]; +#ifdef BACKGROUND_GC +bool gc_heap::trigger_bgc_for_rethreading_p = false; +int gc_heap::total_bgc_threads = 0; +int gc_heap::last_bgc_n_heaps = 0; +int gc_heap::last_total_bgc_threads = 0; +#endif //BACKGROUND_GC -VOLATILE(c_gc_state) gc_heap::current_c_gc_state = c_gc_state_free; +#ifdef STRESS_DYNAMIC_HEAP_COUNT +int gc_heap::heaps_in_this_gc = 0; +int gc_heap::bgc_to_ngc2_ratio = 0; +#endif //STRESS_DYNAMIC_HEAP_COUNT +#endif // DYNAMIC_HEAP_COUNT -VOLATILE(BOOL) gc_heap::gc_background_running = FALSE; -#endif //BACKGROUND_GC +// Provisional mode related stuff. 
+bool gc_heap::provisional_mode_triggered = false; +bool gc_heap::pm_trigger_full_gc = false; +size_t gc_heap::provisional_triggered_gc_count = 0; +size_t gc_heap::provisional_off_gc_count = 0; +size_t gc_heap::num_provisional_triggered = 0; +bool gc_heap::pm_stress_on = false; -#ifdef USE_REGIONS -#ifdef MULTIPLE_HEAPS -uint8_t* gc_heap::gc_low; -uint8_t* gc_heap::gc_high; -#endif //MULTIPLE_HEAPS -VOLATILE(uint8_t*) gc_heap::ephemeral_low; -VOLATILE(uint8_t*) gc_heap::ephemeral_high; -#endif //USE_REGIONS +#ifdef HEAP_ANALYZE +BOOL gc_heap::heap_analyze_enabled = FALSE; +#endif //HEAP_ANALYZE #ifndef MULTIPLE_HEAPS -#ifdef SPINLOCK_HISTORY -int gc_heap::spinlock_info_index = 0; -spinlock_info gc_heap::last_spinlock_info[max_saved_spinlock_info]; -allocation_state gc_heap::current_uoh_alloc_state = (allocation_state)-1; -#endif //SPINLOCK_HISTORY -uint32_t gc_heap::fgn_maxgen_percent = 0; -size_t gc_heap::fgn_last_alloc = 0; +alloc_list gc_heap::gen2_alloc_list[NUM_GEN2_ALIST - 1]; +alloc_list gc_heap::loh_alloc_list [NUM_LOH_ALIST - 1]; +alloc_list gc_heap::poh_alloc_list [NUM_POH_ALIST - 1]; -int gc_heap::generation_skip_ratio = 100; -#ifdef FEATURE_CARD_MARKING_STEALING -VOLATILE(size_t) gc_heap::n_eph_soh = 0; -VOLATILE(size_t) gc_heap::n_gen_soh = 0; -VOLATILE(size_t) gc_heap::n_eph_loh = 0; -VOLATILE(size_t) gc_heap::n_gen_loh = 0; -#endif //FEATURE_CARD_MARKING_STEALING +#ifdef DOUBLY_LINKED_FL +// size we removed with no undo; only for recording purpose +size_t gc_heap::gen2_removed_no_undo = 0; +size_t gc_heap::saved_pinned_plug_index = INVALID_SAVED_PINNED_PLUG_INDEX; +#endif //DOUBLY_LINKED_FL -uint64_t gc_heap::loh_alloc_since_cg = 0; +#ifdef FEATURE_EVENT_TRACE +etw_bucket_info gc_heap::bucket_info[NUM_GEN2_ALIST]; +#endif //FEATURE_EVENT_TRACE -BOOL gc_heap::elevation_requested = FALSE; +dynamic_data gc_heap::dynamic_data_table [total_generation_count]; +gc_history_per_heap gc_heap::gc_data_per_heap; +size_t gc_heap::total_promoted_bytes = 0; +size_t 
gc_heap::finalization_promoted_bytes = 0; +size_t gc_heap::maxgen_pinned_compact_before_advance = 0; -BOOL gc_heap::last_gc_before_oom = FALSE; +uint8_t* gc_heap::alloc_allocated = 0; -BOOL gc_heap::sufficient_gen0_space_p = FALSE; +size_t gc_heap::allocation_quantum = CLR_SIZE; -BOOL gc_heap::decide_promote_gen1_pins_p = TRUE; +GCSpinLock gc_heap::more_space_lock_soh; +GCSpinLock gc_heap::more_space_lock_uoh; #ifdef BACKGROUND_GC -uint8_t* gc_heap::background_saved_lowest_address = 0; -uint8_t* gc_heap::background_saved_highest_address = 0; -uint8_t* gc_heap::next_sweep_obj = 0; -uint8_t* gc_heap::current_sweep_pos = 0; -#ifdef DOUBLY_LINKED_FL -heap_segment* gc_heap::current_sweep_seg = 0; -#endif //DOUBLY_LINKED_FL -exclusive_sync* gc_heap::bgc_alloc_lock; +VOLATILE(int32_t) gc_heap::uoh_alloc_thread_count = 0; #endif //BACKGROUND_GC -oom_history gc_heap::oom_info; - -int gc_heap::oomhist_index_per_heap = 0; +#ifdef SYNCHRONIZATION_STATS +unsigned int gc_heap::good_suspension = 0; +unsigned int gc_heap::bad_suspension = 0; +uint64_t gc_heap::total_msl_acquire = 0; +unsigned int gc_heap::num_msl_acquired = 0; +unsigned int gc_heap::num_high_msl_acquire = 0; +unsigned int gc_heap::num_low_msl_acquire = 0; +#endif //SYNCHRONIZATION_STATS -oom_history gc_heap::oomhist_per_heap[max_oom_history_count]; +size_t gc_heap::alloc_contexts_used = 0; +size_t gc_heap::soh_allocation_no_gc = 0; +size_t gc_heap::loh_allocation_no_gc = 0; +bool gc_heap::no_gc_oom_p = false; +heap_segment* gc_heap::saved_loh_segment_no_gc = 0; -fgm_history gc_heap::fgm_result; +#endif //MULTIPLE_HEAPS -size_t gc_heap::allocated_since_last_gc[total_oh_count]; +#ifndef MULTIPLE_HEAPS -#ifndef USE_REGIONS -BOOL gc_heap::ro_segments_in_range = FALSE; -uint8_t* gc_heap::ephemeral_low; -uint8_t* gc_heap::ephemeral_high; -BOOL gc_heap::ephemeral_promotion; -uint8_t* gc_heap::saved_ephemeral_plan_start[ephemeral_generation_count]; -size_t 
gc_heap::saved_ephemeral_plan_start_size[ephemeral_generation_count]; -#endif //!USE_REGIONS +BOOL gc_heap::gen0_bricks_cleared = FALSE; -uint8_t* gc_heap::lowest_address; +int gc_heap::gen0_must_clear_bricks = 0; -uint8_t* gc_heap::highest_address; +#ifdef FEATURE_PREMORTEM_FINALIZATION +CFinalize* gc_heap::finalize_queue = 0; +#endif // FEATURE_PREMORTEM_FINALIZATION -short* gc_heap::brick_table; +#ifdef FEATURE_CARD_MARKING_STEALING +VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_soh; +VOLATILE(bool) gc_heap::card_mark_done_soh; +VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_loh; +VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_poh; +VOLATILE(bool) gc_heap::card_mark_done_uoh; +#endif // FEATURE_CARD_MARKING_STEALING -uint32_t* gc_heap::card_table; +generation gc_heap::generation_table [total_generation_count]; -#ifdef CARD_BUNDLE -uint32_t* gc_heap::card_bundle_table; -#endif //CARD_BUNDLE +size_t gc_heap::interesting_data_per_heap[max_idp_count]; -uint8_t* gc_heap::gc_low = 0; +size_t gc_heap::compact_reasons_per_heap[max_compact_reasons_count]; -uint8_t* gc_heap::gc_high = 0; +size_t gc_heap::expand_mechanisms_per_heap[max_expand_mechanisms_count]; -#ifndef USE_REGIONS -uint8_t* gc_heap::demotion_low; +size_t gc_heap::interesting_mechanism_bits_per_heap[max_gc_mechanism_bits_count]; -uint8_t* gc_heap::demotion_high; +mark_queue_t gc_heap::mark_queue; -uint8_t* gc_heap::last_gen1_pin_end; -#endif //!USE_REGIONS +#ifdef USE_REGIONS +bool gc_heap::special_sweep_p = false; +#endif //USE_REGIONS -gen_to_condemn_tuning gc_heap::gen_to_condemn_reasons; +int gc_heap::loh_pinned_queue_decay = LOH_PIN_DECAY; -size_t gc_heap::etw_allocation_running_amount[total_oh_count]; +#endif // MULTIPLE_HEAPS -uint64_t gc_heap::total_alloc_bytes_soh = 0; +/* end of per heap static initialization */ -uint64_t gc_heap::total_alloc_bytes_uoh = 0; +#ifdef USE_REGIONS +const size_t uninitialized_end_gen0_region_space = (size_t)(-1); +#endif //USE_REGIONS -int gc_heap::gc_policy 
= 0; +// budget smoothing +size_t gc_heap::smoothed_desired_total[total_generation_count]; +/* end of static initialization */ -uint64_t gc_heap::allocation_running_time; +void gen_to_condemn_tuning::print (int heap_num) +{ +#ifdef DT_LOG + dprintf (DT_LOG_0, ("condemned reasons (%d %d)", condemn_reasons_gen, condemn_reasons_condition)); + dprintf (DT_LOG_0, ("%s", record_condemn_reasons_gen_header)); + gc_condemn_reason_gen r_gen; + for (int i = 0; i < gcrg_max; i++) + { + r_gen = (gc_condemn_reason_gen)(i); + str_reasons_gen[i * 2] = get_gen_char (get_gen (r_gen)); + } + dprintf (DT_LOG_0, ("[%2d]%s", heap_num, str_reasons_gen)); -size_t gc_heap::allocation_running_amount; + dprintf (DT_LOG_0, ("%s", record_condemn_reasons_condition_header)); + gc_condemn_reason_condition r_condition; + for (int i = 0; i < gcrc_max; i++) + { + r_condition = (gc_condemn_reason_condition)(i); + str_reasons_condition[i * 2] = get_condition_char (get_condition (r_condition)); + } -heap_segment* gc_heap::ephemeral_heap_segment = 0; + dprintf (DT_LOG_0, ("[%2d]%s", heap_num, str_reasons_condition)); +#else + UNREFERENCED_PARAMETER(heap_num); +#endif //DT_LOG +} -#ifdef USE_REGIONS -#ifdef STRESS_REGIONS -OBJECTHANDLE* gc_heap::pinning_handles_for_alloc = 0; -int gc_heap::ph_index_per_heap = 0; -int gc_heap::pinning_seg_interval = 2; -size_t gc_heap::num_gen0_regions = 0; -int gc_heap::sip_seg_interval = 0; -int gc_heap::sip_seg_maxgen_interval = 0; -size_t gc_heap::num_condemned_regions = 0; -#endif //STRESS_REGIONS +void gc_generation_data::print (int heap_num, int gen_num) +{ +#if defined(SIMPLE_DPRINTF) && defined(DT_LOG) + dprintf (DT_LOG_0, ("[%2d]gen%d beg %zd fl %zd fo %zd end %zd fl %zd fo %zd in %zd p %zd np %zd alloc %zd", + heap_num, gen_num, + size_before, + free_list_space_before, free_obj_space_before, + size_after, + free_list_space_after, free_obj_space_after, + in, pinned_surv, npinned_surv, + new_allocation)); +#else + UNREFERENCED_PARAMETER(heap_num); + 
UNREFERENCED_PARAMETER(gen_num); +#endif //SIMPLE_DPRINTF && DT_LOG +} -region_free_list gc_heap::free_regions[count_free_region_kinds]; +void gc_history_per_heap::set_mechanism (gc_mechanism_per_heap mechanism_per_heap, uint32_t value) +{ + uint32_t* mechanism = &mechanisms[mechanism_per_heap]; + *mechanism = 0; + *mechanism |= mechanism_mask; + *mechanism |= (1 << value); -int gc_heap::num_regions_freed_in_sweep = 0; +#ifdef DT_LOG + gc_mechanism_descr* descr = &gc_mechanisms_descr[mechanism_per_heap]; + dprintf (DT_LOG_0, ("setting %s: %s", + descr->name, + (descr->descr)[value])); +#endif //DT_LOG +} -int gc_heap::regions_per_gen[max_generation + 1]; +void gc_history_per_heap::print() +{ +#if defined(SIMPLE_DPRINTF) && defined(DT_LOG) + for (int i = 0; i < (sizeof (gen_data)/sizeof (gc_generation_data)); i++) + { + gen_data[i].print (heap_index, i); + } -int gc_heap::planned_regions_per_gen[max_generation + 1]; + dprintf (DT_LOG_0, ("fla %zd flr %zd esa %zd ca %zd pa %zd paa %zd, rfle %d, ec %zd", + maxgen_size_info.free_list_allocated, + maxgen_size_info.free_list_rejected, + maxgen_size_info.end_seg_allocated, + maxgen_size_info.condemned_allocated, + maxgen_size_info.pinned_allocated, + maxgen_size_info.pinned_allocated_advance, + maxgen_size_info.running_free_list_efficiency, + extra_gen0_committed)); -int gc_heap::sip_maxgen_regions_per_gen[max_generation + 1]; + int mechanism = 0; + gc_mechanism_descr* descr = 0; -heap_segment* gc_heap::reserved_free_regions_sip[max_generation]; + for (int i = 0; i < max_mechanism_per_heap; i++) + { + mechanism = get_mechanism ((gc_mechanism_per_heap)i); -int gc_heap::new_gen0_regions_in_plns = 0; -int gc_heap::new_regions_in_prr = 0; -int gc_heap::new_regions_in_threading = 0; - -size_t gc_heap::end_gen0_region_space = 0; - -size_t gc_heap::end_gen0_region_committed_space = 0; - -size_t gc_heap::gen0_pinned_free_space = 0; - -bool gc_heap::gen0_large_chunk_found = false; - -size_t* gc_heap::survived_per_region = nullptr; 
- -size_t* gc_heap::old_card_survived_per_region = nullptr; -#endif //USE_REGIONS - -BOOL gc_heap::blocking_collection = FALSE; - -heap_segment* gc_heap::freeable_uoh_segment = 0; - -uint64_t gc_heap::time_bgc_last = 0; - -size_t gc_heap::mark_stack_tos = 0; - -size_t gc_heap::mark_stack_bos = 0; - -size_t gc_heap::mark_stack_array_length = 0; - -mark* gc_heap::mark_stack_array = 0; - -#if defined (_DEBUG) && defined (VERIFY_HEAP) -BOOL gc_heap::verify_pinned_queue_p = FALSE; -#endif //_DEBUG && VERIFY_HEAP - -uint8_t* gc_heap::oldest_pinned_plug = 0; - -size_t gc_heap::num_pinned_objects = 0; - -#ifdef FEATURE_LOH_COMPACTION -size_t gc_heap::loh_pinned_queue_tos = 0; - -size_t gc_heap::loh_pinned_queue_bos = 0; - -size_t gc_heap::loh_pinned_queue_length = 0; - -mark* gc_heap::loh_pinned_queue = 0; - -BOOL gc_heap::loh_compacted_p = FALSE; -#endif //FEATURE_LOH_COMPACTION - -#ifdef BACKGROUND_GC - -EEThreadId gc_heap::bgc_thread_id; - -uint8_t* gc_heap::background_written_addresses [array_size+2]; - -heap_segment* gc_heap::freeable_soh_segment = 0; - -size_t gc_heap::bgc_overflow_count = 0; - -size_t gc_heap::bgc_begin_uoh_size[uoh_generation_count] = {}; -size_t gc_heap::bgc_uoh_current_size[uoh_generation_count] = {}; -size_t gc_heap::end_uoh_size[uoh_generation_count] = {}; - -size_t gc_heap::uoh_a_no_bgc[uoh_generation_count] = {}; -size_t gc_heap::uoh_a_bgc_marking[uoh_generation_count] = {}; -size_t gc_heap::uoh_a_bgc_planning[uoh_generation_count] = {}; -#ifdef BGC_SERVO_TUNING -size_t gc_heap::bgc_maxgen_end_fl_size = 0; -#endif //BGC_SERVO_TUNING - -size_t gc_heap::background_soh_size_end_mark = 0; - -size_t gc_heap::background_soh_alloc_count = 0; - -uint8_t** gc_heap::background_mark_stack_tos = 0; - -uint8_t** gc_heap::background_mark_stack_array = 0; - -size_t gc_heap::background_mark_stack_array_length = 0; - -BOOL gc_heap::processed_eph_overflow_p = FALSE; - -#ifdef USE_REGIONS -BOOL gc_heap::background_overflow_p = FALSE; -#else //USE_REGIONS 
-uint8_t* gc_heap::background_min_overflow_address =0; - -uint8_t* gc_heap::background_max_overflow_address =0; - -uint8_t* gc_heap::background_min_soh_overflow_address =0; - -uint8_t* gc_heap::background_max_soh_overflow_address =0; - -heap_segment* gc_heap::saved_overflow_ephemeral_seg = 0; - -heap_segment* gc_heap::saved_sweep_ephemeral_seg = 0; - -uint8_t* gc_heap::saved_sweep_ephemeral_start = 0; -#endif //USE_REGIONS - -Thread* gc_heap::bgc_thread = 0; - -uint8_t** gc_heap::c_mark_list = 0; - -size_t gc_heap::c_mark_list_length = 0; - -size_t gc_heap::c_mark_list_index = 0; - -gc_history_per_heap gc_heap::bgc_data_per_heap; - -BOOL gc_heap::bgc_thread_running; - -CLRCriticalSection gc_heap::bgc_threads_timeout_cs; - -#endif //BACKGROUND_GC - -uint8_t** gc_heap::mark_list; -uint8_t** gc_heap::mark_list_index; -uint8_t** gc_heap::mark_list_end; - -#ifdef SNOOP_STATS -snoop_stats_data gc_heap::snoop_stat; -#endif //SNOOP_STATS - -uint8_t* gc_heap::min_overflow_address = MAX_PTR; - -uint8_t* gc_heap::max_overflow_address = 0; - -uint8_t* gc_heap::shigh = 0; - -uint8_t* gc_heap::slow = MAX_PTR; - -#ifndef USE_REGIONS -size_t gc_heap::ordered_free_space_indices[MAX_NUM_BUCKETS]; - -size_t gc_heap::saved_ordered_free_space_indices[MAX_NUM_BUCKETS]; - -size_t gc_heap::ordered_plug_indices[MAX_NUM_BUCKETS]; - -size_t gc_heap::saved_ordered_plug_indices[MAX_NUM_BUCKETS]; - -BOOL gc_heap::ordered_plug_indices_init = FALSE; - -BOOL gc_heap::use_bestfit = FALSE; - -uint8_t* gc_heap::bestfit_first_pin = 0; - -BOOL gc_heap::commit_end_of_seg = FALSE; - -size_t gc_heap::max_free_space_items = 0; - -size_t gc_heap::free_space_buckets = 0; - -size_t gc_heap::free_space_items = 0; - -int gc_heap::trimmed_free_space_index = 0; - -size_t gc_heap::total_ephemeral_plugs = 0; - -seg_free_spaces* gc_heap::bestfit_seg = 0; - -size_t gc_heap::total_ephemeral_size = 0; -#endif //!USE_REGIONS - -#ifdef HEAP_ANALYZE - -size_t gc_heap::internal_root_array_length = initial_internal_roots; - 
-uint8_t** gc_heap::internal_root_array = 0; - -size_t gc_heap::internal_root_array_index = 0; - -BOOL gc_heap::heap_analyze_success = TRUE; - -uint8_t* gc_heap::current_obj = 0; -size_t gc_heap::current_obj_size = 0; - -#endif //HEAP_ANALYZE - -#ifdef GC_CONFIG_DRIVEN -size_t gc_heap::interesting_data_per_gc[max_idp_count]; -//size_t gc_heap::interesting_data_per_heap[max_idp_count]; -//size_t gc_heap::interesting_mechanisms_per_heap[max_im_count]; -#endif //GC_CONFIG_DRIVEN -#endif //MULTIPLE_HEAPS - -no_gc_region_info gc_heap::current_no_gc_region_info; -FinalizerWorkItem* gc_heap::finalizer_work; -BOOL gc_heap::proceed_with_gc_p = FALSE; -GCSpinLock gc_heap::gc_lock; - -#ifdef FEATURE_JAVAMARSHAL -uint8_t** gc_heap::global_bridge_list; -size_t gc_heap::num_global_bridge_objs; -#endif //FEATURE_JAVAMARSHAL - -#ifdef BACKGROUND_GC -uint64_t gc_heap::total_uoh_a_last_bgc = 0; -#endif //BACKGROUND_GC - -#ifdef USE_REGIONS -region_free_list gc_heap::global_regions_to_decommit[count_free_region_kinds]; -region_free_list gc_heap::global_free_huge_regions; -#else //USE_REGIONS -size_t gc_heap::eph_gen_starts_size = 0; -heap_segment* gc_heap::segment_standby_list; -#endif //USE_REGIONS -bool gc_heap::use_large_pages_p = 0; -#ifdef HEAP_BALANCE_INSTRUMENTATION -size_t gc_heap::last_gc_end_time_us = 0; -#endif //HEAP_BALANCE_INSTRUMENTATION -#ifdef USE_REGIONS -bool gc_heap::enable_special_regions_p = false; -#else //USE_REGIONS -size_t gc_heap::min_segment_size = 0; -size_t gc_heap::min_uoh_segment_size = 0; -#endif //!USE_REGIONS -size_t gc_heap::min_segment_size_shr = 0; -size_t gc_heap::soh_segment_size = 0; -size_t gc_heap::segment_info_size = 0; - -#ifdef GC_CONFIG_DRIVEN -size_t gc_heap::compact_or_sweep_gcs[2]; -#endif //GC_CONFIG_DRIVEN - -#ifdef FEATURE_LOH_COMPACTION -BOOL gc_heap::loh_compaction_always_p = FALSE; -gc_loh_compaction_mode gc_heap::loh_compaction_mode = loh_compaction_default; -#endif //FEATURE_LOH_COMPACTION - -GCEvent 
gc_heap::full_gc_approach_event; - -GCEvent gc_heap::full_gc_end_event; - -uint32_t gc_heap::fgn_loh_percent = 0; - -#ifdef BACKGROUND_GC -BOOL gc_heap::fgn_last_gc_was_concurrent = FALSE; -#endif //BACKGROUND_GC - -VOLATILE(bool) gc_heap::full_gc_approach_event_set; - -size_t gc_heap::full_gc_counts[gc_type_max]; - -bool gc_heap::maxgen_size_inc_p = false; - -#ifndef USE_REGIONS -BOOL gc_heap::should_expand_in_full_gc = FALSE; -#endif //!USE_REGIONS - -#ifdef DYNAMIC_HEAP_COUNT -int gc_heap::dynamic_adaptation_mode = dynamic_adaptation_default; -gc_heap::dynamic_heap_count_data_t SVR::gc_heap::dynamic_heap_count_data; -size_t gc_heap::current_total_soh_stable_size = 0; -uint64_t gc_heap::last_suspended_end_time = 0; -uint64_t gc_heap::change_heap_count_time = 0; -uint64_t gc_heap::total_change_heap_count = 0; -uint64_t gc_heap::total_change_heap_count_time = 0; -size_t gc_heap::gc_index_full_gc_end = 0; -uint64_t gc_heap::before_distribute_free_regions_time = 0; -bool gc_heap::trigger_initial_gen2_p = false; - -#ifdef BACKGROUND_GC -bool gc_heap::trigger_bgc_for_rethreading_p = false; -int gc_heap::total_bgc_threads = 0; -int gc_heap::last_bgc_n_heaps = 0; -int gc_heap::last_total_bgc_threads = 0; -#endif //BACKGROUND_GC - -#ifdef STRESS_DYNAMIC_HEAP_COUNT -int gc_heap::heaps_in_this_gc = 0; -int gc_heap::bgc_to_ngc2_ratio = 0; -#endif //STRESS_DYNAMIC_HEAP_COUNT -#endif // DYNAMIC_HEAP_COUNT - -// Provisional mode related stuff. 
-bool gc_heap::provisional_mode_triggered = false; -bool gc_heap::pm_trigger_full_gc = false; -size_t gc_heap::provisional_triggered_gc_count = 0; -size_t gc_heap::provisional_off_gc_count = 0; -size_t gc_heap::num_provisional_triggered = 0; -bool gc_heap::pm_stress_on = false; - -#ifdef HEAP_ANALYZE -BOOL gc_heap::heap_analyze_enabled = FALSE; -#endif //HEAP_ANALYZE - -#ifndef MULTIPLE_HEAPS - -alloc_list gc_heap::gen2_alloc_list[NUM_GEN2_ALIST - 1]; -alloc_list gc_heap::loh_alloc_list [NUM_LOH_ALIST - 1]; -alloc_list gc_heap::poh_alloc_list [NUM_POH_ALIST - 1]; - -#ifdef DOUBLY_LINKED_FL -// size we removed with no undo; only for recording purpose -size_t gc_heap::gen2_removed_no_undo = 0; -size_t gc_heap::saved_pinned_plug_index = INVALID_SAVED_PINNED_PLUG_INDEX; -#endif //DOUBLY_LINKED_FL - -#ifdef FEATURE_EVENT_TRACE -etw_bucket_info gc_heap::bucket_info[NUM_GEN2_ALIST]; -#endif //FEATURE_EVENT_TRACE - -dynamic_data gc_heap::dynamic_data_table [total_generation_count]; -gc_history_per_heap gc_heap::gc_data_per_heap; -size_t gc_heap::total_promoted_bytes = 0; -size_t gc_heap::finalization_promoted_bytes = 0; -size_t gc_heap::maxgen_pinned_compact_before_advance = 0; - -uint8_t* gc_heap::alloc_allocated = 0; - -size_t gc_heap::allocation_quantum = CLR_SIZE; - -GCSpinLock gc_heap::more_space_lock_soh; -GCSpinLock gc_heap::more_space_lock_uoh; - -#ifdef BACKGROUND_GC -VOLATILE(int32_t) gc_heap::uoh_alloc_thread_count = 0; -#endif //BACKGROUND_GC - -#ifdef SYNCHRONIZATION_STATS -unsigned int gc_heap::good_suspension = 0; -unsigned int gc_heap::bad_suspension = 0; -uint64_t gc_heap::total_msl_acquire = 0; -unsigned int gc_heap::num_msl_acquired = 0; -unsigned int gc_heap::num_high_msl_acquire = 0; -unsigned int gc_heap::num_low_msl_acquire = 0; -#endif //SYNCHRONIZATION_STATS - -size_t gc_heap::alloc_contexts_used = 0; -size_t gc_heap::soh_allocation_no_gc = 0; -size_t gc_heap::loh_allocation_no_gc = 0; -bool gc_heap::no_gc_oom_p = false; -heap_segment* 
gc_heap::saved_loh_segment_no_gc = 0; - -#endif //MULTIPLE_HEAPS - -#ifndef MULTIPLE_HEAPS - -BOOL gc_heap::gen0_bricks_cleared = FALSE; - -int gc_heap::gen0_must_clear_bricks = 0; - -#ifdef FEATURE_PREMORTEM_FINALIZATION -CFinalize* gc_heap::finalize_queue = 0; -#endif // FEATURE_PREMORTEM_FINALIZATION - -#ifdef FEATURE_CARD_MARKING_STEALING -VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_soh; -VOLATILE(bool) gc_heap::card_mark_done_soh; -VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_loh; -VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_poh; -VOLATILE(bool) gc_heap::card_mark_done_uoh; -#endif // FEATURE_CARD_MARKING_STEALING - -generation gc_heap::generation_table [total_generation_count]; - -size_t gc_heap::interesting_data_per_heap[max_idp_count]; - -size_t gc_heap::compact_reasons_per_heap[max_compact_reasons_count]; - -size_t gc_heap::expand_mechanisms_per_heap[max_expand_mechanisms_count]; - -size_t gc_heap::interesting_mechanism_bits_per_heap[max_gc_mechanism_bits_count]; - -mark_queue_t gc_heap::mark_queue; - -#ifdef USE_REGIONS -bool gc_heap::special_sweep_p = false; -#endif //USE_REGIONS - -int gc_heap::loh_pinned_queue_decay = LOH_PIN_DECAY; - -#endif // MULTIPLE_HEAPS - -/* end of per heap static initialization */ - -#ifdef USE_REGIONS -const size_t uninitialized_end_gen0_region_space = (size_t)(-1); -#endif //USE_REGIONS - -// budget smoothing -size_t gc_heap::smoothed_desired_total[total_generation_count]; -/* end of static initialization */ - -// This is for methods that need to iterate through all SOH heap segments/regions. 
-inline -int get_start_generation_index() -{ -#ifdef USE_REGIONS - return 0; -#else - return max_generation; -#endif //USE_REGIONS -} - -inline -int get_stop_generation_index (int condemned_gen_number) -{ -#ifdef USE_REGIONS - return 0; -#else - return condemned_gen_number; -#endif //USE_REGIONS -} - -void gen_to_condemn_tuning::print (int heap_num) -{ -#ifdef DT_LOG - dprintf (DT_LOG_0, ("condemned reasons (%d %d)", condemn_reasons_gen, condemn_reasons_condition)); - dprintf (DT_LOG_0, ("%s", record_condemn_reasons_gen_header)); - gc_condemn_reason_gen r_gen; - for (int i = 0; i < gcrg_max; i++) - { - r_gen = (gc_condemn_reason_gen)(i); - str_reasons_gen[i * 2] = get_gen_char (get_gen (r_gen)); - } - dprintf (DT_LOG_0, ("[%2d]%s", heap_num, str_reasons_gen)); - - dprintf (DT_LOG_0, ("%s", record_condemn_reasons_condition_header)); - gc_condemn_reason_condition r_condition; - for (int i = 0; i < gcrc_max; i++) - { - r_condition = (gc_condemn_reason_condition)(i); - str_reasons_condition[i * 2] = get_condition_char (get_condition (r_condition)); - } - - dprintf (DT_LOG_0, ("[%2d]%s", heap_num, str_reasons_condition)); -#else - UNREFERENCED_PARAMETER(heap_num); -#endif //DT_LOG -} - -void gc_generation_data::print (int heap_num, int gen_num) -{ -#if defined(SIMPLE_DPRINTF) && defined(DT_LOG) - dprintf (DT_LOG_0, ("[%2d]gen%d beg %zd fl %zd fo %zd end %zd fl %zd fo %zd in %zd p %zd np %zd alloc %zd", - heap_num, gen_num, - size_before, - free_list_space_before, free_obj_space_before, - size_after, - free_list_space_after, free_obj_space_after, - in, pinned_surv, npinned_surv, - new_allocation)); -#else - UNREFERENCED_PARAMETER(heap_num); - UNREFERENCED_PARAMETER(gen_num); -#endif //SIMPLE_DPRINTF && DT_LOG -} - -void gc_history_per_heap::set_mechanism (gc_mechanism_per_heap mechanism_per_heap, uint32_t value) -{ - uint32_t* mechanism = &mechanisms[mechanism_per_heap]; - *mechanism = 0; - *mechanism |= mechanism_mask; - *mechanism |= (1 << value); - -#ifdef DT_LOG - 
gc_mechanism_descr* descr = &gc_mechanisms_descr[mechanism_per_heap]; - dprintf (DT_LOG_0, ("setting %s: %s", - descr->name, - (descr->descr)[value])); -#endif //DT_LOG -} - -void gc_history_per_heap::print() -{ -#if defined(SIMPLE_DPRINTF) && defined(DT_LOG) - for (int i = 0; i < (sizeof (gen_data)/sizeof (gc_generation_data)); i++) - { - gen_data[i].print (heap_index, i); - } - - dprintf (DT_LOG_0, ("fla %zd flr %zd esa %zd ca %zd pa %zd paa %zd, rfle %d, ec %zd", - maxgen_size_info.free_list_allocated, - maxgen_size_info.free_list_rejected, - maxgen_size_info.end_seg_allocated, - maxgen_size_info.condemned_allocated, - maxgen_size_info.pinned_allocated, - maxgen_size_info.pinned_allocated_advance, - maxgen_size_info.running_free_list_efficiency, - extra_gen0_committed)); - - int mechanism = 0; - gc_mechanism_descr* descr = 0; - - for (int i = 0; i < max_mechanism_per_heap; i++) - { - mechanism = get_mechanism ((gc_mechanism_per_heap)i); - - if (mechanism >= 0) - { - descr = &gc_mechanisms_descr[(gc_mechanism_per_heap)i]; - dprintf (DT_LOG_0, ("[%2d]%s%s", - heap_index, - descr->name, - (descr->descr)[mechanism])); - } - } -#endif //SIMPLE_DPRINTF && DT_LOG -} - -void gc_history_global::print() -{ -#ifdef DT_LOG - char str_settings[64]; - memset (str_settings, '|', sizeof (char) * 64); - str_settings[max_global_mechanisms_count*2] = 0; - - for (int i = 0; i < max_global_mechanisms_count; i++) - { - str_settings[i * 2] = (get_mechanism_p ((gc_global_mechanism_p)i) ? 
'Y' : 'N'); - } - - dprintf (DT_LOG_0, ("[hp]|c|p|o|d|b|e|")); - - dprintf (DT_LOG_0, ("%4d|%s", num_heaps, str_settings)); - dprintf (DT_LOG_0, ("Condemned gen%d(reason: %s; mode: %s), youngest budget %zd(%d), memload %d", - condemned_generation, - str_gc_reasons[reason], - str_gc_pause_modes[pause_mode], - final_youngest_desired, - gen0_reduction_count, - mem_pressure)); -#endif //DT_LOG -} - -uint32_t limit_time_to_uint32 (uint64_t time) -{ - time = min (time, (uint64_t)UINT32_MAX); - return (uint32_t)time; -} - -inline BOOL -in_range_for_segment(uint8_t* add, heap_segment* seg) -{ - return ((add >= heap_segment_mem (seg)) && (add < heap_segment_reserved (seg))); -} - -#ifdef FEATURE_BASICFREEZE -// The array we allocate is organized as follows: -// 0th element is the address of the last array we allocated. -// starting from the 1st element are the segment addresses, that's -// what buckets() returns. -struct bk -{ - uint8_t* add; - size_t val; -}; - -class sorted_table -{ -private: - ptrdiff_t size; - ptrdiff_t count; - bk* slots; - bk* buckets() { return (slots + 1); } - uint8_t*& last_slot (bk* arr) { return arr[0].add; } - bk* old_slots; -public: - static sorted_table* make_sorted_table (); - BOOL insert (uint8_t* add, size_t val);; - size_t lookup (uint8_t*& add); - void remove (uint8_t* add); - void clear (); - void delete_sorted_table(); - void delete_old_slots(); - void enqueue_old_slot(bk* sl); - BOOL ensure_space_for_insert(); -}; - -sorted_table* -sorted_table::make_sorted_table () -{ - size_t size = 400; - - // allocate one more bk to store the older slot address. 
- sorted_table* res = (sorted_table*)new (nothrow) char [sizeof (sorted_table) + (size + 1) * sizeof (bk)]; - if (!res) - return 0; - res->size = size; - res->slots = (bk*)(res + 1); - res->old_slots = 0; - res->clear(); - return res; -} - -void -sorted_table::delete_sorted_table() -{ - if (slots != (bk*)(this+1)) - { - delete[] slots; - } - delete_old_slots(); -} -void -sorted_table::delete_old_slots() -{ - uint8_t* sl = (uint8_t*)old_slots; - while (sl) - { - uint8_t* dsl = sl; - sl = last_slot ((bk*)sl); - delete[] dsl; - } - old_slots = 0; -} -void -sorted_table::enqueue_old_slot(bk* sl) -{ - last_slot (sl) = (uint8_t*)old_slots; - old_slots = sl; -} - -inline -size_t -sorted_table::lookup (uint8_t*& add) -{ - ptrdiff_t high = (count-1); - ptrdiff_t low = 0; - ptrdiff_t ti; - ptrdiff_t mid; - bk* buck = buckets(); - while (low <= high) - { - mid = ((low + high)/2); - ti = mid; - if (buck[ti].add > add) - { - if ((ti > 0) && (buck[ti-1].add <= add)) - { - add = buck[ti-1].add; - return buck[ti - 1].val; - } - high = mid - 1; - } - else - { - if (buck[ti+1].add > add) - { - add = buck[ti].add; - return buck[ti].val; - } - low = mid + 1; - } - } - add = 0; - return 0; -} - -BOOL -sorted_table::ensure_space_for_insert() -{ - if (count == size) - { - size = (size * 3)/2; - assert((size * sizeof (bk)) > 0); - bk* res = (bk*)new (nothrow) char [(size + 1) * sizeof (bk)]; - assert (res); - if (!res) - return FALSE; - - last_slot (res) = 0; - memcpy (((bk*)res + 1), buckets(), count * sizeof (bk)); - bk* last_old_slots = slots; - slots = res; - if (last_old_slots != (bk*)(this + 1)) - enqueue_old_slot (last_old_slots); - } - return TRUE; -} - -BOOL -sorted_table::insert (uint8_t* add, size_t val) -{ - //grow if no more room - assert (count < size); - - //insert sorted - ptrdiff_t high = (count-1); - ptrdiff_t low = 0; - ptrdiff_t ti; - ptrdiff_t mid; - bk* buck = buckets(); - while (low <= high) - { - mid = ((low + high)/2); - ti = mid; - if (buck[ti].add > add) - { - 
if ((ti == 0) || (buck[ti-1].add <= add)) - { - // found insertion point - for (ptrdiff_t k = count; k > ti;k--) - { - buck [k] = buck [k-1]; - } - buck[ti].add = add; - buck[ti].val = val; - count++; - return TRUE; - } - high = mid - 1; - } - else - { - if (buck[ti+1].add > add) - { - //found the insertion point - for (ptrdiff_t k = count; k > ti+1;k--) - { - buck [k] = buck [k-1]; - } - buck[ti+1].add = add; - buck[ti+1].val = val; - count++; - return TRUE; - } - low = mid + 1; - } - } - assert (0); - return TRUE; -} - -void -sorted_table::remove (uint8_t* add) -{ - ptrdiff_t high = (count-1); - ptrdiff_t low = 0; - ptrdiff_t ti; - ptrdiff_t mid; - bk* buck = buckets(); - while (low <= high) - { - mid = ((low + high)/2); - ti = mid; - if (buck[ti].add > add) - { - if (buck[ti-1].add <= add) - { - for (ptrdiff_t k = ti; k < count; k++) - buck[k-1] = buck[k]; - count--; - return; - } - high = mid - 1; - } - else - { - if (buck[ti+1].add > add) - { - for (ptrdiff_t k = ti+1; k < count; k++) - buck[k-1] = buck[k]; - count--; - return; - } - low = mid + 1; - } - } - assert (0); -} - -void -sorted_table::clear() -{ - count = 1; - buckets()[0].add = MAX_PTR; -} -#endif //FEATURE_BASICFREEZE - -#ifdef USE_REGIONS -inline -size_t get_skewed_basic_region_index_for_address (uint8_t* address) -{ - assert ((g_gc_lowest_address <= address) && (address <= g_gc_highest_address)); - size_t skewed_basic_region_index = (size_t)address >> gc_heap::min_segment_size_shr; - return skewed_basic_region_index; -} - -inline -size_t get_basic_region_index_for_address (uint8_t* address) -{ - size_t skewed_basic_region_index = get_skewed_basic_region_index_for_address (address); - return (skewed_basic_region_index - get_skewed_basic_region_index_for_address (g_gc_lowest_address)); -} - -// Go from a random address to its region info. The random address could be -// in one of the basic regions of a larger region so we need to check for that. 
-inline -heap_segment* get_region_info_for_address (uint8_t* address) -{ - size_t basic_region_index = (size_t)address >> gc_heap::min_segment_size_shr; - heap_segment* basic_region_info_entry = (heap_segment*)&seg_mapping_table[basic_region_index]; - ptrdiff_t first_field = (ptrdiff_t)heap_segment_allocated (basic_region_info_entry); - if (first_field < 0) - { - basic_region_index += first_field; - } - - return ((heap_segment*)(&seg_mapping_table[basic_region_index])); -} - -// Go from the physical start of a region to its region info. -inline -heap_segment* get_region_info (uint8_t* region_start) -{ - size_t region_index = (size_t)region_start >> gc_heap::min_segment_size_shr; - heap_segment* region_info_entry = (heap_segment*)&seg_mapping_table[region_index]; - dprintf (REGIONS_LOG, ("region info for region %p is at %zd, %zx (alloc: %p)", - region_start, region_index, (size_t)region_info_entry, heap_segment_allocated (region_info_entry))); - return (heap_segment*)&seg_mapping_table[region_index]; -} - -// Go from the actual region info to its region start. 
-inline -uint8_t* get_region_start (heap_segment* region_info) -{ - uint8_t* obj_start = heap_segment_mem (region_info); - return (obj_start - sizeof (aligned_plug_and_gap)); -} - -inline -size_t get_region_size (heap_segment* region_info) -{ - return (size_t)(heap_segment_reserved (region_info) - get_region_start (region_info)); -} - -inline -size_t get_region_committed_size (heap_segment* region) -{ - uint8_t* start = get_region_start (region); - uint8_t* committed = heap_segment_committed (region); - return committed - start; -} - -inline bool is_free_region (heap_segment* region) -{ - return (heap_segment_allocated (region) == nullptr); -} - - -#endif //USE_REGIONS - -inline -uint8_t* align_on_segment (uint8_t* add) -{ - return (uint8_t*)((size_t)(add + (((size_t)1 << gc_heap::min_segment_size_shr) - 1)) & ~(((size_t)1 << gc_heap::min_segment_size_shr) - 1)); -} - -inline -uint8_t* align_lower_segment (uint8_t* add) -{ - return (uint8_t*)((size_t)(add) & ~(((size_t)1 << gc_heap::min_segment_size_shr) - 1)); -} - - -#ifdef FEATURE_BASICFREEZE -inline -size_t ro_seg_begin_index (heap_segment* seg) -{ -#ifdef USE_REGIONS - size_t begin_index = (size_t)heap_segment_mem (seg) >> gc_heap::min_segment_size_shr; -#else - size_t begin_index = (size_t)seg >> gc_heap::min_segment_size_shr; -#endif //USE_REGIONS - begin_index = max (begin_index, (size_t)g_gc_lowest_address >> gc_heap::min_segment_size_shr); - return begin_index; -} - -inline -size_t ro_seg_end_index (heap_segment* seg) -{ - size_t end_index = (size_t)(heap_segment_reserved (seg) - 1) >> gc_heap::min_segment_size_shr; - end_index = min (end_index, (size_t)g_gc_highest_address >> gc_heap::min_segment_size_shr); - return end_index; -} - - -heap_segment* ro_segment_lookup (uint8_t* o) -{ - uint8_t* ro_seg_start = o; - heap_segment* seg = (heap_segment*)gc_heap::seg_table->lookup (ro_seg_start); - - if (ro_seg_start && in_range_for_segment (o, seg)) - return seg; - else - return 0; -} - -#endif 
//FEATURE_BASICFREEZE - -#ifdef MULTIPLE_HEAPS -inline -gc_heap* seg_mapping_table_heap_of_worker (uint8_t* o) -{ - size_t index = (size_t)o >> gc_heap::min_segment_size_shr; - seg_mapping* entry = &seg_mapping_table[index]; - -#ifdef USE_REGIONS - gc_heap* hp = heap_segment_heap ((heap_segment*)entry); -#else - gc_heap* hp = ((o > entry->boundary) ? entry->h1 : entry->h0); - - dprintf (2, ("checking obj %p, index is %zd, entry: boundary: %p, h0: %p, seg0: %p, h1: %p, seg1: %p", - o, index, (entry->boundary + 1), - (uint8_t*)(entry->h0), (uint8_t*)(entry->seg0), - (uint8_t*)(entry->h1), (uint8_t*)(entry->seg1))); - -#ifdef _DEBUG - heap_segment* seg = ((o > entry->boundary) ? entry->seg1 : entry->seg0); -#ifdef FEATURE_BASICFREEZE - if ((size_t)seg & ro_in_entry) - seg = (heap_segment*)((size_t)seg & ~ro_in_entry); -#endif //FEATURE_BASICFREEZE - -#ifdef TRACE_GC - if (seg) - { - if (in_range_for_segment (o, seg)) - { - dprintf (2, ("obj %p belongs to segment %p(-%p)", o, seg, (uint8_t*)heap_segment_allocated (seg))); - } - else - { - dprintf (2, ("found seg %p(-%p) for obj %p, but it's not on the seg", - seg, (uint8_t*)heap_segment_allocated (seg), o)); - } - } - else - { - dprintf (2, ("could not find obj %p in any existing segments", o)); - } -#endif //TRACE_GC -#endif //_DEBUG -#endif //USE_REGIONS - return hp; -} - - -#endif //MULTIPLE_HEAPS - -// Only returns a valid seg if we can actually find o on the seg. -heap_segment* seg_mapping_table_segment_of (uint8_t* o) -{ -#ifdef FEATURE_BASICFREEZE - if ((o < g_gc_lowest_address) || (o >= g_gc_highest_address)) - return ro_segment_lookup (o); -#endif //FEATURE_BASICFREEZE - - size_t index = (size_t)o >> gc_heap::min_segment_size_shr; - seg_mapping* entry = &seg_mapping_table[index]; - -#ifdef USE_REGIONS - // REGIONS TODO: I think we could simplify this to having the same info for each - // basic entry in a large region so we can get it right away instead of having to go - // back some entries. 
- ptrdiff_t first_field = (ptrdiff_t)heap_segment_allocated ((heap_segment*)entry); - if (first_field == 0) - { - dprintf (REGIONS_LOG, ("asked for seg for %p, in a freed region mem: %p, committed %p", - o, heap_segment_mem ((heap_segment*)entry), - heap_segment_committed ((heap_segment*)entry))); - return 0; - } - // Regions are never going to intersect an ro seg, so this can never be ro_in_entry. - assert (first_field != 0); - assert (first_field != ro_in_entry); - if (first_field < 0) - { - index += first_field; - } - heap_segment* seg = (heap_segment*)&seg_mapping_table[index]; -#else //USE_REGIONS - dprintf (2, ("checking obj %p, index is %zd, entry: boundary: %p, seg0: %p, seg1: %p", - o, index, (entry->boundary + 1), - (uint8_t*)(entry->seg0), (uint8_t*)(entry->seg1))); - - heap_segment* seg = ((o > entry->boundary) ? entry->seg1 : entry->seg0); -#ifdef FEATURE_BASICFREEZE - if ((size_t)seg & ro_in_entry) - seg = (heap_segment*)((size_t)seg & ~ro_in_entry); -#endif //FEATURE_BASICFREEZE -#endif //USE_REGIONS - - if (seg) - { - if (in_range_for_segment (o, seg)) - { - dprintf (2, ("obj %p belongs to segment %p(-%p)", o, (uint8_t*)heap_segment_mem(seg), (uint8_t*)heap_segment_reserved(seg))); - } - else - { - dprintf (2, ("found seg %p(-%p) for obj %p, but it's not on the seg, setting it to 0", - (uint8_t*)heap_segment_mem(seg), (uint8_t*)heap_segment_reserved(seg), o)); - seg = 0; - } - } - else - { - dprintf (2, ("could not find obj %p in any existing segments", o)); - } - -#ifdef FEATURE_BASICFREEZE - // TODO: This was originally written assuming that the seg_mapping_table would always contain entries for ro - // segments whenever the ro segment falls into the [g_gc_lowest_address,g_gc_highest_address) range. I.e., it had an - // extra "&& (size_t)(entry->seg1) & ro_in_entry" expression. 
However, at the moment, grow_brick_card_table does - // not correctly go through the ro segments and add them back to the seg_mapping_table when the [lowest,highest) - // range changes. We should probably go ahead and modify grow_brick_card_table and put back the - // "&& (size_t)(entry->seg1) & ro_in_entry" here. - if (!seg) - { - seg = ro_segment_lookup (o); - if (seg && !in_range_for_segment (o, seg)) - seg = 0; - } -#endif //FEATURE_BASICFREEZE - - return seg; -} - -size_t gcard_of ( uint8_t*); - -#define GC_MARKED (size_t)0x1 -#ifdef DOUBLY_LINKED_FL -// This bit indicates that we'll need to set the bgc mark bit for this object during an FGC. -// We only do this when we decide to compact. -#define BGC_MARKED_BY_FGC (size_t)0x2 -#define MAKE_FREE_OBJ_IN_COMPACT (size_t)0x4 -#define ALLOWED_SPECIAL_HEADER_BITS (GC_MARKED|BGC_MARKED_BY_FGC|MAKE_FREE_OBJ_IN_COMPACT) -#else //DOUBLY_LINKED_FL -#define ALLOWED_SPECIAL_HEADER_BITS (GC_MARKED) -#endif //!DOUBLY_LINKED_FL - -#ifdef HOST_64BIT -#define SPECIAL_HEADER_BITS (0x7) -#else -#define SPECIAL_HEADER_BITS (0x3) -#endif - -#define slot(i, j) ((uint8_t**)(i))[(j)+1] - -#define free_object_base_size (plug_skew + sizeof(ArrayBase)) - -#define free_list_slot(x) ((uint8_t**)(x))[2] -#define free_list_undo(x) ((uint8_t**)(x))[-1] -#define UNDO_EMPTY ((uint8_t*)1) - -#ifdef DOUBLY_LINKED_FL -#define free_list_prev(x) ((uint8_t**)(x))[3] -#define PREV_EMPTY ((uint8_t*)1) - -void check_and_clear_in_free_list (uint8_t* o, size_t size) -{ - if (size >= min_free_list) - { - free_list_prev (o) = PREV_EMPTY; - } -} - -#endif //DOUBLY_LINKED_FL - -class CObjectHeader : public Object -{ -public: - -#if defined(FEATURE_NATIVEAOT) || defined(BUILD_AS_STANDALONE) - // The GC expects the following methods that are provided by the Object class in the CLR but not provided - // by NativeAOT's version of Object. 
- uint32_t GetNumComponents() - { - return ((ArrayBase *)this)->GetNumComponents(); - } - - void Validate(BOOL bDeep=TRUE, BOOL bVerifyNextHeader = FALSE, BOOL bVerifySyncBlock = FALSE) - { - // declaration of extra parameters just so the call site would need no #ifdefs - UNREFERENCED_PARAMETER(bVerifyNextHeader); - UNREFERENCED_PARAMETER(bVerifySyncBlock); - - MethodTable * pMT = GetMethodTable(); - - _ASSERTE(pMT->SanityCheck()); - - bool noRangeChecks = - (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_NO_RANGE_CHECKS) == GCConfig::HEAPVERIFY_NO_RANGE_CHECKS; - - BOOL fSmallObjectHeapPtr = FALSE, fLargeObjectHeapPtr = FALSE; - if (!noRangeChecks) - { - fSmallObjectHeapPtr = g_theGCHeap->IsHeapPointer(this, TRUE); - if (!fSmallObjectHeapPtr) - fLargeObjectHeapPtr = g_theGCHeap->IsHeapPointer(this); - - _ASSERTE(fSmallObjectHeapPtr || fLargeObjectHeapPtr); - } - -#ifdef FEATURE_STRUCTALIGN - _ASSERTE(IsStructAligned((uint8_t *)this, GetMethodTable()->GetBaseAlignment())); -#endif // FEATURE_STRUCTALIGN - -#if defined(FEATURE_64BIT_ALIGNMENT) && !defined(FEATURE_NATIVEAOT) - if (pMT->RequiresAlign8()) - { - _ASSERTE((((size_t)this) & 0x7) == (pMT->IsValueType() ? 
4U : 0U)); - } -#endif // FEATURE_64BIT_ALIGNMENT - -#ifdef VERIFY_HEAP - if (bDeep && (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC)) - g_theGCHeap->ValidateObjectMember(this); -#endif - if (fSmallObjectHeapPtr) - { -#ifdef FEATURE_BASICFREEZE - _ASSERTE(!g_theGCHeap->IsLargeObject(this) || g_theGCHeap->IsInFrozenSegment(this)); -#else - _ASSERTE(!g_theGCHeap->IsLargeObject(this)); -#endif - } - } - - void ValidateHeap(BOOL bDeep) - { - Validate(bDeep); - } - -#endif //FEATURE_NATIVEAOT || BUILD_AS_STANDALONE - - ///// - // - // Header Status Information - // - - MethodTable *GetMethodTable() const - { - return( (MethodTable *) (((size_t) RawGetMethodTable()) & (~SPECIAL_HEADER_BITS))); - } - - void SetMarked() - { - _ASSERTE(RawGetMethodTable()); - RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | GC_MARKED)); - } - - BOOL IsMarked() const - { - return !!(((size_t)RawGetMethodTable()) & GC_MARKED); - } - - void SetPinned() - { - assert (!(gc_heap::settings.concurrent)); - GetHeader()->SetGCBit(); - } - - BOOL IsPinned() const - { - return !!((((CObjectHeader*)this)->GetHeader()->GetBits()) & BIT_SBLK_GC_RESERVE); - } - - // Now we set more bits should actually only clear the mark bit - void ClearMarked() - { -#ifdef DOUBLY_LINKED_FL - RawSetMethodTable ((MethodTable *)(((size_t) RawGetMethodTable()) & (~GC_MARKED))); -#else - RawSetMethodTable (GetMethodTable()); -#endif //DOUBLY_LINKED_FL - } - -#ifdef DOUBLY_LINKED_FL - void SetBGCMarkBit() - { - RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | BGC_MARKED_BY_FGC)); - } - BOOL IsBGCMarkBitSet() const - { - return !!(((size_t)RawGetMethodTable()) & BGC_MARKED_BY_FGC); - } - void ClearBGCMarkBit() - { - RawSetMethodTable((MethodTable *)(((size_t) RawGetMethodTable()) & (~BGC_MARKED_BY_FGC))); - } - - void SetFreeObjInCompactBit() - { - RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | MAKE_FREE_OBJ_IN_COMPACT)); - } - BOOL 
IsFreeObjInCompactBitSet() const - { - return !!(((size_t)RawGetMethodTable()) & MAKE_FREE_OBJ_IN_COMPACT); - } - void ClearFreeObjInCompactBit() - { -#ifdef _DEBUG - // check this looks like an object, but do NOT validate pointers to other objects - // as these may not be valid yet - we are calling this during compact_phase - Validate(FALSE); -#endif //_DEBUG - RawSetMethodTable((MethodTable *)(((size_t) RawGetMethodTable()) & (~MAKE_FREE_OBJ_IN_COMPACT))); - } -#endif //DOUBLY_LINKED_FL - - size_t ClearSpecialBits() - { - size_t special_bits = ((size_t)RawGetMethodTable()) & SPECIAL_HEADER_BITS; - if (special_bits != 0) - { - assert ((special_bits & (~ALLOWED_SPECIAL_HEADER_BITS)) == 0); - RawSetMethodTable ((MethodTable*)(((size_t)RawGetMethodTable()) & ~(SPECIAL_HEADER_BITS))); - } - return special_bits; - } - - void SetSpecialBits (size_t special_bits) - { - assert ((special_bits & (~ALLOWED_SPECIAL_HEADER_BITS)) == 0); - if (special_bits != 0) - { - RawSetMethodTable ((MethodTable*)(((size_t)RawGetMethodTable()) | special_bits)); - } - } - - CGCDesc *GetSlotMap () - { - assert (GetMethodTable()->ContainsGCPointers()); - return CGCDesc::GetCGCDescFromMT(GetMethodTable()); - } - - void SetFree(size_t size) - { - assert (size >= free_object_base_size); - - assert (g_gc_pFreeObjectMethodTable->GetBaseSize() == free_object_base_size); - assert (g_gc_pFreeObjectMethodTable->RawGetComponentSize() == 1); - - RawSetMethodTable( g_gc_pFreeObjectMethodTable ); - - size_t* numComponentsPtr = (size_t*) &((uint8_t*) this)[ArrayBase::GetOffsetOfNumComponents()]; - *numComponentsPtr = size - free_object_base_size; -#ifdef VERIFY_HEAP - //This introduces a bug in the free list management. 
- //((void**) this)[-1] = 0; // clear the sync block, - assert (*numComponentsPtr >= 0); - if (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC) - { - memset (((uint8_t*)this)+sizeof(ArrayBase), 0xcc, *numComponentsPtr); -#ifdef DOUBLY_LINKED_FL - // However, in this case we can't leave the Next field uncleared because no one will clear it - // so it remains 0xcc and that's not good for verification - if (*numComponentsPtr > 0) - { - free_list_slot (this) = 0; - } -#endif //DOUBLY_LINKED_FL - } -#endif //VERIFY_HEAP - -#ifdef DOUBLY_LINKED_FL - // For background GC, we need to distinguish between a free object that's not on the free list - // and one that is. So we always set its prev to PREV_EMPTY to indicate that it's a free - // object that's not on the free list. If it should be on the free list, it will be set to the - // appropriate non zero value. - check_and_clear_in_free_list ((uint8_t*)this, size); -#endif //DOUBLY_LINKED_FL - } - - void UnsetFree() - { - size_t size = free_object_base_size - plug_skew; - - // since we only need to clear 2 ptr size, we do it manually - PTR_PTR m = (PTR_PTR) this; - for (size_t i = 0; i < size / sizeof(PTR_PTR); i++) - *(m++) = 0; - } - - BOOL IsFree () const - { - return (GetMethodTable() == g_gc_pFreeObjectMethodTable); - } - -#ifdef FEATURE_STRUCTALIGN - int GetRequiredAlignment () const - { - return GetMethodTable()->GetRequiredAlignment(); - } -#endif // FEATURE_STRUCTALIGN - - BOOL ContainsGCPointers() const - { - return GetMethodTable()->ContainsGCPointers(); - } - -#ifdef COLLECTIBLE_CLASS - BOOL Collectible() const - { - return GetMethodTable()->Collectible(); - } - - FORCEINLINE BOOL ContainsGCPointersOrCollectible() const - { - MethodTable *pMethodTable = GetMethodTable(); - return (pMethodTable->ContainsGCPointers() || pMethodTable->Collectible()); - } -#endif //COLLECTIBLE_CLASS - - Object* GetObjectBase() const - { - return (Object*) this; - } -}; - -#define header(i) ((CObjectHeader*)(i)) -#define 
method_table(o) ((CObjectHeader*)(o))->GetMethodTable() - -#ifdef DOUBLY_LINKED_FL -inline -BOOL is_on_free_list (uint8_t* o, size_t size) -{ - if (size >= min_free_list) - { - if (header(o)->GetMethodTable() == g_gc_pFreeObjectMethodTable) - { - return (free_list_prev (o) != PREV_EMPTY); - } - } - - return FALSE; -} - -inline -void set_plug_bgc_mark_bit (uint8_t* node) -{ - header(node)->SetBGCMarkBit(); -} - -inline -BOOL is_plug_bgc_mark_bit_set (uint8_t* node) -{ - return header(node)->IsBGCMarkBitSet(); -} - -inline -void clear_plug_bgc_mark_bit (uint8_t* node) -{ - header(node)->ClearBGCMarkBit(); -} - -inline -void set_free_obj_in_compact_bit (uint8_t* node) -{ - header(node)->SetFreeObjInCompactBit(); -} - -inline -BOOL is_free_obj_in_compact_bit_set (uint8_t* node) -{ - return header(node)->IsFreeObjInCompactBitSet(); -} - -inline -void clear_free_obj_in_compact_bit (uint8_t* node) -{ - header(node)->ClearFreeObjInCompactBit(); -} -#endif //DOUBLY_LINKED_FL - -#ifdef SHORT_PLUGS -inline -void set_plug_padded (uint8_t* node) -{ - header(node)->SetMarked(); -} -inline -void clear_plug_padded (uint8_t* node) -{ - header(node)->ClearMarked(); -} -inline -BOOL is_plug_padded (uint8_t* node) -{ - return header(node)->IsMarked(); -} -#else //SHORT_PLUGS -inline void set_plug_padded (uint8_t* node){} -inline void clear_plug_padded (uint8_t* node){} -inline -BOOL is_plug_padded (uint8_t* node){return FALSE;} -#endif //SHORT_PLUGS - - -inline size_t unused_array_size(uint8_t * p) -{ - assert(((CObjectHeader*)p)->IsFree()); - - size_t* numComponentsPtr = (size_t*)(p + ArrayBase::GetOffsetOfNumComponents()); - return free_object_base_size + *numComponentsPtr; -} - -inline -heap_segment* heap_segment_non_sip (heap_segment* ns) -{ -#ifdef USE_REGIONS - if ((ns == 0) || !heap_segment_swept_in_plan (ns)) - { - return ns; - } - else - { - do - { - if (heap_segment_swept_in_plan (ns)) - { - dprintf (REGIONS_LOG, ("region %p->%p SIP", - heap_segment_mem (ns), 
heap_segment_allocated (ns))); - } - - ns = heap_segment_next (ns); - } while ((ns != 0) && heap_segment_swept_in_plan (ns)); - return ns; - } -#else //USE_REGIONS - return ns; -#endif //USE_REGIONS -} - -inline -heap_segment* heap_segment_next_non_sip (heap_segment* seg) -{ - heap_segment* ns = heap_segment_next (seg); -#ifdef USE_REGIONS - return heap_segment_non_sip (ns); -#else - return ns; -#endif //USE_REGIONS -} - -heap_segment* heap_segment_rw (heap_segment* ns) -{ - if ((ns == 0) || !heap_segment_read_only_p (ns)) - { - return ns; - } - else - { - do - { - ns = heap_segment_next (ns); - } while ((ns != 0) && heap_segment_read_only_p (ns)); - return ns; - } -} - -//returns the next non ro segment. -heap_segment* heap_segment_next_rw (heap_segment* seg) -{ - heap_segment* ns = heap_segment_next (seg); - return heap_segment_rw (ns); -} - -// returns the segment before seg. -heap_segment* heap_segment_prev_rw (heap_segment* begin, heap_segment* seg) -{ - assert (begin != 0); - heap_segment* prev = begin; - heap_segment* current = heap_segment_next_rw (begin); - - while (current && current != seg) - { - prev = current; - current = heap_segment_next_rw (current); - } - - if (current == seg) - { - return prev; - } - else - { - return 0; - } -} - -// returns the segment before seg. 
- - -heap_segment* heap_segment_in_range (heap_segment* ns) -{ - if ((ns == 0) || heap_segment_in_range_p (ns)) - { - return ns; - } - else - { - do - { - ns = heap_segment_next (ns); - } while ((ns != 0) && !heap_segment_in_range_p (ns)); - return ns; - } -} - -heap_segment* heap_segment_next_in_range (heap_segment* seg) -{ - heap_segment* ns = heap_segment_next (seg); - return heap_segment_in_range (ns); -} - -struct imemory_data -{ - uint8_t* memory_base; -}; - -struct numa_reserved_block -{ - uint8_t* memory_base; - size_t block_size; - - numa_reserved_block() : memory_base(nullptr), block_size(0) { } -}; - -struct initial_memory_details -{ - imemory_data *initial_memory; - imemory_data *initial_normal_heap; // points into initial_memory_array - imemory_data *initial_large_heap; // points into initial_memory_array - imemory_data *initial_pinned_heap; // points into initial_memory_array - - size_t block_size_normal; - size_t block_size_large; - size_t block_size_pinned; - - int block_count; // # of blocks in each - int current_block_normal; - int current_block_large; - int current_block_pinned; - - enum - { - ALLATONCE = 1, - EACH_GENERATION, - EACH_BLOCK, - ALLATONCE_SEPARATED_POH, - EACH_NUMA_NODE - }; - - size_t allocation_pattern; - - size_t block_size(int i) - { - switch (i / block_count) - { - case 0: return block_size_normal; - case 1: return block_size_large; - case 2: return block_size_pinned; - default: UNREACHABLE(); - } - }; - - void* get_initial_memory (int gen, int h_number) - { - switch (gen) - { - case soh_gen0: - case soh_gen1: - case soh_gen2: return initial_normal_heap[h_number].memory_base; - case loh_generation: return initial_large_heap[h_number].memory_base; - case poh_generation: return initial_pinned_heap[h_number].memory_base; - default: UNREACHABLE(); - } - }; - - size_t get_initial_size (int gen) - { - switch (gen) - { - case soh_gen0: - case soh_gen1: - case soh_gen2: return block_size_normal; - case loh_generation: return 
block_size_large; - case poh_generation: return block_size_pinned; - default: UNREACHABLE(); - } - }; - - int numa_reserved_block_count; - numa_reserved_block* numa_reserved_block_table; -}; - -initial_memory_details memory_details; - -heap_segment* make_initial_segment (int gen, int h_number, gc_heap* hp) -{ - void* mem = memory_details.get_initial_memory (gen, h_number); - size_t size = memory_details.get_initial_size (gen); - heap_segment* res = gc_heap::make_heap_segment ((uint8_t*)mem, size, hp, gen); - - return res; -} - -void* virtual_alloc (size_t size) -{ - return virtual_alloc(size, false); -} - -void* virtual_alloc (size_t size, bool use_large_pages_p, uint16_t numa_node) -{ - size_t requested_size = size; - - if ((gc_heap::reserved_memory_limit - gc_heap::reserved_memory) < requested_size) - { - gc_heap::reserved_memory_limit = gc_heap::reserved_memory_limit + requested_size; - if ((gc_heap::reserved_memory_limit - gc_heap::reserved_memory) < requested_size) - { - return 0; - } - } - - uint32_t flags = VirtualReserveFlags::None; -#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - if (virtual_alloc_hardware_write_watch) - { - flags = VirtualReserveFlags::WriteWatch; - } -#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - - void* prgmem = use_large_pages_p ? - GCToOSInterface::VirtualReserveAndCommitLargePages(requested_size, numa_node) : - GCToOSInterface::VirtualReserve(requested_size, card_size * card_word_width, flags, numa_node); - void *aligned_mem = prgmem; - - // We don't want (prgmem + size) to be right at the end of the address space - // because we'd have to worry about that everytime we do (address + size). - // We also want to make sure that we leave loh_size_threshold at the end - // so we allocate a small object we don't need to worry about overflow there - // when we do alloc_ptr+size. 
- if (prgmem) - { - uint8_t* end_mem = (uint8_t*)prgmem + requested_size; - - if ((end_mem == 0) || ((size_t)(MAX_PTR - end_mem) <= END_SPACE_AFTER_GC)) - { - GCToOSInterface::VirtualRelease (prgmem, requested_size); - dprintf (2, ("Virtual Alloc size %zd returned memory right against 4GB [%zx, %zx[ - discarding", - requested_size, (size_t)prgmem, (size_t)((uint8_t*)prgmem+requested_size))); - prgmem = 0; - aligned_mem = 0; - } - } - - if (prgmem) - { - gc_heap::reserved_memory += requested_size; - } - - dprintf (2, ("Virtual Alloc size %zd: [%zx, %zx[", - requested_size, (size_t)prgmem, (size_t)((uint8_t*)prgmem+requested_size))); - - return aligned_mem; -} - -static size_t get_valid_segment_size (BOOL large_seg=FALSE) -{ - size_t seg_size, initial_seg_size; - - if (!large_seg) - { - initial_seg_size = INITIAL_ALLOC; - seg_size = static_cast(GCConfig::GetSegmentSize()); - } - else - { - initial_seg_size = LHEAP_ALLOC; - seg_size = static_cast(GCConfig::GetSegmentSize()) / 2; - } - -#ifdef MULTIPLE_HEAPS -#ifdef HOST_64BIT - if (!large_seg) -#endif // HOST_64BIT - { - if (g_num_processors > 4) - initial_seg_size /= 2; - if (g_num_processors > 8) - initial_seg_size /= 2; - } -#endif //MULTIPLE_HEAPS - - // if seg_size is small but not 0 (0 is default if config not set) - // then set the segment to the minimum size - if (!g_theGCHeap->IsValidSegmentSize(seg_size)) - { - // if requested size is between 1 byte and 4MB, use min - if ((seg_size >> 1) && !(seg_size >> 22)) - seg_size = 1024*1024*4; - else - seg_size = initial_seg_size; - } - -#ifdef HOST_64BIT - seg_size = round_up_power2 (seg_size); -#else - seg_size = round_down_power2 (seg_size); -#endif // HOST_64BIT - - return (seg_size); -} - -#ifndef USE_REGIONS -void -gc_heap::compute_new_ephemeral_size() -{ - int eph_gen_max = max_generation - 1 - (settings.promotion ? 
1 : 0); - size_t padding_size = 0; - - for (int i = 0; i <= eph_gen_max; i++) - { - dynamic_data* dd = dynamic_data_of (i); - total_ephemeral_size += (dd_survived_size (dd) - dd_pinned_survived_size (dd)); -#ifdef RESPECT_LARGE_ALIGNMENT - total_ephemeral_size += dd_num_npinned_plugs (dd) * switch_alignment_size (FALSE); -#endif //RESPECT_LARGE_ALIGNMENT -#ifdef FEATURE_STRUCTALIGN - total_ephemeral_size += dd_num_npinned_plugs (dd) * MAX_STRUCTALIGN; -#endif //FEATURE_STRUCTALIGN - -#ifdef SHORT_PLUGS - padding_size += dd_padding_size (dd); -#endif //SHORT_PLUGS - } - - total_ephemeral_size += eph_gen_starts_size; - -#ifdef RESPECT_LARGE_ALIGNMENT - size_t planned_ephemeral_size = heap_segment_plan_allocated (ephemeral_heap_segment) - - generation_plan_allocation_start (generation_of (max_generation-1)); - total_ephemeral_size = min (total_ephemeral_size, planned_ephemeral_size); -#endif //RESPECT_LARGE_ALIGNMENT - -#ifdef SHORT_PLUGS - total_ephemeral_size = Align ((size_t)((double)total_ephemeral_size * short_plugs_pad_ratio) + 1); - total_ephemeral_size += Align (DESIRED_PLUG_LENGTH); -#endif //SHORT_PLUGS - - dprintf (3, ("total ephemeral size is %zx, padding %zx(%zx)", - total_ephemeral_size, - padding_size, (total_ephemeral_size - padding_size))); -} - -heap_segment* -gc_heap::soh_get_segment_to_expand() -{ - size_t size = soh_segment_size; - - ordered_plug_indices_init = FALSE; - use_bestfit = FALSE; - - //compute the size of the new ephemeral heap segment. - compute_new_ephemeral_size(); - - if ((settings.pause_mode != pause_low_latency) && - (settings.pause_mode != pause_no_gc) -#ifdef BACKGROUND_GC - && (!gc_heap::background_running_p()) -#endif //BACKGROUND_GC - ) - { - assert (settings.condemned_generation <= max_generation); - allocator* gen_alloc = ((settings.condemned_generation == max_generation) ? 
nullptr : - generation_allocator (generation_of (max_generation))); - dprintf (2, ("(gen%d)soh_get_segment_to_expand", settings.condemned_generation)); - - // try to find one in the gen 2 segment list, search backwards because the first segments - // tend to be more compact than the later ones. - heap_segment* fseg = heap_segment_rw (generation_start_segment (generation_of (max_generation))); - - _ASSERTE(fseg != NULL); - -#ifdef SEG_REUSE_STATS - int try_reuse = 0; -#endif //SEG_REUSE_STATS - - heap_segment* seg = ephemeral_heap_segment; - while ((seg = heap_segment_prev_rw (fseg, seg)) && (seg != fseg)) - { -#ifdef SEG_REUSE_STATS - try_reuse++; -#endif //SEG_REUSE_STATS - - if (can_expand_into_p (seg, size/3, total_ephemeral_size, gen_alloc)) - { - get_gc_data_per_heap()->set_mechanism (gc_heap_expand, - (use_bestfit ? expand_reuse_bestfit : expand_reuse_normal)); - if (settings.condemned_generation == max_generation) - { - if (use_bestfit) - { - build_ordered_free_spaces (seg); - dprintf (GTC_LOG, ("can use best fit")); - } - -#ifdef SEG_REUSE_STATS - dprintf (SEG_REUSE_LOG_0, ("(gen%d)soh_get_segment_to_expand: found seg #%d to reuse", - settings.condemned_generation, try_reuse)); -#endif //SEG_REUSE_STATS - dprintf (GTC_LOG, ("max_gen: Found existing segment to expand into %zx", (size_t)seg)); - return seg; - } - else - { -#ifdef SEG_REUSE_STATS - dprintf (SEG_REUSE_LOG_0, ("(gen%d)soh_get_segment_to_expand: found seg #%d to reuse - returning", - settings.condemned_generation, try_reuse)); -#endif //SEG_REUSE_STATS - dprintf (GTC_LOG, ("max_gen-1: Found existing segment to expand into %zx", (size_t)seg)); - - // If we return 0 here, the allocator will think since we are short on end - // of seg we need to trigger a full compacting GC. So if sustained low latency - // is set we should acquire a new seg instead, that way we wouldn't be short. - // The real solution, of course, is to actually implement seg reuse in gen1. 
- if (settings.pause_mode != pause_sustained_low_latency) - { - dprintf (GTC_LOG, ("max_gen-1: SustainedLowLatency is set, acquire a new seg")); - get_gc_data_per_heap()->set_mechanism (gc_heap_expand, expand_next_full_gc); - return 0; - } - } - } - } - } - - heap_segment* result = get_segment (size, gc_oh_num::soh); - - if(result) - { -#ifdef BACKGROUND_GC - if (current_c_gc_state == c_gc_state_planning) - { - // When we expand heap during bgc sweep, we set the seg to be swept so - // we'll always look at cards for objects on the new segment. - result->flags |= heap_segment_flags_swept; - } -#endif //BACKGROUND_GC - - FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(result), - (size_t)(heap_segment_reserved (result) - heap_segment_mem(result)), - gc_etw_segment_small_object_heap); - } - - get_gc_data_per_heap()->set_mechanism (gc_heap_expand, (result ? expand_new_seg : expand_no_memory)); - - if (result == 0) - { - dprintf (2, ("h%d: failed to allocate a new segment!", heap_number)); - } - else - { -#ifdef MULTIPLE_HEAPS - heap_segment_heap (result) = this; -#endif //MULTIPLE_HEAPS - } - - dprintf (GTC_LOG, ("(gen%d)creating new segment %p", settings.condemned_generation, result)); - return result; -} - -//returns 0 in case of allocation failure -heap_segment* -gc_heap::get_segment (size_t size, gc_oh_num oh) -{ - assert(oh != gc_oh_num::unknown); - BOOL uoh_p = (oh == gc_oh_num::loh) || (oh == gc_oh_num::poh); - if (heap_hard_limit) - return NULL; - - heap_segment* result = 0; - - if (segment_standby_list != 0) - { - result = segment_standby_list; - heap_segment* last = 0; - while (result) - { - size_t hs = (size_t)(heap_segment_reserved (result) - (uint8_t*)result); - if ((hs >= size) && ((hs / 2) < size)) - { - dprintf (2, ("Hoarded segment %zx found", (size_t) result)); - if (last) - { - heap_segment_next (last) = heap_segment_next (result); - } - else - { - segment_standby_list = heap_segment_next (result); - } - break; - } - else - { - last = result; - result 
= heap_segment_next (result); - } - } - } - - if (result) - { - init_heap_segment (result, __this); -#ifdef BACKGROUND_GC - if (is_bgc_in_progress()) - { - dprintf (GC_TABLE_LOG, ("hoarded seg %p, mark_array is %p", result, mark_array)); - if (!commit_mark_array_new_seg (__this, result)) - { - dprintf (GC_TABLE_LOG, ("failed to commit mark array for hoarded seg")); - // If we can't use it we need to thread it back. - if (segment_standby_list != 0) - { - heap_segment_next (result) = segment_standby_list; - segment_standby_list = result; - } - else - { - segment_standby_list = result; - } - - result = 0; - } - } -#endif //BACKGROUND_GC - - if (result) - seg_mapping_table_add_segment (result, __this); - } - - if (!result) - { - void* mem = virtual_alloc (size); - if (!mem) - { - fgm_result.set_fgm (fgm_reserve_segment, size, uoh_p); - return 0; - } - - result = make_heap_segment ((uint8_t*)mem, size, __this, (oh + max_generation)); - - if (result) - { - uint8_t* start; - uint8_t* end; - if (mem < g_gc_lowest_address) - { - start = (uint8_t*)mem; - } - else - { - start = (uint8_t*)g_gc_lowest_address; - } - - if (((uint8_t*)mem + size) > g_gc_highest_address) - { - end = (uint8_t*)mem + size; - } - else - { - end = (uint8_t*)g_gc_highest_address; - } - - if (gc_heap::grow_brick_card_tables (start, end, size, result, __this, uoh_p) != 0) - { - // release_segment needs the flags to decrement the proper bucket - size_t flags = 0; - if (oh == poh) - { - flags = heap_segment_flags_poh; - } - else if (oh == loh) - { - flags = heap_segment_flags_loh; - } - result->flags |= flags; - release_segment (result); - return 0; - } - } - else - { - fgm_result.set_fgm (fgm_commit_segment_beg, SEGMENT_INITIAL_COMMIT, uoh_p); - virtual_free (mem, size); - } - - if (result) - { - seg_mapping_table_add_segment (result, __this); - } - } - -#ifdef BACKGROUND_GC - if (result) - { - ::record_changed_seg ((uint8_t*)result, heap_segment_reserved (result), - settings.gc_index, current_bgc_state, 
- seg_added); - bgc_verify_mark_array_cleared (result); - } -#endif //BACKGROUND_GC - - dprintf (GC_TABLE_LOG, ("h%d: new seg: %p-%p (%zd)", heap_number, result, ((uint8_t*)result + size), size)); - return result; -} - -#endif //!USE_REGIONS - -#ifdef MULTIPLE_HEAPS -#ifdef HOST_X86 -#ifdef _MSC_VER -#pragma warning(disable:4035) - static ptrdiff_t get_cycle_count() - { - __asm rdtsc - } -#pragma warning(default:4035) -#elif defined(__GNUC__) - static ptrdiff_t get_cycle_count() - { - ptrdiff_t cycles; - ptrdiff_t cyclesHi; - __asm__ __volatile__ - ("rdtsc":"=a" (cycles), "=d" (cyclesHi)); - return cycles; - } -#else //_MSC_VER -#error Unknown compiler -#endif //_MSC_VER -#elif defined(TARGET_AMD64) -#ifdef _MSC_VER -extern "C" uint64_t __rdtsc(); -#pragma intrinsic(__rdtsc) - static ptrdiff_t get_cycle_count() - { - return (ptrdiff_t)__rdtsc(); - } -#elif defined(__GNUC__) - static ptrdiff_t get_cycle_count() - { - ptrdiff_t cycles; - ptrdiff_t cyclesHi; - __asm__ __volatile__ - ("rdtsc":"=a" (cycles), "=d" (cyclesHi)); - return (cyclesHi << 32) | cycles; - } -#else // _MSC_VER - extern "C" ptrdiff_t get_cycle_count(void); -#endif // _MSC_VER -#elif defined(TARGET_LOONGARCH64) - static ptrdiff_t get_cycle_count() - { - ////FIXME: TODO for LOONGARCH64: - //ptrdiff_t cycle; - __asm__ volatile ("break 0 \n"); - return 0; - } -#else - static ptrdiff_t get_cycle_count() - { - // @ARMTODO, @ARM64TODO, @WASMTODO: cycle counter is not exposed to user mode. For now (until we can show this - // makes a difference on the configurations on which we'll run) just return 0. This will result in - // all buffer access times being reported as equal in access_time(). - return 0; - } -#endif //TARGET_X86 - -// We may not be on contiguous numa nodes so need to store -// the node index as well. 
-struct node_heap_count -{ - int node_no; - int heap_count; -}; - -class heap_select -{ - heap_select() {} -public: - static uint8_t* sniff_buffer; - static unsigned n_sniff_buffers; - static unsigned cur_sniff_index; - - static uint16_t proc_no_to_heap_no[MAX_SUPPORTED_CPUS]; - static uint16_t heap_no_to_proc_no[MAX_SUPPORTED_CPUS]; - static uint16_t heap_no_to_numa_node[MAX_SUPPORTED_CPUS]; - static uint16_t numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4]; - -#ifdef HEAP_BALANCE_INSTRUMENTATION - // Note this is the total numa nodes GC heaps are on. There might be - // more on the machine if GC threads aren't using all of them. - static uint16_t total_numa_nodes; - static node_heap_count heaps_on_node[MAX_SUPPORTED_NODES]; -#endif - - static int access_time(uint8_t *sniff_buffer, int heap_number, unsigned sniff_index, unsigned n_sniff_buffers) - { - ptrdiff_t start_cycles = get_cycle_count(); - uint8_t sniff = sniff_buffer[(1 + heap_number*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE]; - assert (sniff == 0); - ptrdiff_t elapsed_cycles = get_cycle_count() - start_cycles; - // add sniff here just to defeat the optimizer - elapsed_cycles += sniff; - return (int) elapsed_cycles; - } - -public: - static BOOL init(int n_heaps) - { - assert (sniff_buffer == NULL && n_sniff_buffers == 0); - if (!GCToOSInterface::CanGetCurrentProcessorNumber()) - { - n_sniff_buffers = n_heaps*2+1; - size_t n_cache_lines = 1 + n_heaps * n_sniff_buffers + 1; - size_t sniff_buf_size = n_cache_lines * HS_CACHE_LINE_SIZE; - if (sniff_buf_size / HS_CACHE_LINE_SIZE != n_cache_lines) // check for overlow - { - return FALSE; - } - - sniff_buffer = new (nothrow) uint8_t[sniff_buf_size]; - if (sniff_buffer == 0) - return FALSE; - memset(sniff_buffer, 0, sniff_buf_size*sizeof(uint8_t)); - } - - bool do_numa = GCToOSInterface::CanEnableGCNumaAware(); - - // we want to assign heap indices such that there is a contiguous - // range of heap numbers for each numa node - - // we do this in two passes: - 
// 1. gather processor numbers and numa node numbers for all heaps - // 2. assign heap numbers for each numa node - - // Pass 1: gather processor numbers and numa node numbers - uint16_t proc_no[MAX_SUPPORTED_CPUS]; - uint16_t node_no[MAX_SUPPORTED_CPUS]; - uint16_t max_node_no = 0; - uint16_t heap_num; - for (heap_num = 0; heap_num < n_heaps; heap_num++) - { - if (!GCToOSInterface::GetProcessorForHeap (heap_num, &proc_no[heap_num], &node_no[heap_num])) - break; - assert(proc_no[heap_num] < MAX_SUPPORTED_CPUS); - if (!do_numa || node_no[heap_num] == NUMA_NODE_UNDEFINED) - node_no[heap_num] = 0; - max_node_no = max(max_node_no, node_no[heap_num]); - } - - // Pass 2: assign heap numbers by numa node - int cur_heap_no = 0; - for (uint16_t cur_node_no = 0; cur_node_no <= max_node_no; cur_node_no++) - { - for (int i = 0; i < heap_num; i++) - { - if (node_no[i] != cur_node_no) - continue; - - // we found a heap on cur_node_no - heap_no_to_proc_no[cur_heap_no] = proc_no[i]; - heap_no_to_numa_node[cur_heap_no] = cur_node_no; - - cur_heap_no++; - } - } - - return TRUE; - } - - static void init_cpu_mapping(int heap_number) - { - if (GCToOSInterface::CanGetCurrentProcessorNumber()) - { - uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber(); - // For a 32-bit process running on a machine with > 64 procs, - // even though the process can only use up to 32 procs, the processor - // index can be >= 64; or in the cpu group case, if the process is not running in cpu group #0, - // the GetCurrentProcessorNumber will return a number that's >= 64. 
- proc_no_to_heap_no[proc_no % MAX_SUPPORTED_CPUS] = (uint16_t)heap_number; - } - } - - static void mark_heap(int heap_number) - { - if (GCToOSInterface::CanGetCurrentProcessorNumber()) - return; - - for (unsigned sniff_index = 0; sniff_index < n_sniff_buffers; sniff_index++) - sniff_buffer[(1 + heap_number*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE] &= 1; - } - - static int select_heap(alloc_context* acontext) - { -#ifndef TRACE_GC - UNREFERENCED_PARAMETER(acontext); // only referenced by dprintf -#endif //TRACE_GC - - if (GCToOSInterface::CanGetCurrentProcessorNumber()) - { - uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber(); - // For a 32-bit process running on a machine with > 64 procs, - // even though the process can only use up to 32 procs, the processor - // index can be >= 64; or in the cpu group case, if the process is not running in cpu group #0, - // the GetCurrentProcessorNumber will return a number that's >= 64. - int adjusted_heap = proc_no_to_heap_no[proc_no % MAX_SUPPORTED_CPUS]; - // with dynamic heap count, need to make sure the value is in range. 
- if (adjusted_heap >= gc_heap::n_heaps) - { - adjusted_heap %= gc_heap::n_heaps; - } - return adjusted_heap; - } - - unsigned sniff_index = Interlocked::Increment(&cur_sniff_index); - sniff_index %= n_sniff_buffers; - - int best_heap = 0; - int best_access_time = 1000*1000*1000; - int second_best_access_time = best_access_time; - - uint8_t *l_sniff_buffer = sniff_buffer; - unsigned l_n_sniff_buffers = n_sniff_buffers; - for (int heap_number = 0; heap_number < gc_heap::n_heaps; heap_number++) - { - int this_access_time = access_time(l_sniff_buffer, heap_number, sniff_index, l_n_sniff_buffers); - if (this_access_time < best_access_time) - { - second_best_access_time = best_access_time; - best_access_time = this_access_time; - best_heap = heap_number; - } - else if (this_access_time < second_best_access_time) - { - second_best_access_time = this_access_time; - } - } - - if (best_access_time*2 < second_best_access_time) - { - sniff_buffer[(1 + best_heap*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE] &= 1; - - dprintf (3, ("select_heap yields crisp %d for context %p\n", best_heap, (void *)acontext)); - } - else - { - dprintf (3, ("select_heap yields vague %d for context %p\n", best_heap, (void *)acontext )); - } - - return best_heap; - } - - static bool can_find_heap_fast() - { - return GCToOSInterface::CanGetCurrentProcessorNumber(); - } - - static uint16_t find_proc_no_from_heap_no(int heap_number) - { - return heap_no_to_proc_no[heap_number]; - } - - static uint16_t find_numa_node_from_heap_no(int heap_number) - { - return heap_no_to_numa_node[heap_number]; - } - - static void init_numa_node_to_heap_map(int nheaps) - { - // Called right after GCHeap::Init() for each heap - // For each NUMA node used by the heaps, the - // numa_node_to_heap_map[numa_node] is set to the first heap number on that node and - // numa_node_to_heap_map[numa_node + 1] is set to the first heap number not on that node - // Set the start of the heap number range for the first NUMA node - 
numa_node_to_heap_map[heap_no_to_numa_node[0]] = 0; -#ifdef HEAP_BALANCE_INSTRUMENTATION - total_numa_nodes = 0; - memset (heaps_on_node, 0, sizeof (heaps_on_node)); - heaps_on_node[0].node_no = heap_no_to_numa_node[0]; - heaps_on_node[0].heap_count = 1; -#endif //HEAP_BALANCE_INSTRUMENTATION - - for (int i=1; i < nheaps; i++) - { - if (heap_no_to_numa_node[i] != heap_no_to_numa_node[i-1]) - { -#ifdef HEAP_BALANCE_INSTRUMENTATION - total_numa_nodes++; - heaps_on_node[total_numa_nodes].node_no = heap_no_to_numa_node[i]; -#endif - - // Set the end of the heap number range for the previous NUMA node - numa_node_to_heap_map[heap_no_to_numa_node[i-1] + 1] = - // Set the start of the heap number range for the current NUMA node - numa_node_to_heap_map[heap_no_to_numa_node[i]] = (uint16_t)i; - } -#ifdef HEAP_BALANCE_INSTRUMENTATION - (heaps_on_node[total_numa_nodes].heap_count)++; -#endif - } - - // Set the end of the heap range for the last NUMA node - numa_node_to_heap_map[heap_no_to_numa_node[nheaps-1] + 1] = (uint16_t)nheaps; //mark the end with nheaps - -#ifdef HEAP_BALANCE_INSTRUMENTATION - total_numa_nodes++; -#endif - } - - static bool get_info_proc (int index, uint16_t* proc_no, uint16_t* node_no, int* start_heap, int* end_heap) - { - if (!GCToOSInterface::GetProcessorForHeap ((uint16_t)index, proc_no, node_no)) - return false; - - if (*node_no == NUMA_NODE_UNDEFINED) - *node_no = 0; - - *start_heap = (int)numa_node_to_heap_map[*node_no]; - *end_heap = (int)(numa_node_to_heap_map[*node_no + 1]); - - return true; - } - - static void distribute_other_procs (bool distribute_all_p) - { - if (affinity_config_specified_p) - return; - - if (distribute_all_p) - { - uint16_t current_heap_no_on_node[MAX_SUPPORTED_CPUS]; - memset (current_heap_no_on_node, 0, sizeof (current_heap_no_on_node)); - uint16_t current_heap_no = 0; - - uint16_t proc_no = 0; - uint16_t node_no = 0; - - for (int i = gc_heap::n_heaps; i < (int)g_num_active_processors; i++) - { - int start_heap, 
end_heap; - if (!get_info_proc (i, &proc_no, &node_no, &start_heap, &end_heap)) - break; - - // This indicates there are heaps on this node - if ((end_heap - start_heap) > 0) - { - proc_no_to_heap_no[proc_no] = (current_heap_no_on_node[node_no] % (uint16_t)(end_heap - start_heap)) + (uint16_t)start_heap; - (current_heap_no_on_node[node_no])++; - } - else - { - proc_no_to_heap_no[proc_no] = current_heap_no % gc_heap::n_heaps; - (current_heap_no)++; - } - } - } - else - { - // This is for scenarios where GCHeapCount is specified as something like - // (g_num_active_processors - 2) to allow less randomization to the Server GC threads. - // In this case we want to assign the right heaps to those procs, ie if they share - // the same numa node we want to assign local heaps to those procs. Otherwise we - // let the heap balancing mechanism take over for now. - uint16_t proc_no = 0; - uint16_t node_no = 0; - int current_node_no = -1; - int current_heap_on_node = -1; - - for (int i = gc_heap::n_heaps; i < (int)g_num_active_processors; i++) - { - int start_heap, end_heap; - if (!get_info_proc (i, &proc_no, &node_no, &start_heap, &end_heap)) - break; - - if ((end_heap - start_heap) > 0) - { - if (node_no == current_node_no) - { - // We already iterated through all heaps on this node, don't add more procs to these - // heaps. 
- if (current_heap_on_node >= end_heap) - { - continue; - } - } - else - { - current_node_no = node_no; - current_heap_on_node = start_heap; - } - - proc_no_to_heap_no[proc_no] = (uint16_t)current_heap_on_node; - - current_heap_on_node++; - } - } + if (mechanism >= 0) + { + descr = &gc_mechanisms_descr[(gc_mechanism_per_heap)i]; + dprintf (DT_LOG_0, ("[%2d]%s%s", + heap_index, + descr->name, + (descr->descr)[mechanism])); } } +#endif //SIMPLE_DPRINTF && DT_LOG +} + +void gc_history_global::print() +{ +#ifdef DT_LOG + char str_settings[64]; + memset (str_settings, '|', sizeof (char) * 64); + str_settings[max_global_mechanisms_count*2] = 0; - static void get_heap_range_for_heap(int hn, int* start, int* end) + for (int i = 0; i < max_global_mechanisms_count; i++) { - uint16_t numa_node = heap_no_to_numa_node[hn]; - *start = (int)numa_node_to_heap_map[numa_node]; - *end = (int)(numa_node_to_heap_map[numa_node+1]); -#ifdef HEAP_BALANCE_INSTRUMENTATION - dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMPget_heap_range: %d is in numa node %d, start = %d, end = %d", hn, numa_node, *start, *end)); -#endif //HEAP_BALANCE_INSTRUMENTATION + str_settings[i * 2] = (get_mechanism_p ((gc_global_mechanism_p)i) ? 'Y' : 'N'); } -}; -uint8_t* heap_select::sniff_buffer; -unsigned heap_select::n_sniff_buffers; -unsigned heap_select::cur_sniff_index; -uint16_t heap_select::proc_no_to_heap_no[MAX_SUPPORTED_CPUS]; -uint16_t heap_select::heap_no_to_proc_no[MAX_SUPPORTED_CPUS]; -uint16_t heap_select::heap_no_to_numa_node[MAX_SUPPORTED_CPUS]; -uint16_t heap_select::numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4]; -#ifdef HEAP_BALANCE_INSTRUMENTATION -uint16_t heap_select::total_numa_nodes; -node_heap_count heap_select::heaps_on_node[MAX_SUPPORTED_NODES]; -#endif - -#ifdef HEAP_BALANCE_INSTRUMENTATION -// This records info we use to look at effect of different strategies -// for heap balancing. 
-struct heap_balance_info -{ - uint64_t timestamp; - // This also encodes when we detect the thread runs on - // different proc during a balance attempt. Sometimes - // I observe this happens multiple times during one attempt! - // If this happens, I just record the last proc we observe - // and set MSB. - int tid; - // This records the final alloc_heap for the thread. - // - // This also encodes the reason why we needed to set_home_heap - // in balance_heaps. - // If we set it because the home heap is not the same as the proc, - // we set MSB. - // - // If we set ideal proc, we set the 2nd MSB. - int alloc_heap; - int ideal_proc_no; -}; -// This means inbetween each GC we can log at most this many entries per proc. -// This is usually enough. Most of the time we only need to log something every 128k -// of allocations in balance_heaps and gen0 budget is <= 200mb. -#define default_max_hb_heap_balance_info 4096 + dprintf (DT_LOG_0, ("[hp]|c|p|o|d|b|e|")); -struct heap_balance_info_proc -{ - int count; - int index; - heap_balance_info hb_info[default_max_hb_heap_balance_info]; -}; + dprintf (DT_LOG_0, ("%4d|%s", num_heaps, str_settings)); + dprintf (DT_LOG_0, ("Condemned gen%d(reason: %s; mode: %s), youngest budget %zd(%d), memload %d", + condemned_generation, + str_gc_reasons[reason], + str_gc_pause_modes[pause_mode], + final_youngest_desired, + gen0_reduction_count, + mem_pressure)); +#endif //DT_LOG +} -struct heap_balance_info_numa +#ifdef FEATURE_BASICFREEZE +sorted_table* +sorted_table::make_sorted_table () { - heap_balance_info_proc* hb_info_procs; -}; + size_t size = 400; -uint64_t start_raw_ts = 0; -bool cpu_group_enabled_p = false; -uint32_t procs_per_numa_node = 0; -uint16_t total_numa_nodes_on_machine = 0; -uint32_t procs_per_cpu_group = 0; -uint16_t total_cpu_groups_on_machine = 0; -// Note this is still on one of the numa nodes, so we'll incur a remote access -// no matter what. 
-heap_balance_info_numa* hb_info_numa_nodes = NULL; + // allocate one more bk to store the older slot address. + sorted_table* res = (sorted_table*)new (nothrow) char [sizeof (sorted_table) + (size + 1) * sizeof (bk)]; + if (!res) + return 0; + res->size = size; + res->slots = (bk*)(res + 1); + res->old_slots = 0; + res->clear(); + return res; +} -// TODO: This doesn't work for multiple nodes per CPU group yet. -int get_proc_index_numa (int proc_no, int* numa_no) +void +sorted_table::delete_sorted_table() { - if (total_numa_nodes_on_machine == 1) - { - *numa_no = 0; - return proc_no; - } - else + if (slots != (bk*)(this+1)) { - if (cpu_group_enabled_p) - { - // see vm\gcenv.os.cpp GroupProcNo implementation. - *numa_no = proc_no >> 6; - return (proc_no % 64); - } - else - { - *numa_no = proc_no / procs_per_numa_node; - return (proc_no % procs_per_numa_node); - } + delete[] slots; } + delete_old_slots(); } - - - -const int hb_log_buffer_size = 4096; -static char hb_log_buffer[hb_log_buffer_size]; -int last_hb_recorded_gc_index = -1; -#endif //HEAP_BALANCE_INSTRUMENTATION - -void set_thread_affinity_for_heap (int heap_number, uint16_t proc_no) +void +sorted_table::delete_old_slots() { - if (!GCToOSInterface::SetThreadAffinity (proc_no)) + uint8_t* sl = (uint8_t*)old_slots; + while (sl) { - dprintf (1, ("Failed to set thread affinity for GC thread %d on proc #%d", heap_number, proc_no)); + uint8_t* dsl = sl; + sl = last_slot ((bk*)sl); + delete[] dsl; } + old_slots = 0; } - -#endif //MULTIPLE_HEAPS - -class mark +void +sorted_table::enqueue_old_slot(bk* sl) { -public: - uint8_t* first; - size_t len; - - // If we want to save space we can have a pool of plug_and_gap's instead of - // always having 2 allocated for each pinned plug. - gap_reloc_pair saved_pre_plug; - // If we decide to not compact, we need to restore the original values. 
- gap_reloc_pair saved_pre_plug_reloc; - - gap_reloc_pair saved_post_plug; - - // Supposedly Pinned objects cannot have references but we are seeing some from pinvoke - // frames. Also if it's an artificially pinned plug created by us, it can certainly - // have references. - // We know these cases will be rare so we can optimize this to be only allocated on demand. - gap_reloc_pair saved_post_plug_reloc; - - // We need to calculate this after we are done with plan phase and before compact - // phase because compact phase will change the bricks so relocate_address will no - // longer work. - uint8_t* saved_pre_plug_info_reloc_start; - - // We need to save this because we will have no way to calculate it, unlike the - // pre plug info start which is right before this plug. - uint8_t* saved_post_plug_info_start; - -#ifdef SHORT_PLUGS - uint8_t* allocation_context_start_region; -#endif //SHORT_PLUGS - - // How the bits in these bytes are organized: - // MSB --> LSB - // bit to indicate whether it's a short obj | 3 bits for refs in this short obj | 2 unused bits | bit to indicate if it's collectible | last bit - // last bit indicates if there's pre or post info associated with this plug. If it's not set all other bits will be 0. - BOOL saved_pre_p; - BOOL saved_post_p; - -#ifdef _DEBUG - // We are seeing this is getting corrupted for a PP with a NP after. - // Save it when we first set it and make sure it doesn't change. 
- gap_reloc_pair saved_post_plug_debug; -#endif //_DEBUG - - size_t get_max_short_bits() - { - return (sizeof (gap_reloc_pair) / sizeof (uint8_t*)); - } - - // pre bits - size_t get_pre_short_start_bit () - { - return (sizeof (saved_pre_p) * 8 - 1 - (sizeof (gap_reloc_pair) / sizeof (uint8_t*))); - } - - BOOL pre_short_p() - { - return (saved_pre_p & (1 << (sizeof (saved_pre_p) * 8 - 1))); - } - - void set_pre_short() - { - saved_pre_p |= (1 << (sizeof (saved_pre_p) * 8 - 1)); - } - - void set_pre_short_bit (size_t bit) - { - saved_pre_p |= 1 << (get_pre_short_start_bit() + bit); - } - - BOOL pre_short_bit_p (size_t bit) - { - return (saved_pre_p & (1 << (get_pre_short_start_bit() + bit))); - } - -#ifdef COLLECTIBLE_CLASS - void set_pre_short_collectible() - { - saved_pre_p |= 2; - } - - BOOL pre_short_collectible_p() - { - return (saved_pre_p & 2); - } -#endif //COLLECTIBLE_CLASS - - // post bits - size_t get_post_short_start_bit () - { - return (sizeof (saved_post_p) * 8 - 1 - (sizeof (gap_reloc_pair) / sizeof (uint8_t*))); - } - - BOOL post_short_p() - { - return (saved_post_p & (1 << (sizeof (saved_post_p) * 8 - 1))); - } - - void set_post_short() - { - saved_post_p |= (1 << (sizeof (saved_post_p) * 8 - 1)); - } - - void set_post_short_bit (size_t bit) - { - saved_post_p |= 1 << (get_post_short_start_bit() + bit); - } - - BOOL post_short_bit_p (size_t bit) - { - return (saved_post_p & (1 << (get_post_short_start_bit() + bit))); - } - -#ifdef COLLECTIBLE_CLASS - void set_post_short_collectible() - { - saved_post_p |= 2; - } - - BOOL post_short_collectible_p() - { - return (saved_post_p & 2); - } -#endif //COLLECTIBLE_CLASS - - uint8_t* get_plug_address() { return first; } - - BOOL has_pre_plug_info() { return saved_pre_p; } - BOOL has_post_plug_info() { return saved_post_p; } - - gap_reloc_pair* get_pre_plug_reloc_info() { return &saved_pre_plug_reloc; } - gap_reloc_pair* get_post_plug_reloc_info() { return &saved_post_plug_reloc; } - void 
set_pre_plug_info_reloc_start (uint8_t* reloc) { saved_pre_plug_info_reloc_start = reloc; } - uint8_t* get_post_plug_info_start() { return saved_post_plug_info_start; } - - // We need to temporarily recover the shortened plugs for compact phase so we can - // copy over the whole plug and their related info (mark bits/cards). But we will - // need to set the artificial gap back so compact phase can keep reading the plug info. - // We also need to recover the saved info because we'll need to recover it later. - // - // So we would call swap_p*_plug_and_saved once to recover the object info; then call - // it again to recover the artificial gap. - void swap_pre_plug_and_saved() - { - gap_reloc_pair temp; - memcpy (&temp, (first - sizeof (plug_and_gap)), sizeof (temp)); - memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug_reloc, sizeof (saved_pre_plug_reloc)); - saved_pre_plug_reloc = temp; - } + last_slot (sl) = (uint8_t*)old_slots; + old_slots = sl; +} - void swap_post_plug_and_saved() +inline +size_t +sorted_table::lookup (uint8_t*& add) +{ + ptrdiff_t high = (count-1); + ptrdiff_t low = 0; + ptrdiff_t ti; + ptrdiff_t mid; + bk* buck = buckets(); + while (low <= high) { - gap_reloc_pair temp; - memcpy (&temp, saved_post_plug_info_start, sizeof (temp)); - memcpy (saved_post_plug_info_start, &saved_post_plug_reloc, sizeof (saved_post_plug_reloc)); - saved_post_plug_reloc = temp; + mid = ((low + high)/2); + ti = mid; + if (buck[ti].add > add) + { + if ((ti > 0) && (buck[ti-1].add <= add)) + { + add = buck[ti-1].add; + return buck[ti - 1].val; + } + high = mid - 1; + } + else + { + if (buck[ti+1].add > add) + { + add = buck[ti].add; + return buck[ti].val; + } + low = mid + 1; + } } + add = 0; + return 0; +} - void swap_pre_plug_and_saved_for_profiler() +BOOL +sorted_table::ensure_space_for_insert() +{ + if (count == size) { - gap_reloc_pair temp; - memcpy (&temp, (first - sizeof (plug_and_gap)), sizeof (temp)); - memcpy ((first - sizeof (plug_and_gap)), 
&saved_pre_plug, sizeof (saved_pre_plug)); - saved_pre_plug = temp; - } + size = (size * 3)/2; + assert((size * sizeof (bk)) > 0); + bk* res = (bk*)new (nothrow) char [(size + 1) * sizeof (bk)]; + assert (res); + if (!res) + return FALSE; - void swap_post_plug_and_saved_for_profiler() - { - gap_reloc_pair temp; - memcpy (&temp, saved_post_plug_info_start, sizeof (temp)); - memcpy (saved_post_plug_info_start, &saved_post_plug, sizeof (saved_post_plug)); - saved_post_plug = temp; + last_slot (res) = 0; + memcpy (((bk*)res + 1), buckets(), count * sizeof (bk)); + bk* last_old_slots = slots; + slots = res; + if (last_old_slots != (bk*)(this + 1)) + enqueue_old_slot (last_old_slots); } + return TRUE; +} - // We should think about whether it's really necessary to have to copy back the pre plug - // info since it was already copied during compacting plugs. But if a plug doesn't move - // by >= 3 ptr size (the size of gap_reloc_pair), it means we'd have to recover pre plug info. - size_t recover_plug_info() - { - // We need to calculate the size for sweep case in order to correctly record the - // free_obj_space - sweep would've made these artificial gaps into free objects and - // we would need to deduct the size because now we are writing into those free objects. 
- size_t recovered_sweep_size = 0; +BOOL +sorted_table::insert (uint8_t* add, size_t val) +{ + //grow if no more room + assert (count < size); - if (saved_pre_p) + //insert sorted + ptrdiff_t high = (count-1); + ptrdiff_t low = 0; + ptrdiff_t ti; + ptrdiff_t mid; + bk* buck = buckets(); + while (low <= high) + { + mid = ((low + high)/2); + ti = mid; + if (buck[ti].add > add) { - if (gc_heap::settings.compaction) + if ((ti == 0) || (buck[ti-1].add <= add)) { - dprintf (3, ("%p: REC Pre: %p-%p", - first, - &saved_pre_plug_reloc, - saved_pre_plug_info_reloc_start)); - memcpy (saved_pre_plug_info_reloc_start, &saved_pre_plug_reloc, sizeof (saved_pre_plug_reloc)); + // found insertion point + for (ptrdiff_t k = count; k > ti;k--) + { + buck [k] = buck [k-1]; + } + buck[ti].add = add; + buck[ti].val = val; + count++; + return TRUE; } - else + high = mid - 1; + } + else + { + if (buck[ti+1].add > add) { - dprintf (3, ("%p: REC Pre: %p-%p", - first, - &saved_pre_plug, - (first - sizeof (plug_and_gap)))); - memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug, sizeof (saved_pre_plug)); - recovered_sweep_size += sizeof (saved_pre_plug); + //found the insertion point + for (ptrdiff_t k = count; k > ti+1;k--) + { + buck [k] = buck [k-1]; + } + buck[ti+1].add = add; + buck[ti+1].val = val; + count++; + return TRUE; } + low = mid + 1; } + } + assert (0); + return TRUE; +} - if (saved_post_p) +void +sorted_table::remove (uint8_t* add) +{ + ptrdiff_t high = (count-1); + ptrdiff_t low = 0; + ptrdiff_t ti; + ptrdiff_t mid; + bk* buck = buckets(); + while (low <= high) + { + mid = ((low + high)/2); + ti = mid; + if (buck[ti].add > add) { - if (gc_heap::settings.compaction) + if (buck[ti-1].add <= add) { - dprintf (3, ("%p: REC Post: %p-%p", - first, - &saved_post_plug_reloc, - saved_post_plug_info_start)); - memcpy (saved_post_plug_info_start, &saved_post_plug_reloc, sizeof (saved_post_plug_reloc)); + for (ptrdiff_t k = ti; k < count; k++) + buck[k-1] = buck[k]; + count--; + 
return; } - else + high = mid - 1; + } + else + { + if (buck[ti+1].add > add) { - dprintf (3, ("%p: REC Post: %p-%p", - first, - &saved_post_plug, - saved_post_plug_info_start)); - memcpy (saved_post_plug_info_start, &saved_post_plug, sizeof (saved_post_plug)); - recovered_sweep_size += sizeof (saved_post_plug); + for (ptrdiff_t k = ti+1; k < count; k++) + buck[k-1] = buck[k]; + count--; + return; } + low = mid + 1; } - - return recovered_sweep_size; } -}; - + assert (0); +} -void gc_mechanisms::init_mechanisms() +void +sorted_table::clear() { - condemned_generation = 0; - promotion = FALSE;//TRUE; - compaction = TRUE; -#ifdef FEATURE_LOH_COMPACTION - loh_compaction = gc_heap::loh_compaction_requested(); -#else - loh_compaction = FALSE; -#endif //FEATURE_LOH_COMPACTION - heap_expansion = FALSE; - concurrent = FALSE; - demotion = FALSE; - elevation_reduced = FALSE; - found_finalizers = FALSE; -#ifdef BACKGROUND_GC - background_p = gc_heap::background_running_p() != FALSE; -#endif //BACKGROUND_GC + count = 1; + buckets()[0].add = MAX_PTR; +} +#endif //FEATURE_BASICFREEZE - entry_memory_load = 0; - entry_available_physical_mem = 0; - exit_memory_load = 0; +#ifdef FEATURE_BASICFREEZE -#ifdef STRESS_HEAP - stress_induced = FALSE; -#endif // STRESS_HEAP +heap_segment* ro_segment_lookup (uint8_t* o) +{ + uint8_t* ro_seg_start = o; + heap_segment* seg = (heap_segment*)gc_heap::seg_table->lookup (ro_seg_start); + + if (ro_seg_start && in_range_for_segment (o, seg)) + return seg; + else + return 0; } -void gc_mechanisms::first_init() +#endif //FEATURE_BASICFREEZE + +#ifdef MULTIPLE_HEAPS +inline +gc_heap* seg_mapping_table_heap_of_worker (uint8_t* o) { - gc_index = 0; - gen0_reduction_count = 0; - should_lock_elevation = FALSE; - elevation_locked_count = 0; - reason = reason_empty; -#ifdef BACKGROUND_GC - pause_mode = gc_heap::gc_can_use_concurrent ? 
pause_interactive : pause_batch; + size_t index = (size_t)o >> gc_heap::min_segment_size_shr; + seg_mapping* entry = &seg_mapping_table[index]; + +#ifdef USE_REGIONS + gc_heap* hp = heap_segment_heap ((heap_segment*)entry); +#else + gc_heap* hp = ((o > entry->boundary) ? entry->h1 : entry->h0); + + dprintf (2, ("checking obj %p, index is %zd, entry: boundary: %p, h0: %p, seg0: %p, h1: %p, seg1: %p", + o, index, (entry->boundary + 1), + (uint8_t*)(entry->h0), (uint8_t*)(entry->seg0), + (uint8_t*)(entry->h1), (uint8_t*)(entry->seg1))); + #ifdef _DEBUG - int debug_pause_mode = static_cast(GCConfig::GetLatencyMode()); - if (debug_pause_mode >= 0) + heap_segment* seg = ((o > entry->boundary) ? entry->seg1 : entry->seg0); +#ifdef FEATURE_BASICFREEZE + if ((size_t)seg & ro_in_entry) + seg = (heap_segment*)((size_t)seg & ~ro_in_entry); +#endif //FEATURE_BASICFREEZE + +#ifdef TRACE_GC + if (seg) { - assert (debug_pause_mode <= pause_sustained_low_latency); - pause_mode = (gc_pause_mode)debug_pause_mode; + if (in_range_for_segment (o, seg)) + { + dprintf (2, ("obj %p belongs to segment %p(-%p)", o, seg, (uint8_t*)heap_segment_allocated (seg))); + } + else + { + dprintf (2, ("found seg %p(-%p) for obj %p, but it's not on the seg", + seg, (uint8_t*)heap_segment_allocated (seg), o)); + } + } + else + { + dprintf (2, ("could not find obj %p in any existing segments", o)); } +#endif //TRACE_GC #endif //_DEBUG -#else //BACKGROUND_GC - pause_mode = pause_batch; -#endif //BACKGROUND_GC - - init_mechanisms(); +#endif //USE_REGIONS + return hp; } -void gc_mechanisms::record (gc_history_global* history) -{ -#ifdef MULTIPLE_HEAPS - history->num_heaps = gc_heap::n_heaps; -#else - history->num_heaps = 1; + #endif //MULTIPLE_HEAPS - history->condemned_generation = condemned_generation; - history->gen0_reduction_count = gen0_reduction_count; - history->reason = reason; - history->pause_mode = (int)pause_mode; - history->mem_pressure = entry_memory_load; - history->global_mechanisms_p = 0; 
+// Only returns a valid seg if we can actually find o on the seg. +heap_segment* seg_mapping_table_segment_of (uint8_t* o) +{ +#ifdef FEATURE_BASICFREEZE + if ((o < g_gc_lowest_address) || (o >= g_gc_highest_address)) + return ro_segment_lookup (o); +#endif //FEATURE_BASICFREEZE - // start setting the boolean values. - if (concurrent) - history->set_mechanism_p (global_concurrent); + size_t index = (size_t)o >> gc_heap::min_segment_size_shr; + seg_mapping* entry = &seg_mapping_table[index]; - if (compaction) - history->set_mechanism_p (global_compaction); +#ifdef USE_REGIONS + // REGIONS TODO: I think we could simplify this to having the same info for each + // basic entry in a large region so we can get it right away instead of having to go + // back some entries. + ptrdiff_t first_field = (ptrdiff_t)heap_segment_allocated ((heap_segment*)entry); + if (first_field == 0) + { + dprintf (REGIONS_LOG, ("asked for seg for %p, in a freed region mem: %p, committed %p", + o, heap_segment_mem ((heap_segment*)entry), + heap_segment_committed ((heap_segment*)entry))); + return 0; + } + // Regions are never going to intersect an ro seg, so this can never be ro_in_entry. + assert (first_field != 0); + assert (first_field != ro_in_entry); + if (first_field < 0) + { + index += first_field; + } + heap_segment* seg = (heap_segment*)&seg_mapping_table[index]; +#else //USE_REGIONS + dprintf (2, ("checking obj %p, index is %zd, entry: boundary: %p, seg0: %p, seg1: %p", + o, index, (entry->boundary + 1), + (uint8_t*)(entry->seg0), (uint8_t*)(entry->seg1))); - if (promotion) - history->set_mechanism_p (global_promotion); + heap_segment* seg = ((o > entry->boundary) ? 
entry->seg1 : entry->seg0); +#ifdef FEATURE_BASICFREEZE + if ((size_t)seg & ro_in_entry) + seg = (heap_segment*)((size_t)seg & ~ro_in_entry); +#endif //FEATURE_BASICFREEZE +#endif //USE_REGIONS - if (demotion) - history->set_mechanism_p (global_demotion); + if (seg) + { + if (in_range_for_segment (o, seg)) + { + dprintf (2, ("obj %p belongs to segment %p(-%p)", o, (uint8_t*)heap_segment_mem(seg), (uint8_t*)heap_segment_reserved(seg))); + } + else + { + dprintf (2, ("found seg %p(-%p) for obj %p, but it's not on the seg, setting it to 0", + (uint8_t*)heap_segment_mem(seg), (uint8_t*)heap_segment_reserved(seg), o)); + seg = 0; + } + } + else + { + dprintf (2, ("could not find obj %p in any existing segments", o)); + } - if (card_bundles) - history->set_mechanism_p (global_card_bundles); +#ifdef FEATURE_BASICFREEZE + // TODO: This was originally written assuming that the seg_mapping_table would always contain entries for ro + // segments whenever the ro segment falls into the [g_gc_lowest_address,g_gc_highest_address) range. I.e., it had an + // extra "&& (size_t)(entry->seg1) & ro_in_entry" expression. However, at the moment, grow_brick_card_table does + // not correctly go through the ro segments and add them back to the seg_mapping_table when the [lowest,highest) + // range changes. We should probably go ahead and modify grow_brick_card_table and put back the + // "&& (size_t)(entry->seg1) & ro_in_entry" here. + if (!seg) + { + seg = ro_segment_lookup (o); + if (seg && !in_range_for_segment (o, seg)) + seg = 0; + } +#endif //FEATURE_BASICFREEZE - if (elevation_reduced) - history->set_mechanism_p (global_elevation); + return seg; } -/********************************** - called at the beginning of GC to fix the allocated size to - what is really allocated, or to turn the free area into an unused object - It needs to be called after all of the other allocation contexts have been - fixed since it relies on alloc_allocated. 
- ********************************/ - +size_t gcard_of ( uint8_t*); +#define slot(i, j) ((uint8_t**)(i))[(j)+1] -inline -BOOL grow_mark_stack (mark*& m, size_t& len, size_t init_len) +heap_segment* heap_segment_rw (heap_segment* ns) { - size_t new_size = max (init_len, 2*len); - mark* tmp = new (nothrow) mark [new_size]; - if (tmp) + if ((ns == 0) || !heap_segment_read_only_p (ns)) { - memcpy (tmp, m, len * sizeof (mark)); - delete[] m; - m = tmp; - len = new_size; - return TRUE; + return ns; } else { - dprintf (1, ("Failed to allocate %zd bytes for mark stack", (len * sizeof (mark)))); - return FALSE; + do + { + ns = heap_segment_next (ns); + } while ((ns != 0) && heap_segment_read_only_p (ns)); + return ns; } } -inline -uint8_t* pinned_plug (mark* m) -{ - return m->first; -} - -inline -size_t& pinned_len (mark* m) +//returns the next non ro segment. +heap_segment* heap_segment_next_rw (heap_segment* seg) { - return m->len; + heap_segment* ns = heap_segment_next (seg); + return heap_segment_rw (ns); } -inline -void set_new_pin_info (mark* m, uint8_t* pin_free_space_start) +// returns the segment before seg. 
+heap_segment* heap_segment_prev_rw (heap_segment* begin, heap_segment* seg) { - m->len = pinned_plug (m) - pin_free_space_start; -#ifdef SHORT_PLUGS - m->allocation_context_start_region = pin_free_space_start; -#endif //SHORT_PLUGS -} + assert (begin != 0); + heap_segment* prev = begin; + heap_segment* current = heap_segment_next_rw (begin); -#ifdef SHORT_PLUGS -inline -uint8_t*& pin_allocation_context_start_region (mark* m) -{ - return m->allocation_context_start_region; -} + while (current && current != seg) + { + prev = current; + current = heap_segment_next_rw (current); + } -uint8_t* get_plug_start_in_saved (uint8_t* old_loc, mark* pinned_plug_entry) -{ - uint8_t* saved_pre_plug_info = (uint8_t*)(pinned_plug_entry->get_pre_plug_reloc_info()); - uint8_t* plug_start_in_saved = saved_pre_plug_info + (old_loc - (pinned_plug (pinned_plug_entry) - sizeof (plug_and_gap))); - //dprintf (2, ("detected a very short plug: %zx before PP %zx, pad %zx", - // old_loc, pinned_plug (pinned_plug_entry), plug_start_in_saved)); - dprintf (2, ("EP: %p(%p), %p", old_loc, pinned_plug (pinned_plug_entry), plug_start_in_saved)); - return plug_start_in_saved; + if (current == seg) + { + return prev; + } + else + { + return 0; + } } +initial_memory_details memory_details; -#endif //SHORT_PLUGS - -#ifdef CARD_BUNDLE -// The card bundle keeps track of groups of card words. -static const size_t card_bundle_word_width = 32; - -// How do we express the fact that 32 bits (card_word_width) is one uint32_t? 
-static const size_t card_bundle_size = (size_t)(GC_PAGE_SIZE / (sizeof(uint32_t)*card_bundle_word_width)); - -inline -size_t card_bundle_word (size_t cardb) +heap_segment* make_initial_segment (int gen, int h_number, gc_heap* hp) { - return cardb / card_bundle_word_width; -} + void* mem = memory_details.get_initial_memory (gen, h_number); + size_t size = memory_details.get_initial_size (gen); + heap_segment* res = gc_heap::make_heap_segment ((uint8_t*)mem, size, hp, gen); -inline -uint32_t card_bundle_bit (size_t cardb) -{ - return (uint32_t)(cardb % card_bundle_word_width); + return res; } -size_t align_cardw_on_bundle (size_t cardw) +void* virtual_alloc (size_t size) { - return ((size_t)(cardw + card_bundle_size - 1) & ~(card_bundle_size - 1 )); + return virtual_alloc(size, false); } -// Get the card bundle representing a card word -size_t cardw_card_bundle (size_t cardw) +void* virtual_alloc (size_t size, bool use_large_pages_p, uint16_t numa_node) { - return cardw / card_bundle_size; -} + size_t requested_size = size; -// Get the first card word in a card bundle -size_t card_bundle_cardw (size_t cardb) -{ - return cardb * card_bundle_size; -} + if ((gc_heap::reserved_memory_limit - gc_heap::reserved_memory) < requested_size) + { + gc_heap::reserved_memory_limit = gc_heap::reserved_memory_limit + requested_size; + if ((gc_heap::reserved_memory_limit - gc_heap::reserved_memory) < requested_size) + { + return 0; + } + } + uint32_t flags = VirtualReserveFlags::None; +#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (virtual_alloc_hardware_write_watch) + { + flags = VirtualReserveFlags::WriteWatch; + } +#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -// Takes a pointer to a card bundle table and an address, and returns a pointer that represents -// where a theoretical card bundle table that represents every address (starting from 0) would -// start if the bundle word representing the address were to be located at the pointer passed in. 
-// The returned 'translated' pointer makes it convenient/fast to calculate where the card bundle -// for a given address is using a simple shift operation on the address. -uint32_t* translate_card_bundle_table (uint32_t* cb, uint8_t* lowest_address) -{ - // The number of bytes of heap memory represented by a card bundle word - const size_t heap_bytes_for_bundle_word = card_size * card_word_width * card_bundle_size * card_bundle_word_width; + void* prgmem = use_large_pages_p ? + GCToOSInterface::VirtualReserveAndCommitLargePages(requested_size, numa_node) : + GCToOSInterface::VirtualReserve(requested_size, card_size * card_word_width, flags, numa_node); + void *aligned_mem = prgmem; - // Each card bundle word is 32 bits - return (uint32_t*)((uint8_t*)cb - (((size_t)lowest_address / heap_bytes_for_bundle_word) * sizeof (uint32_t))); -} + // We don't want (prgmem + size) to be right at the end of the address space + // because we'd have to worry about that everytime we do (address + size). + // We also want to make sure that we leave loh_size_threshold at the end + // so we allocate a small object we don't need to worry about overflow there + // when we do alloc_ptr+size. 
+ if (prgmem) + { + uint8_t* end_mem = (uint8_t*)prgmem + requested_size; -#endif // CARD_BUNDLE + if ((end_mem == 0) || ((size_t)(MAX_PTR - end_mem) <= END_SPACE_AFTER_GC)) + { + GCToOSInterface::VirtualRelease (prgmem, requested_size); + dprintf (2, ("Virtual Alloc size %zd returned memory right against 4GB [%zx, %zx[ - discarding", + requested_size, (size_t)prgmem, (size_t)((uint8_t*)prgmem+requested_size))); + prgmem = 0; + aligned_mem = 0; + } + } -#if defined (HOST_64BIT) -#define brick_size ((size_t)4096) -#else -#define brick_size ((size_t)2048) -#endif //HOST_64BIT + if (prgmem) + { + gc_heap::reserved_memory += requested_size; + } -inline -uint8_t* align_on_brick (uint8_t* add) -{ - return (uint8_t*)((size_t)(add + brick_size - 1) & ~(brick_size - 1)); -} + dprintf (2, ("Virtual Alloc size %zd: [%zx, %zx[", + requested_size, (size_t)prgmem, (size_t)((uint8_t*)prgmem+requested_size))); -inline -uint8_t* align_lower_brick (uint8_t* add) -{ - return (uint8_t*)(((size_t)add) & ~(brick_size - 1)); + return aligned_mem; } -size_t size_brick_of (uint8_t* from, uint8_t* end) +size_t get_valid_segment_size (BOOL large_seg) { - assert (((size_t)from & (brick_size-1)) == 0); - assert (((size_t)end & (brick_size-1)) == 0); + size_t seg_size, initial_seg_size; - return ((end - from) / brick_size) * sizeof (short); -} + if (!large_seg) + { + initial_seg_size = INITIAL_ALLOC; + seg_size = static_cast(GCConfig::GetSegmentSize()); + } + else + { + initial_seg_size = LHEAP_ALLOC; + seg_size = static_cast(GCConfig::GetSegmentSize()) / 2; + } -inline -uint8_t* align_on_card (uint8_t* add) -{ - return (uint8_t*)((size_t)(add + card_size - 1) & ~(card_size - 1 )); -} -inline -uint8_t* align_on_card_word (uint8_t* add) -{ - return (uint8_t*) ((size_t)(add + (card_size*card_word_width)-1) & ~(card_size*card_word_width - 1)); -} +#ifdef MULTIPLE_HEAPS +#ifdef HOST_64BIT + if (!large_seg) +#endif // HOST_64BIT + { + if (g_num_processors > 4) + initial_seg_size /= 2; + if 
(g_num_processors > 8) + initial_seg_size /= 2; + } +#endif //MULTIPLE_HEAPS -inline -uint8_t* align_lower_card (uint8_t* add) -{ - return (uint8_t*)((size_t)add & ~(card_size-1)); -} + // if seg_size is small but not 0 (0 is default if config not set) + // then set the segment to the minimum size + if (!g_theGCHeap->IsValidSegmentSize(seg_size)) + { + // if requested size is between 1 byte and 4MB, use min + if ((seg_size >> 1) && !(seg_size >> 22)) + seg_size = 1024*1024*4; + else + seg_size = initial_seg_size; + } -// Returns the number of DWORDs in the card table that cover the -// range of addresses [from, end[. -size_t count_card_of (uint8_t* from, uint8_t* end) -{ - return card_word (gcard_of (end - 1)) - card_word (gcard_of (from)) + 1; +#ifdef HOST_64BIT + seg_size = round_up_power2 (seg_size); +#else + seg_size = round_down_power2 (seg_size); +#endif // HOST_64BIT + + return (seg_size); } -// Returns the number of bytes to allocate for a card table -// that covers the range of addresses [from, end[. -size_t size_card_of (uint8_t* from, uint8_t* end) +#ifndef USE_REGIONS +void +gc_heap::compute_new_ephemeral_size() { - return count_card_of (from, end) * sizeof(uint32_t); -} + int eph_gen_max = max_generation - 1 - (settings.promotion ? 1 : 0); + size_t padding_size = 0; -// We don't store seg_mapping_table in card_table_info because there's only always one view. 
-class card_table_info -{ -public: - unsigned recount; - size_t size; - uint32_t* next_card_table; + for (int i = 0; i <= eph_gen_max; i++) + { + dynamic_data* dd = dynamic_data_of (i); + total_ephemeral_size += (dd_survived_size (dd) - dd_pinned_survived_size (dd)); +#ifdef RESPECT_LARGE_ALIGNMENT + total_ephemeral_size += dd_num_npinned_plugs (dd) * switch_alignment_size (FALSE); +#endif //RESPECT_LARGE_ALIGNMENT +#ifdef FEATURE_STRUCTALIGN + total_ephemeral_size += dd_num_npinned_plugs (dd) * MAX_STRUCTALIGN; +#endif //FEATURE_STRUCTALIGN - uint8_t* lowest_address; - uint8_t* highest_address; - short* brick_table; +#ifdef SHORT_PLUGS + padding_size += dd_padding_size (dd); +#endif //SHORT_PLUGS + } -#ifdef CARD_BUNDLE - uint32_t* card_bundle_table; -#endif //CARD_BUNDLE + total_ephemeral_size += eph_gen_starts_size; - // mark_array is always at the end of the data structure because we - // want to be able to make one commit call for everything before it. -#ifdef BACKGROUND_GC - uint32_t* mark_array; -#endif //BACKGROUND_GC -}; +#ifdef RESPECT_LARGE_ALIGNMENT + size_t planned_ephemeral_size = heap_segment_plan_allocated (ephemeral_heap_segment) - + generation_plan_allocation_start (generation_of (max_generation-1)); + total_ephemeral_size = min (total_ephemeral_size, planned_ephemeral_size); +#endif //RESPECT_LARGE_ALIGNMENT -static_assert(offsetof(dac_card_table_info, size) == offsetof(card_table_info, size), "DAC card_table_info layout mismatch"); -static_assert(offsetof(dac_card_table_info, next_card_table) == offsetof(card_table_info, next_card_table), "DAC card_table_info layout mismatch"); +#ifdef SHORT_PLUGS + total_ephemeral_size = Align ((size_t)((double)total_ephemeral_size * short_plugs_pad_ratio) + 1); + total_ephemeral_size += Align (DESIRED_PLUG_LENGTH); +#endif //SHORT_PLUGS -//These are accessors on untranslated cardtable -inline -unsigned& card_table_refcount (uint32_t* c_table) -{ - return *(unsigned*)((char*)c_table - sizeof (card_table_info)); 
+ dprintf (3, ("total ephemeral size is %zx, padding %zx(%zx)", + total_ephemeral_size, + padding_size, (total_ephemeral_size - padding_size))); } -inline -uint8_t*& card_table_lowest_address (uint32_t* c_table) +heap_segment* +gc_heap::soh_get_segment_to_expand() { - return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->lowest_address; -} + size_t size = soh_segment_size; -uint32_t* translate_card_table (uint32_t* ct) -{ - return (uint32_t*)((uint8_t*)ct - card_word (gcard_of (card_table_lowest_address (ct))) * sizeof(uint32_t)); -} + ordered_plug_indices_init = FALSE; + use_bestfit = FALSE; -inline -uint8_t*& card_table_highest_address (uint32_t* c_table) -{ - return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->highest_address; -} + //compute the size of the new ephemeral heap segment. + compute_new_ephemeral_size(); -inline -short*& card_table_brick_table (uint32_t* c_table) -{ - return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->brick_table; -} + if ((settings.pause_mode != pause_low_latency) && + (settings.pause_mode != pause_no_gc) +#ifdef BACKGROUND_GC + && (!gc_heap::background_running_p()) +#endif //BACKGROUND_GC + ) + { + assert (settings.condemned_generation <= max_generation); + allocator* gen_alloc = ((settings.condemned_generation == max_generation) ? nullptr : + generation_allocator (generation_of (max_generation))); + dprintf (2, ("(gen%d)soh_get_segment_to_expand", settings.condemned_generation)); -#ifdef CARD_BUNDLE -inline -uint32_t*& card_table_card_bundle_table (uint32_t* c_table) -{ - return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->card_bundle_table; -} -#endif //CARD_BUNDLE + // try to find one in the gen 2 segment list, search backwards because the first segments + // tend to be more compact than the later ones. 
+ heap_segment* fseg = heap_segment_rw (generation_start_segment (generation_of (max_generation))); -#ifdef BACKGROUND_GC -inline -uint32_t*& card_table_mark_array (uint32_t* c_table) -{ - return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->mark_array; -} + _ASSERTE(fseg != NULL); -#ifdef HOST_64BIT -#define mark_bit_pitch ((size_t)16) -#else -#define mark_bit_pitch ((size_t)8) -#endif // HOST_64BIT -#define mark_word_width ((size_t)32) -#define mark_word_size (mark_word_width * mark_bit_pitch) +#ifdef SEG_REUSE_STATS + int try_reuse = 0; +#endif //SEG_REUSE_STATS -inline -uint8_t* align_on_mark_bit (uint8_t* add) -{ - return (uint8_t*)((size_t)(add + (mark_bit_pitch - 1)) & ~(mark_bit_pitch - 1)); -} + heap_segment* seg = ephemeral_heap_segment; + while ((seg = heap_segment_prev_rw (fseg, seg)) && (seg != fseg)) + { +#ifdef SEG_REUSE_STATS + try_reuse++; +#endif //SEG_REUSE_STATS -inline -uint8_t* align_lower_mark_bit (uint8_t* add) -{ - return (uint8_t*)((size_t)(add) & ~(mark_bit_pitch - 1)); -} + if (can_expand_into_p (seg, size/3, total_ephemeral_size, gen_alloc)) + { + get_gc_data_per_heap()->set_mechanism (gc_heap_expand, + (use_bestfit ? 
expand_reuse_bestfit : expand_reuse_normal)); + if (settings.condemned_generation == max_generation) + { + if (use_bestfit) + { + build_ordered_free_spaces (seg); + dprintf (GTC_LOG, ("can use best fit")); + } -inline -BOOL is_aligned_on_mark_word (uint8_t* add) -{ - return ((size_t)add == ((size_t)(add) & ~(mark_word_size - 1))); -} +#ifdef SEG_REUSE_STATS + dprintf (SEG_REUSE_LOG_0, ("(gen%d)soh_get_segment_to_expand: found seg #%d to reuse", + settings.condemned_generation, try_reuse)); +#endif //SEG_REUSE_STATS + dprintf (GTC_LOG, ("max_gen: Found existing segment to expand into %zx", (size_t)seg)); + return seg; + } + else + { +#ifdef SEG_REUSE_STATS + dprintf (SEG_REUSE_LOG_0, ("(gen%d)soh_get_segment_to_expand: found seg #%d to reuse - returning", + settings.condemned_generation, try_reuse)); +#endif //SEG_REUSE_STATS + dprintf (GTC_LOG, ("max_gen-1: Found existing segment to expand into %zx", (size_t)seg)); -inline -uint8_t* align_on_mark_word (uint8_t* add) -{ - return (uint8_t*)((size_t)(add + mark_word_size - 1) & ~(mark_word_size - 1)); -} + // If we return 0 here, the allocator will think since we are short on end + // of seg we need to trigger a full compacting GC. So if sustained low latency + // is set we should acquire a new seg instead, that way we wouldn't be short. + // The real solution, of course, is to actually implement seg reuse in gen1. 
+ if (settings.pause_mode != pause_sustained_low_latency) + { + dprintf (GTC_LOG, ("max_gen-1: SustainedLowLatency is set, acquire a new seg")); + get_gc_data_per_heap()->set_mechanism (gc_heap_expand, expand_next_full_gc); + return 0; + } + } + } + } + } -inline -uint8_t* align_lower_mark_word (uint8_t* add) -{ - return (uint8_t*)((size_t)(add) & ~(mark_word_size - 1)); -} + heap_segment* result = get_segment (size, gc_oh_num::soh); -inline -size_t mark_bit_of (uint8_t* add) -{ - return ((size_t)add / mark_bit_pitch); -} + if(result) + { +#ifdef BACKGROUND_GC + if (current_c_gc_state == c_gc_state_planning) + { + // When we expand heap during bgc sweep, we set the seg to be swept so + // we'll always look at cards for objects on the new segment. + result->flags |= heap_segment_flags_swept; + } +#endif //BACKGROUND_GC -inline -unsigned int mark_bit_bit (size_t mark_bit) -{ - return (unsigned int)(mark_bit % mark_word_width); -} + FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(result), + (size_t)(heap_segment_reserved (result) - heap_segment_mem(result)), + gc_etw_segment_small_object_heap); + } -inline -size_t mark_bit_word (size_t mark_bit) -{ - return (mark_bit / mark_word_width); -} + get_gc_data_per_heap()->set_mechanism (gc_heap_expand, (result ? 
expand_new_seg : expand_no_memory)); -inline -size_t mark_word_of (uint8_t* add) -{ - return ((size_t)add) / mark_word_size; -} + if (result == 0) + { + dprintf (2, ("h%d: failed to allocate a new segment!", heap_number)); + } + else + { +#ifdef MULTIPLE_HEAPS + heap_segment_heap (result) = this; +#endif //MULTIPLE_HEAPS + } -uint8_t* mark_word_address (size_t wd) -{ - return (uint8_t*)(wd*mark_word_size); + dprintf (GTC_LOG, ("(gen%d)creating new segment %p", settings.condemned_generation, result)); + return result; } -uint8_t* mark_bit_address (size_t mark_bit) +//returns 0 in case of allocation failure +heap_segment* +gc_heap::get_segment (size_t size, gc_oh_num oh) { - return (uint8_t*)(mark_bit*mark_bit_pitch); -} + assert(oh != gc_oh_num::unknown); + BOOL uoh_p = (oh == gc_oh_num::loh) || (oh == gc_oh_num::poh); + if (heap_hard_limit) + return NULL; -inline -size_t mark_bit_bit_of (uint8_t* add) -{ - return (((size_t)add / mark_bit_pitch) % mark_word_width); -} + heap_segment* result = 0; -size_t size_mark_array_of (uint8_t* from, uint8_t* end) -{ - assert (((size_t)from & ((mark_word_size)-1)) == 0); - assert (((size_t)end & ((mark_word_size)-1)) == 0); - return sizeof (uint32_t)*(((end - from) / mark_word_size)); -} + if (segment_standby_list != 0) + { + result = segment_standby_list; + heap_segment* last = 0; + while (result) + { + size_t hs = (size_t)(heap_segment_reserved (result) - (uint8_t*)result); + if ((hs >= size) && ((hs / 2) < size)) + { + dprintf (2, ("Hoarded segment %zx found", (size_t) result)); + if (last) + { + heap_segment_next (last) = heap_segment_next (result); + } + else + { + segment_standby_list = heap_segment_next (result); + } + break; + } + else + { + last = result; + result = heap_segment_next (result); + } + } + } -//In order to eliminate the lowest_address in the mark array -//computations (mark_word_of, etc) mark_array is offset -// according to the lowest_address. 
-uint32_t* translate_mark_array (uint32_t* ma) -{ - return (uint32_t*)((uint8_t*)ma - size_mark_array_of (0, g_gc_lowest_address)); -} + if (result) + { + init_heap_segment (result, __this); +#ifdef BACKGROUND_GC + if (is_bgc_in_progress()) + { + dprintf (GC_TABLE_LOG, ("hoarded seg %p, mark_array is %p", result, mark_array)); + if (!commit_mark_array_new_seg (__this, result)) + { + dprintf (GC_TABLE_LOG, ("failed to commit mark array for hoarded seg")); + // If we can't use it we need to thread it back. + if (segment_standby_list != 0) + { + heap_segment_next (result) = segment_standby_list; + segment_standby_list = result; + } + else + { + segment_standby_list = result; + } + result = 0; + } + } #endif //BACKGROUND_GC -//These work on untranslated card tables -inline -uint32_t*& card_table_next (uint32_t* c_table) -{ - // NOTE: The dac takes a dependency on card_table_info being right before c_table. - // It's 100% ok to change this implementation detail as long as a matching change - // is made to DacGCBookkeepingEnumerator::Init in daccess.cpp. 
- return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->next_card_table; -} - -inline -size_t& card_table_size (uint32_t* c_table) -{ - return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->size; -} - -void own_card_table (uint32_t* c_table) -{ - card_table_refcount (c_table) += 1; -} - -void destroy_card_table (uint32_t* c_table); + if (result) + seg_mapping_table_add_segment (result, __this); + } -void delete_next_card_table (uint32_t* c_table) -{ - uint32_t* n_table = card_table_next (c_table); - if (n_table) + if (!result) { - if (card_table_next (n_table)) - { - delete_next_card_table (n_table); - } - if (card_table_refcount (n_table) == 0) + void* mem = virtual_alloc (size); + if (!mem) { - destroy_card_table (n_table); - card_table_next (c_table) = 0; + fgm_result.set_fgm (fgm_reserve_segment, size, uoh_p); + return 0; } - } -} -void release_card_table (uint32_t* c_table) -{ - assert (card_table_refcount (c_table) >0); - card_table_refcount (c_table) -= 1; - if (card_table_refcount (c_table) == 0) - { - delete_next_card_table (c_table); - if (card_table_next (c_table) == 0) + result = make_heap_segment ((uint8_t*)mem, size, __this, (oh + max_generation)); + + if (result) { - destroy_card_table (c_table); - // sever the link from the parent - if (&g_gc_card_table[card_word (gcard_of(g_gc_lowest_address))] == c_table) + uint8_t* start; + uint8_t* end; + if (mem < g_gc_lowest_address) { - g_gc_card_table = 0; + start = (uint8_t*)mem; + } + else + { + start = (uint8_t*)g_gc_lowest_address; + } -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - g_gc_card_bundle_table = 0; -#endif -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - SoftwareWriteWatch::StaticClose(); -#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (((uint8_t*)mem + size) > g_gc_highest_address) + { + end = (uint8_t*)mem + size; } else { - uint32_t* p_table = &g_gc_card_table[card_word (gcard_of(g_gc_lowest_address))]; - if (p_table) + end = 
(uint8_t*)g_gc_highest_address; + } + + if (gc_heap::grow_brick_card_tables (start, end, size, result, __this, uoh_p) != 0) + { + // release_segment needs the flags to decrement the proper bucket + size_t flags = 0; + if (oh == poh) { - while (p_table && (card_table_next (p_table) != c_table)) - p_table = card_table_next (p_table); - card_table_next (p_table) = 0; + flags = heap_segment_flags_poh; + } + else if (oh == loh) + { + flags = heap_segment_flags_loh; } + result->flags |= flags; + release_segment (result); + return 0; } } - } -} + else + { + fgm_result.set_fgm (fgm_commit_segment_beg, SEGMENT_INITIAL_COMMIT, uoh_p); + virtual_free (mem, size); + } -void destroy_card_table (uint32_t* c_table) -{ -// delete (uint32_t*)&card_table_refcount(c_table); + if (result) + { + seg_mapping_table_add_segment (result, __this); + } + } - size_t size = card_table_size(c_table); - gc_heap::destroy_card_table_helper (c_table); - GCToOSInterface::VirtualRelease (&card_table_refcount(c_table), size); - dprintf (2, ("Table Virtual Free : %zx", (size_t)&card_table_refcount(c_table))); -} +#ifdef BACKGROUND_GC + if (result) + { + ::record_changed_seg ((uint8_t*)result, heap_segment_reserved (result), + settings.gc_index, current_bgc_state, + seg_added); + bgc_verify_mark_array_cleared (result); + } +#endif //BACKGROUND_GC -uint8_t** make_mark_list (size_t size) -{ - uint8_t** mark_list = new (nothrow) uint8_t* [size]; - return mark_list; + dprintf (GC_TABLE_LOG, ("h%d: new seg: %p-%p (%zd)", heap_number, result, ((uint8_t*)result + size), size)); + return result; } -#define swap(a,b){uint8_t* t; t = a; a = b; b = t;} - +#endif //!USE_REGIONS -#ifndef USE_INTROSORT -void qsort1( uint8_t* *low, uint8_t* *high, unsigned int depth) -{ - if (((low + 16) >= high) || (depth > 100)) +#ifdef MULTIPLE_HEAPS +#ifdef HOST_X86 +#ifdef _MSC_VER +#pragma warning(disable:4035) + ptrdiff_t get_cycle_count() { - //insertion sort - uint8_t **i, **j; - for (i = low+1; i <= high; i++) - { - uint8_t* 
val = *i; - for (j=i;j >low && val<*(j-1);j--) - { - *j=*(j-1); - } - *j=val; - } + __asm rdtsc } - else +#pragma warning(default:4035) +#elif defined(__GNUC__) + ptrdiff_t get_cycle_count() { - uint8_t *pivot, **left, **right; + ptrdiff_t cycles; + ptrdiff_t cyclesHi; + __asm__ __volatile__ + ("rdtsc":"=a" (cycles), "=d" (cyclesHi)); + return cycles; + } +#else //_MSC_VER +#error Unknown compiler +#endif //_MSC_VER +#elif defined(TARGET_AMD64) +#ifdef _MSC_VER +extern "C" uint64_t __rdtsc(); +#pragma intrinsic(__rdtsc) + ptrdiff_t get_cycle_count() + { + return (ptrdiff_t)__rdtsc(); + } +#elif defined(__GNUC__) + ptrdiff_t get_cycle_count() + { + ptrdiff_t cycles; + ptrdiff_t cyclesHi; + __asm__ __volatile__ + ("rdtsc":"=a" (cycles), "=d" (cyclesHi)); + return (cyclesHi << 32) | cycles; + } +#else // _MSC_VER + extern "C" ptrdiff_t get_cycle_count(void); +#endif // _MSC_VER +#elif defined(TARGET_LOONGARCH64) + ptrdiff_t get_cycle_count() + { + ////FIXME: TODO for LOONGARCH64: + //ptrdiff_t cycle; + __asm__ volatile ("break 0 \n"); + return 0; + } +#else + ptrdiff_t get_cycle_count() + { + // @ARMTODO, @ARM64TODO, @WASMTODO: cycle counter is not exposed to user mode. For now (until we can show this + // makes a difference on the configurations on which we'll run) just return 0. This will result in + // all buffer access times being reported as equal in access_time(). + return 0; + } +#endif //TARGET_X86 - //sort low middle and high - if (*(low+((high-low)/2)) < *low) - swap (*(low+((high-low)/2)), *low); - if (*high < *low) - swap (*low, *high); - if (*high < *(low+((high-low)/2))) - swap (*(low+((high-low)/2)), *high); +// We may not be on contiguous numa nodes so need to store +// the node index as well. 
+uint8_t* heap_select::sniff_buffer; +unsigned heap_select::n_sniff_buffers; +unsigned heap_select::cur_sniff_index; +uint16_t heap_select::proc_no_to_heap_no[MAX_SUPPORTED_CPUS]; +uint16_t heap_select::heap_no_to_proc_no[MAX_SUPPORTED_CPUS]; +uint16_t heap_select::heap_no_to_numa_node[MAX_SUPPORTED_CPUS]; +uint16_t heap_select::numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4]; +#ifdef HEAP_BALANCE_INSTRUMENTATION +uint16_t heap_select::total_numa_nodes; +node_heap_count heap_select::heaps_on_node[MAX_SUPPORTED_NODES]; +#endif - swap (*(low+((high-low)/2)), *(high-1)); - pivot = *(high-1); - left = low; right = high-1; - while (1) { - while (*(--right) > pivot); - while (*(++left) < pivot); - if (left < right) - { - swap(*left, *right); - } - else - break; - } - swap (*left, *(high-1)); - qsort1(low, left-1, depth+1); - qsort1(left+1, high, depth+1); - } -} -#endif //USE_INTROSORT +#ifdef HEAP_BALANCE_INSTRUMENTATION +// This records info we use to look at effect of different strategies +// for heap balancing. +struct heap_balance_info +{ + uint64_t timestamp; + // This also encodes when we detect the thread runs on + // different proc during a balance attempt. Sometimes + // I observe this happens multiple times during one attempt! + // If this happens, I just record the last proc we observe + // and set MSB. + int tid; + // This records the final alloc_heap for the thread. + // + // This also encodes the reason why we needed to set_home_heap + // in balance_heaps. + // If we set it because the home heap is not the same as the proc, + // we set MSB. + // + // If we set ideal proc, we set the 2nd MSB. 
+ int alloc_heap; + int ideal_proc_no; +}; -#ifdef USE_VXSORT -static void do_vxsort (uint8_t** item_array, ptrdiff_t item_count, uint8_t* range_low, uint8_t* range_high) -{ - // above this threshold, using AVX2 for sorting will likely pay off - // despite possible downclocking on some devices - const ptrdiff_t AVX2_THRESHOLD_SIZE = 8 * 1024; +// This means inbetween each GC we can log at most this many entries per proc. +// This is usually enough. Most of the time we only need to log something every 128k +// of allocations in balance_heaps and gen0 budget is <= 200mb. +#define default_max_hb_heap_balance_info 4096 - // above this threshold, using AVX512F for sorting will likely pay off - // despite possible downclocking on current devices - const ptrdiff_t AVX512F_THRESHOLD_SIZE = 128 * 1024; +struct heap_balance_info_proc +{ + int count; + int index; + heap_balance_info hb_info[default_max_hb_heap_balance_info]; +}; - // above this threshold, using NEON for sorting will likely pay off - const ptrdiff_t NEON_THRESHOLD_SIZE = 1024; +struct heap_balance_info_numa +{ + heap_balance_info_proc* hb_info_procs; +}; - if (item_count <= 1) - return; +uint64_t start_raw_ts = 0; +bool cpu_group_enabled_p = false; +uint32_t procs_per_numa_node = 0; +uint16_t total_numa_nodes_on_machine = 0; +uint32_t procs_per_cpu_group = 0; +uint16_t total_cpu_groups_on_machine = 0; +// Note this is still on one of the numa nodes, so we'll incur a remote access +// no matter what. +heap_balance_info_numa* hb_info_numa_nodes = NULL; -#if defined(TARGET_AMD64) - if (IsSupportedInstructionSet (InstructionSet::AVX2) && (item_count > AVX2_THRESHOLD_SIZE)) +// TODO: This doesn't work for multiple nodes per CPU group yet. 
+int get_proc_index_numa (int proc_no, int* numa_no) +{ + if (total_numa_nodes_on_machine == 1) { - dprintf(3, ("Sorting mark lists")); - - // use AVX512F only if the list is large enough to pay for downclocking impact - if (IsSupportedInstructionSet (InstructionSet::AVX512F) && (item_count > AVX512F_THRESHOLD_SIZE)) + *numa_no = 0; + return proc_no; + } + else + { + if (cpu_group_enabled_p) { - do_vxsort_avx512 (item_array, &item_array[item_count - 1], range_low, range_high); + // see vm\gcenv.os.cpp GroupProcNo implementation. + *numa_no = proc_no >> 6; + return (proc_no % 64); } else { - do_vxsort_avx2 (item_array, &item_array[item_count - 1], range_low, range_high); + *numa_no = proc_no / procs_per_numa_node; + return (proc_no % procs_per_numa_node); } } -#elif defined(TARGET_ARM64) - if (IsSupportedInstructionSet (InstructionSet::NEON) && (item_count > NEON_THRESHOLD_SIZE)) - { - dprintf(3, ("Sorting mark lists")); - do_vxsort_neon (item_array, &item_array[item_count - 1], range_low, range_high); - } -#endif - else - { - dprintf (3, ("Sorting mark lists")); - introsort::sort (item_array, &item_array[item_count - 1], 0); - } -#ifdef _DEBUG - // check the array is sorted - for (ptrdiff_t i = 0; i < item_count - 1; i++) +} + + + +const int hb_log_buffer_size = 4096; +static char hb_log_buffer[hb_log_buffer_size]; +int last_hb_recorded_gc_index = -1; +#endif //HEAP_BALANCE_INSTRUMENTATION + +void set_thread_affinity_for_heap (int heap_number, uint16_t proc_no) +{ + if (!GCToOSInterface::SetThreadAffinity (proc_no)) { - assert (item_array[i] <= item_array[i + 1]); + dprintf (1, ("Failed to set thread affinity for GC thread %d on proc #%d", heap_number, proc_no)); } - // check that the ends of the array are indeed in range - // together with the above this implies all elements are in range - assert ((range_low <= item_array[0]) && (item_array[item_count - 1] <= range_high)); -#endif } -#endif //USE_VXSORT -#ifdef MULTIPLE_HEAPS - -#ifdef _DEBUG +#endif 
//MULTIPLE_HEAPS -#if !defined(_MSC_VER) -#if !defined(__cdecl) -#if defined(__i386__) -#define __cdecl __attribute__((cdecl)) +void gc_mechanisms::init_mechanisms() +{ + condemned_generation = 0; + promotion = FALSE;//TRUE; + compaction = TRUE; +#ifdef FEATURE_LOH_COMPACTION + loh_compaction = gc_heap::loh_compaction_requested(); #else -#define __cdecl -#endif -#endif -#endif + loh_compaction = FALSE; +#endif //FEATURE_LOH_COMPACTION + heap_expansion = FALSE; + concurrent = FALSE; + demotion = FALSE; + elevation_reduced = FALSE; + found_finalizers = FALSE; +#ifdef BACKGROUND_GC + background_p = gc_heap::background_running_p() != FALSE; +#endif //BACKGROUND_GC -#endif // _DEBUG + entry_memory_load = 0; + entry_available_physical_mem = 0; + exit_memory_load = 0; -#else +#ifdef STRESS_HEAP + stress_induced = FALSE; +#endif // STRESS_HEAP +} -#ifdef USE_REGIONS +void gc_mechanisms::first_init() +{ + gc_index = 0; + gen0_reduction_count = 0; + should_lock_elevation = FALSE; + elevation_locked_count = 0; + reason = reason_empty; +#ifdef BACKGROUND_GC + pause_mode = gc_heap::gc_can_use_concurrent ? 
pause_interactive : pause_batch; +#ifdef _DEBUG + int debug_pause_mode = static_cast(GCConfig::GetLatencyMode()); + if (debug_pause_mode >= 0) + { + assert (debug_pause_mode <= pause_sustained_low_latency); + pause_mode = (gc_pause_mode)debug_pause_mode; + } +#endif //_DEBUG +#else //BACKGROUND_GC + pause_mode = pause_batch; +#endif //BACKGROUND_GC + init_mechanisms(); +} -#endif //USE_REGIONS +void gc_mechanisms::record (gc_history_global* history) +{ +#ifdef MULTIPLE_HEAPS + history->num_heaps = gc_heap::n_heaps; +#else + history->num_heaps = 1; #endif //MULTIPLE_HEAPS -#ifndef USE_REGIONS -class seg_free_spaces -{ - struct seg_free_space - { - BOOL is_plug; - void* start; - }; + history->condemned_generation = condemned_generation; + history->gen0_reduction_count = gen0_reduction_count; + history->reason = reason; + history->pause_mode = (int)pause_mode; + history->mem_pressure = entry_memory_load; + history->global_mechanisms_p = 0; - struct free_space_bucket - { - seg_free_space* free_space; - ptrdiff_t count_add; // Assigned when we first construct the array. - ptrdiff_t count_fit; // How many items left when we are fitting plugs. - }; + // start setting the boolean values. 
+ if (concurrent) + history->set_mechanism_p (global_concurrent); - void move_bucket (int old_power2, int new_power2) - { - // PREFAST warning 22015: old_power2 could be negative - assert (old_power2 >= 0); - assert (old_power2 >= new_power2); + if (compaction) + history->set_mechanism_p (global_compaction); - if (old_power2 == new_power2) - { - return; - } + if (promotion) + history->set_mechanism_p (global_promotion); - seg_free_space* src_index = free_space_buckets[old_power2].free_space; - for (int i = old_power2; i > new_power2; i--) - { - seg_free_space** dest = &(free_space_buckets[i].free_space); - (*dest)++; + if (demotion) + history->set_mechanism_p (global_demotion); - seg_free_space* dest_index = free_space_buckets[i - 1].free_space; - if (i > (new_power2 + 1)) - { - seg_free_space temp = *src_index; - *src_index = *dest_index; - *dest_index = temp; - } - src_index = dest_index; - } + if (card_bundles) + history->set_mechanism_p (global_card_bundles); - free_space_buckets[old_power2].count_fit--; - free_space_buckets[new_power2].count_fit++; - } + if (elevation_reduced) + history->set_mechanism_p (global_elevation); +} -#ifdef _DEBUG +/********************************** + called at the beginning of GC to fix the allocated size to + what is really allocated, or to turn the free area into an unused object + It needs to be called after all of the other allocation contexts have been + fixed since it relies on alloc_allocated. 
+ ********************************/ - void dump_free_space (seg_free_space* item) - { - uint8_t* addr = 0; - size_t len = 0; - if (item->is_plug) - { - mark* m = (mark*)(item->start); - len = pinned_len (m); - addr = pinned_plug (m) - len; - } - else - { - heap_segment* seg = (heap_segment*)(item->start); - addr = heap_segment_plan_allocated (seg); - len = heap_segment_committed (seg) - addr; - } - dprintf (SEG_REUSE_LOG_1, ("[%d]0x%p %zd", heap_num, addr, len)); - } +#ifdef CARD_BUNDLE +// The card bundle keeps track of groups of card words. +size_t align_cardw_on_bundle (size_t cardw) +{ + return ((size_t)(cardw + card_bundle_size - 1) & ~(card_bundle_size - 1 )); +} - void dump() - { - seg_free_space* item = NULL; - int i = 0; +// Get the card bundle representing a card word +size_t cardw_card_bundle (size_t cardw) +{ + return cardw / card_bundle_size; +} - dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------\nnow the free spaces look like:", heap_num)); - for (i = 0; i < (free_space_bucket_count - 1); i++) - { - dprintf (SEG_REUSE_LOG_1, ("[%d]Free spaces for 2^%d bucket:", heap_num, (base_power2 + i))); - dprintf (SEG_REUSE_LOG_1, ("[%d]%s %s", heap_num, "start", "len")); - item = free_space_buckets[i].free_space; - while (item < free_space_buckets[i + 1].free_space) - { - dump_free_space (item); - item++; - } - dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------", heap_num)); - } +// Get the first card word in a card bundle +size_t card_bundle_cardw (size_t cardb) +{ + return cardb * card_bundle_size; +} - dprintf (SEG_REUSE_LOG_1, ("[%d]Free spaces for 2^%d bucket:", heap_num, (base_power2 + i))); - dprintf (SEG_REUSE_LOG_1, ("[%d]%s %s", heap_num, "start", "len")); - item = free_space_buckets[i].free_space; - while (item <= &seg_free_space_array[free_space_item_count - 1]) - { - dump_free_space (item); - item++; - } - dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------", heap_num)); - } +// Takes a pointer to a 
card bundle table and an address, and returns a pointer that represents +// where a theoretical card bundle table that represents every address (starting from 0) would +// start if the bundle word representing the address were to be located at the pointer passed in. +// The returned 'translated' pointer makes it convenient/fast to calculate where the card bundle +// for a given address is using a simple shift operation on the address. +uint32_t* translate_card_bundle_table (uint32_t* cb, uint8_t* lowest_address) +{ + // The number of bytes of heap memory represented by a card bundle word + const size_t heap_bytes_for_bundle_word = card_size * card_word_width * card_bundle_size * card_bundle_word_width; -#endif //_DEBUG + // Each card bundle word is 32 bits + return (uint32_t*)((uint8_t*)cb - (((size_t)lowest_address / heap_bytes_for_bundle_word) * sizeof (uint32_t))); +} - free_space_bucket* free_space_buckets; - seg_free_space* seg_free_space_array; - ptrdiff_t free_space_bucket_count; - ptrdiff_t free_space_item_count; - int base_power2; - int heap_num; -#ifdef _DEBUG - BOOL has_end_of_seg; -#endif //_DEBUG +#endif // CARD_BUNDLE + +size_t size_brick_of (uint8_t* from, uint8_t* end) +{ + assert (((size_t)from & (brick_size-1)) == 0); + assert (((size_t)end & (brick_size-1)) == 0); -public: + return ((end - from) / brick_size) * sizeof (short); +} - seg_free_spaces (int h_number) - { - heap_num = h_number; - } +// Returns the number of DWORDs in the card table that cover the +// range of addresses [from, end[. +size_t count_card_of (uint8_t* from, uint8_t* end) +{ + return card_word (gcard_of (end - 1)) - card_word (gcard_of (from)) + 1; +} - BOOL alloc () - { - size_t total_prealloc_size = - MAX_NUM_BUCKETS * sizeof (free_space_bucket) + - MAX_NUM_FREE_SPACES * sizeof (seg_free_space); +// Returns the number of bytes to allocate for a card table +// that covers the range of addresses [from, end[. 
+size_t size_card_of (uint8_t* from, uint8_t* end) +{ + return count_card_of (from, end) * sizeof(uint32_t); +} - free_space_buckets = (free_space_bucket*) new (nothrow) uint8_t[total_prealloc_size]; +uint32_t* translate_card_table (uint32_t* ct) +{ + return (uint32_t*)((uint8_t*)ct - card_word (gcard_of (card_table_lowest_address (ct))) * sizeof(uint32_t)); +} - return (!!free_space_buckets); - } +#ifdef BACKGROUND_GC +inline +uint8_t* align_on_mark_bit (uint8_t* add) +{ + return (uint8_t*)((size_t)(add + (mark_bit_pitch - 1)) & ~(mark_bit_pitch - 1)); +} - // We take the ordered free space array we got from the 1st pass, - // and feed the portion that we decided to use to this method, ie, - // the largest item_count free spaces. - void add_buckets (int base, size_t* ordered_free_spaces, int bucket_count, size_t item_count) - { - assert (free_space_buckets); - assert (item_count <= (size_t)MAX_PTR); +inline +uint8_t* align_lower_mark_bit (uint8_t* add) +{ + return (uint8_t*)((size_t)(add) & ~(mark_bit_pitch - 1)); +} - free_space_bucket_count = bucket_count; - free_space_item_count = item_count; - base_power2 = base; -#ifdef _DEBUG - has_end_of_seg = FALSE; -#endif //_DEBUG +inline +BOOL is_aligned_on_mark_word (uint8_t* add) +{ + return ((size_t)add == ((size_t)(add) & ~(mark_word_size - 1))); +} - ptrdiff_t total_item_count = 0; - ptrdiff_t i = 0; +inline +uint8_t* align_lower_mark_word (uint8_t* add) +{ + return (uint8_t*)((size_t)(add) & ~(mark_word_size - 1)); +} - seg_free_space_array = (seg_free_space*)(free_space_buckets + free_space_bucket_count); +uint8_t* mark_bit_address (size_t mark_bit) +{ + return (uint8_t*)(mark_bit*mark_bit_pitch); +} - for (i = 0; i < (ptrdiff_t)item_count; i++) - { - seg_free_space_array[i].start = 0; - seg_free_space_array[i].is_plug = FALSE; - } +//In order to eliminate the lowest_address in the mark array +//computations (mark_word_of, etc) mark_array is offset +// according to the lowest_address. 
+uint32_t* translate_mark_array (uint32_t* ma) +{ + return (uint32_t*)((uint8_t*)ma - size_mark_array_of (0, g_gc_lowest_address)); +} - for (i = 0; i < bucket_count; i++) - { - free_space_buckets[i].count_add = ordered_free_spaces[i]; - free_space_buckets[i].count_fit = ordered_free_spaces[i]; - free_space_buckets[i].free_space = &seg_free_space_array[total_item_count]; - total_item_count += free_space_buckets[i].count_add; - } +#endif //BACKGROUND_GC - assert (total_item_count == (ptrdiff_t)item_count); - } +void own_card_table (uint32_t* c_table) +{ + card_table_refcount (c_table) += 1; +} - // If we are adding a free space before a plug we pass the - // mark stack position so we can update the length; we could - // also be adding the free space after the last plug in which - // case start is the segment which we'll need to update the - // heap_segment_plan_allocated. - void add (void* start, BOOL plug_p, BOOL first_p) +void delete_next_card_table (uint32_t* c_table) +{ + uint32_t* n_table = card_table_next (c_table); + if (n_table) { - size_t size = (plug_p ? 
- pinned_len ((mark*)start) : - (heap_segment_committed ((heap_segment*)start) - - heap_segment_plan_allocated ((heap_segment*)start))); - - if (plug_p) + if (card_table_next (n_table)) { - dprintf (SEG_REUSE_LOG_1, ("[%d]Adding a free space before plug: %zd", heap_num, size)); + delete_next_card_table (n_table); } - else + if (card_table_refcount (n_table) == 0) { - dprintf (SEG_REUSE_LOG_1, ("[%d]Adding a free space at end of seg: %zd", heap_num, size)); -#ifdef _DEBUG - has_end_of_seg = TRUE; -#endif //_DEBUG + destroy_card_table (n_table); + card_table_next (c_table) = 0; } + } +} - if (first_p) +void release_card_table (uint32_t* c_table) +{ + assert (card_table_refcount (c_table) >0); + card_table_refcount (c_table) -= 1; + if (card_table_refcount (c_table) == 0) + { + delete_next_card_table (c_table); + if (card_table_next (c_table) == 0) { - size_t eph_gen_starts = gc_heap::eph_gen_starts_size; - size -= eph_gen_starts; - if (plug_p) + destroy_card_table (c_table); + // sever the link from the parent + if (&g_gc_card_table[card_word (gcard_of(g_gc_lowest_address))] == c_table) { - mark* m = (mark*)(start); - pinned_len (m) -= eph_gen_starts; + g_gc_card_table = 0; + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + g_gc_card_bundle_table = 0; +#endif +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + SoftwareWriteWatch::StaticClose(); +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP } else { - heap_segment* seg = (heap_segment*)start; - heap_segment_plan_allocated (seg) += eph_gen_starts; + uint32_t* p_table = &g_gc_card_table[card_word (gcard_of(g_gc_lowest_address))]; + if (p_table) + { + while (p_table && (card_table_next (p_table) != c_table)) + p_table = card_table_next (p_table); + card_table_next (p_table) = 0; + } } } + } +} - int bucket_power2 = index_of_highest_set_bit (size); - if (bucket_power2 < base_power2) - { - return; - } - - free_space_bucket* bucket = &free_space_buckets[bucket_power2 - base_power2]; - - seg_free_space* 
bucket_free_space = bucket->free_space; - assert (plug_p || (!plug_p && bucket->count_add)); - - if (bucket->count_add == 0) - { - dprintf (SEG_REUSE_LOG_1, ("[%d]Already have enough of 2^%d", heap_num, bucket_power2)); - return; - } - - ptrdiff_t index = bucket->count_add - 1; +void destroy_card_table (uint32_t* c_table) +{ +// delete (uint32_t*)&card_table_refcount(c_table); - dprintf (SEG_REUSE_LOG_1, ("[%d]Building free spaces: adding %p; len: %zd (2^%d)", - heap_num, - (plug_p ? - (pinned_plug ((mark*)start) - pinned_len ((mark*)start)) : - heap_segment_plan_allocated ((heap_segment*)start)), - size, - bucket_power2)); + size_t size = card_table_size(c_table); + gc_heap::destroy_card_table_helper (c_table); + GCToOSInterface::VirtualRelease (&card_table_refcount(c_table), size); + dprintf (2, ("Table Virtual Free : %zx", (size_t)&card_table_refcount(c_table))); +} - if (plug_p) - { - bucket_free_space[index].is_plug = TRUE; - } +uint8_t** make_mark_list (size_t size) +{ + uint8_t** mark_list = new (nothrow) uint8_t* [size]; + return mark_list; +} - bucket_free_space[index].start = start; - bucket->count_add--; - } +#define swap(a,b){uint8_t* t; t = a; a = b; b = t;} -#ifdef _DEBUG - // Do a consistency check after all free spaces are added. 
- void check() +#ifndef USE_INTROSORT +void qsort1( uint8_t* *low, uint8_t* *high, unsigned int depth) +{ + if (((low + 16) >= high) || (depth > 100)) { - ptrdiff_t i = 0; - int end_of_seg_count = 0; - - for (i = 0; i < free_space_item_count; i++) + //insertion sort + uint8_t **i, **j; + for (i = low+1; i <= high; i++) { - assert (seg_free_space_array[i].start); - if (!(seg_free_space_array[i].is_plug)) + uint8_t* val = *i; + for (j=i;j >low && val<*(j-1);j--) { - end_of_seg_count++; + *j=*(j-1); } - } - - if (has_end_of_seg) - { - assert (end_of_seg_count == 1); - } - else - { - assert (end_of_seg_count == 0); - } - - for (i = 0; i < free_space_bucket_count; i++) - { - assert (free_space_buckets[i].count_add == 0); + *j=val; } } - -#endif //_DEBUG - - uint8_t* fit (uint8_t* old_loc, - size_t plug_size - REQD_ALIGN_AND_OFFSET_DCL) + else { - if (old_loc) - { -#ifdef SHORT_PLUGS - assert (!is_plug_padded (old_loc)); -#endif //SHORT_PLUGS - assert (!node_realigned (old_loc)); - } - - size_t saved_plug_size = plug_size; - -#ifdef FEATURE_STRUCTALIGN - // BARTOKTODO (4841): this code path is disabled (see can_fit_all_blocks_p) until we take alignment requirements into account - _ASSERTE(requiredAlignment == DATA_ALIGNMENT && false); -#endif // FEATURE_STRUCTALIGN - - size_t plug_size_to_fit = plug_size; - - // best fit is only done for gen1 to gen2 and we do not pad in gen2. - // however we must account for requirements of large alignment. - // which may result in realignment padding. 
-#ifdef RESPECT_LARGE_ALIGNMENT - plug_size_to_fit += switch_alignment_size(FALSE); -#endif //RESPECT_LARGE_ALIGNMENT - - int plug_power2 = index_of_highest_set_bit (round_up_power2 (plug_size_to_fit + Align(min_obj_size))); - ptrdiff_t i; - uint8_t* new_address = 0; - - if (plug_power2 < base_power2) - { - plug_power2 = base_power2; - } - - int chosen_power2 = plug_power2 - base_power2; -retry: - for (i = chosen_power2; i < free_space_bucket_count; i++) - { - if (free_space_buckets[i].count_fit != 0) - { - break; - } - chosen_power2++; - } - - dprintf (SEG_REUSE_LOG_1, ("[%d]Fitting plug len %zd (2^%d) using 2^%d free space", - heap_num, - plug_size, - plug_power2, - (chosen_power2 + base_power2))); - - assert (i < free_space_bucket_count); - - seg_free_space* bucket_free_space = free_space_buckets[chosen_power2].free_space; - ptrdiff_t free_space_count = free_space_buckets[chosen_power2].count_fit; - size_t new_free_space_size = 0; - BOOL can_fit = FALSE; - size_t pad = 0; + uint8_t *pivot, **left, **right; - for (i = 0; i < free_space_count; i++) - { - size_t free_space_size = 0; - pad = 0; + //sort low middle and high + if (*(low+((high-low)/2)) < *low) + swap (*(low+((high-low)/2)), *low); + if (*high < *low) + swap (*low, *high); + if (*high < *(low+((high-low)/2))) + swap (*(low+((high-low)/2)), *high); - if (bucket_free_space[i].is_plug) + swap (*(low+((high-low)/2)), *(high-1)); + pivot = *(high-1); + left = low; right = high-1; + while (1) { + while (*(--right) > pivot); + while (*(++left) < pivot); + if (left < right) { - mark* m = (mark*)(bucket_free_space[i].start); - uint8_t* plug_free_space_start = pinned_plug (m) - pinned_len (m); - - if (!((old_loc == 0) || same_large_alignment_p (old_loc, plug_free_space_start))) - { - pad = switch_alignment_size (FALSE); - } - - plug_size = saved_plug_size + pad; - - free_space_size = pinned_len (m); - new_address = pinned_plug (m) - pinned_len (m); - - if (free_space_size >= (plug_size + Align (min_obj_size)) || 
- free_space_size == plug_size) - { - new_free_space_size = free_space_size - plug_size; - pinned_len (m) = new_free_space_size; -#ifdef SIMPLE_DPRINTF - dprintf (SEG_REUSE_LOG_0, ("[%d]FP: 0x%p->0x%p(%zx)(%zx), [0x%p (2^%d) -> [0x%p (2^%d)", - heap_num, - old_loc, - new_address, - (plug_size - pad), - pad, - pinned_plug (m), - index_of_highest_set_bit (free_space_size), - (pinned_plug (m) - pinned_len (m)), - index_of_highest_set_bit (new_free_space_size))); -#endif //SIMPLE_DPRINTF - - if (pad != 0) - { - set_node_realigned (old_loc); - } - - can_fit = TRUE; - } + swap(*left, *right); } else - { - heap_segment* seg = (heap_segment*)(bucket_free_space[i].start); - free_space_size = heap_segment_committed (seg) - heap_segment_plan_allocated (seg); - - if (!((old_loc == 0) || same_large_alignment_p (old_loc, heap_segment_plan_allocated (seg)))) - { - pad = switch_alignment_size (FALSE); - } - - plug_size = saved_plug_size + pad; - - if (free_space_size >= (plug_size + Align (min_obj_size)) || - free_space_size == plug_size) - { - new_address = heap_segment_plan_allocated (seg); - new_free_space_size = free_space_size - plug_size; - heap_segment_plan_allocated (seg) = new_address + plug_size; -#ifdef SIMPLE_DPRINTF - dprintf (SEG_REUSE_LOG_0, ("[%d]FS: 0x%p-> 0x%p(%zd) (2^%d) -> 0x%p (2^%d)", - heap_num, - old_loc, - new_address, - (plug_size - pad), - index_of_highest_set_bit (free_space_size), - heap_segment_plan_allocated (seg), - index_of_highest_set_bit (new_free_space_size))); -#endif //SIMPLE_DPRINTF - - if (pad != 0) - set_node_realigned (old_loc); - - can_fit = TRUE; - } - } - - if (can_fit) - { break; - } - } - - if (!can_fit) - { - assert (chosen_power2 == 0); - chosen_power2 = 1; - goto retry; } + swap (*left, *(high-1)); + qsort1(low, left-1, depth+1); + qsort1(left+1, high, depth+1); + } +} +#endif //USE_INTROSORT - new_address += pad; - assert ((chosen_power2 && (i == 0)) || - ((!chosen_power2) && (i < free_space_count))); - - int new_bucket_power2 = 
index_of_highest_set_bit (new_free_space_size); - - if (new_bucket_power2 < base_power2) - { - new_bucket_power2 = base_power2; - } +#ifdef USE_VXSORT +void do_vxsort (uint8_t** item_array, ptrdiff_t item_count, uint8_t* range_low, uint8_t* range_high) +{ + // above this threshold, using AVX2 for sorting will likely pay off + // despite possible downclocking on some devices + const ptrdiff_t AVX2_THRESHOLD_SIZE = 8 * 1024; - move_bucket (chosen_power2, new_bucket_power2 - base_power2); + // above this threshold, using AVX512F for sorting will likely pay off + // despite possible downclocking on current devices + const ptrdiff_t AVX512F_THRESHOLD_SIZE = 128 * 1024; - //dump(); + // above this threshold, using NEON for sorting will likely pay off + const ptrdiff_t NEON_THRESHOLD_SIZE = 1024; - return new_address; - } + if (item_count <= 1) + return; - void cleanup () +#if defined(TARGET_AMD64) + if (IsSupportedInstructionSet (InstructionSet::AVX2) && (item_count > AVX2_THRESHOLD_SIZE)) { - if (free_space_buckets) + dprintf(3, ("Sorting mark lists")); + + // use AVX512F only if the list is large enough to pay for downclocking impact + if (IsSupportedInstructionSet (InstructionSet::AVX512F) && (item_count > AVX512F_THRESHOLD_SIZE)) { - delete [] free_space_buckets; + do_vxsort_avx512 (item_array, &item_array[item_count - 1], range_low, range_high); } - if (seg_free_space_array) + else { - delete [] seg_free_space_array; + do_vxsort_avx2 (item_array, &item_array[item_count - 1], range_low, range_high); } } -}; -#endif //!USE_REGIONS - -#define marked(i) header(i)->IsMarked() -#define set_marked(i) header(i)->SetMarked() -#define clear_marked(i) header(i)->ClearMarked() -#define pinned(i) header(i)->IsPinned() -#define set_pinned(i) header(i)->SetPinned() -#define clear_pinned(i) header(i)->GetHeader()->ClrGCBit(); - -inline size_t my_get_size (Object* ob) -{ - MethodTable* mT = header(ob)->GetMethodTable(); - - return (mT->GetBaseSize() + - (mT->HasComponentSize() ? 
- ((size_t)((CObjectHeader*)ob)->GetNumComponents() * mT->RawGetComponentSize()) : 0)); +#elif defined(TARGET_ARM64) + if (IsSupportedInstructionSet (InstructionSet::NEON) && (item_count > NEON_THRESHOLD_SIZE)) + { + dprintf(3, ("Sorting mark lists")); + do_vxsort_neon (item_array, &item_array[item_count - 1], range_low, range_high); + } +#endif + else + { + dprintf (3, ("Sorting mark lists")); + introsort::sort (item_array, &item_array[item_count - 1], 0); + } +#ifdef _DEBUG + // check the array is sorted + for (ptrdiff_t i = 0; i < item_count - 1; i++) + { + assert (item_array[i] <= item_array[i + 1]); + } + // check that the ends of the array are indeed in range + // together with the above this implies all elements are in range + assert ((range_low <= item_array[0]) && (item_array[item_count - 1] <= range_high)); +#endif } - -#define size(i) my_get_size (header(i)) - -#define contain_pointers(i) header(i)->ContainsGCPointers() -#ifdef COLLECTIBLE_CLASS -#define contain_pointers_or_collectible(i) header(i)->ContainsGCPointersOrCollectible() - -#define get_class_object(i) GCToEEInterface::GetLoaderAllocatorObjectForGC((Object *)i) -#define is_collectible(i) method_table(i)->Collectible() -#else //COLLECTIBLE_CLASS -#define contain_pointers_or_collectible(i) header(i)->ContainsGCPointers() -#endif //COLLECTIBLE_CLASS - -#ifdef USE_REGIONS - - -static GCSpinLock write_barrier_spin_lock; - -#endif //USE_REGIONS +#endif //USE_VXSORT #ifdef WRITE_WATCH uint8_t* g_addresses [array_size+2]; // to get around the bug in GetWriteWatch #ifdef BACKGROUND_GC -const size_t ww_reset_quantum = 128*1024*1024; +extern const size_t ww_reset_quantum = 128*1024*1024; #endif //BACKGROUND_GC #endif //WRITE_WATCH @@ -7235,32 +3821,11 @@ gc_heap::destroy_gc_heap(gc_heap* heap) delete heap; } -enum { -CORINFO_EXCEPTION_GC = 0xE0004743 // 'GC' -}; - - -#define mark_stack_empty_p() (mark_stack_base == mark_stack_tos) - #ifdef USE_REGIONS #ifdef DYNAMIC_HEAP_COUNT // check that the fields of 
a decommissioned heap have their expected values, // i.e. were not inadvertently modified -#define DECOMMISSIONED_VALUE 0xdec0dec0dec0dec0 -static const size_t DECOMMISSIONED_SIZE_T = DECOMMISSIONED_VALUE; -static const ptrdiff_t DECOMMISSIONED_PTRDIFF_T = (ptrdiff_t)DECOMMISSIONED_VALUE; -static const ptrdiff_t DECOMMISSIONED_UINT64_T = (uint64_t)DECOMMISSIONED_VALUE; -static uint8_t* const DECOMMISSIONED_UINT8_T_P = (uint8_t*)DECOMMISSIONED_VALUE; -static uint8_t** const DECOMMISSIONED_UINT8_T_PP = (uint8_t**)DECOMMISSIONED_VALUE; -static PTR_heap_segment const DECOMMISSIONED_REGION_P = (PTR_heap_segment)DECOMMISSIONED_VALUE; -static mark* const DECOMMISSIONED_MARK_P = (mark*)DECOMMISSIONED_VALUE; -static const BOOL DECOMMISSIONED_BOOL = 0xdec0dec0; -static const BOOL DECOMMISSIONED_INT = (int)0xdec0dec0; -static const float DECOMMISSIONED_FLOAT = (float)DECOMMISSIONED_VALUE; - -static const ptrdiff_t UNINITIALIZED_VALUE = 0xbaadbaadbaadbaad; - float log_with_base (float x, float base) { @@ -7331,212 +3896,12 @@ float gc_heap::dynamic_heap_count_data_t::slope (float* y, int n, float* avg) #ifdef MULTIPLE_HEAPS -#ifdef GC_CONFIG_DRIVEN -#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;} else {mark_list_index++;}} -#else //GC_CONFIG_DRIVEN -#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;}} -#endif //GC_CONFIG_DRIVEN - -#define m_boundary_fullgc(o) {} - -#else //MULTIPLE_HEAPS - -#ifdef GC_CONFIG_DRIVEN -#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;} else {mark_list_index++;} if (slow > o) slow = o; if (shigh < o) shigh = o;} -#else -#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;}if (slow > o) slow = o; if (shigh < o) shigh = o;} -#endif //GC_CONFIG_DRIVEN - -#define m_boundary_fullgc(o) {if (slow > o) slow = o; if (shigh < o) shigh = o;} - -#endif 
//MULTIPLE_HEAPS - -#ifdef USE_REGIONS -inline bool is_in_heap_range (uint8_t* o) -{ -#ifdef FEATURE_BASICFREEZE - // we may have frozen objects in read only segments - // outside of the reserved address range of the gc heap - assert (((g_gc_lowest_address <= o) && (o < g_gc_highest_address)) || - (o == nullptr) || (ro_segment_lookup (o) != nullptr)); - return ((g_gc_lowest_address <= o) && (o < g_gc_highest_address)); -#else //FEATURE_BASICFREEZE - // without frozen objects, every non-null pointer must be - // within the heap - assert ((o == nullptr) || (g_gc_lowest_address <= o) && (o < g_gc_highest_address)); - return (o != nullptr); -#endif //FEATURE_BASICFREEZE -} - -#endif //USE_REGIONS - -#define new_start() {if (ppstop <= start) {break;} else {parm = start}} -#define ignore_start 0 -#define use_start 1 - -#define go_through_object(mt,o,size,parm,start,start_useful,limit,exp) \ -{ \ - CGCDesc* map = CGCDesc::GetCGCDescFromMT((MethodTable*)(mt)); \ - CGCDescSeries* cur = map->GetHighestSeries(); \ - ptrdiff_t cnt = (ptrdiff_t) map->GetNumSeries(); \ - \ - if (cnt >= 0) \ - { \ - CGCDescSeries* last = map->GetLowestSeries(); \ - uint8_t** parm = 0; \ - do \ - { \ - assert (parm <= (uint8_t**)((o) + cur->GetSeriesOffset())); \ - parm = (uint8_t**)((o) + cur->GetSeriesOffset()); \ - uint8_t** ppstop = \ - (uint8_t**)((uint8_t*)parm + cur->GetSeriesSize() + (size));\ - if (!start_useful || (uint8_t*)ppstop > (start)) \ - { \ - if (start_useful && (uint8_t*)parm < (start)) parm = (uint8_t**)(start);\ - while (parm < ppstop) \ - { \ - {exp} \ - parm++; \ - } \ - } \ - cur--; \ - \ - } while (cur >= last); \ - } \ - else \ - { \ - /* Handle the repeating case - array of valuetypes */ \ - uint8_t** parm = (uint8_t**)((o) + cur->startoffset); \ - if (start_useful && start > (uint8_t*)parm) \ - { \ - ptrdiff_t cs = mt->RawGetComponentSize(); \ - parm = (uint8_t**)((uint8_t*)parm + (((start) - (uint8_t*)parm)/cs)*cs); \ - } \ - while ((uint8_t*)parm < 
((o)+(size)-plug_skew)) \ - { \ - for (ptrdiff_t __i = 0; __i > cnt; __i--) \ - { \ - HALF_SIZE_T skip = (cur->val_serie + __i)->skip; \ - HALF_SIZE_T nptrs = (cur->val_serie + __i)->nptrs; \ - uint8_t** ppstop = parm + nptrs; \ - if (!start_useful || (uint8_t*)ppstop > (start)) \ - { \ - if (start_useful && (uint8_t*)parm < (start)) parm = (uint8_t**)(start); \ - do \ - { \ - {exp} \ - parm++; \ - } while (parm < ppstop); \ - } \ - parm = (uint8_t**)((uint8_t*)ppstop + skip); \ - } \ - } \ - } \ -} - -#define go_through_object_nostart(mt,o,size,parm,exp) {go_through_object(mt,o,size,parm,o,ignore_start,(o + size),exp); } - -// 1 thing to note about this macro: -// 1) you can use *parm safely but in general you don't want to use parm -// because for the collectible types it's not an address on the managed heap. -#ifndef COLLECTIBLE_CLASS -#define go_through_object_cl(mt,o,size,parm,exp) \ -{ \ - if (header(o)->ContainsGCPointers()) \ - { \ - go_through_object_nostart(mt,o,size,parm,exp); \ - } \ -} -#else //COLLECTIBLE_CLASS -#define go_through_object_cl(mt,o,size,parm,exp) \ -{ \ - if (header(o)->Collectible()) \ - { \ - uint8_t* class_obj = get_class_object (o); \ - uint8_t** parm = &class_obj; \ - do {exp} while (false); \ - } \ - if (header(o)->ContainsGCPointers()) \ - { \ - go_through_object_nostart(mt,o,size,parm,exp); \ - } \ -} -#endif //COLLECTIBLE_CLASS - -// enable on processors known to have a useful prefetch instruction -#if defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64) || defined(TARGET_RISCV64) -#define PREFETCH -#endif - -#ifdef PREFETCH -inline void Prefetch(void* addr) -{ -#ifdef TARGET_WINDOWS - -#if defined(TARGET_AMD64) || defined(TARGET_X86) - -#ifndef _MM_HINT_T0 -#define _MM_HINT_T0 1 -#endif - _mm_prefetch((const char*)addr, _MM_HINT_T0); -#elif defined(TARGET_ARM64) - __prefetch((const char*)addr); -#endif //defined(TARGET_AMD64) || defined(TARGET_X86) - -#elif defined(TARGET_UNIX) - __builtin_prefetch(addr); 
-#else //!(TARGET_WINDOWS || TARGET_UNIX) - UNREFERENCED_PARAMETER(addr); -#endif //TARGET_WINDOWS -} -#else //PREFETCH -inline void Prefetch (void* addr) -{ - UNREFERENCED_PARAMETER(addr); -} -#endif //PREFETCH - -#define stolen 2 -#define partial 1 -#define partial_object 3 - -inline -BOOL stolen_p (uint8_t* r) -{ - return (((size_t)r&2) && !((size_t)r&1)); -} -inline -BOOL ready_p (uint8_t* r) -{ - return ((size_t)r != 1); -} -inline -BOOL partial_p (uint8_t* r) -{ - return (((size_t)r&1) && !((size_t)r&2)); -} -inline -BOOL straight_ref_p (uint8_t* r) -{ - return (!stolen_p (r) && !partial_p (r)); -} -inline -BOOL partial_object_p (uint8_t* r) -{ - return (((size_t)r & partial_object) == partial_object); -} - - - -#ifdef MULTIPLE_HEAPS - -static VOLATILE(BOOL) s_fUnpromotedHandles = FALSE; -static VOLATILE(BOOL) s_fUnscannedPromotions = FALSE; -static VOLATILE(BOOL) s_fScanRequired; +VOLATILE(BOOL) s_fUnpromotedHandles = FALSE; +VOLATILE(BOOL) s_fUnscannedPromotions = FALSE; +VOLATILE(BOOL) s_fScanRequired; #else //MULTIPLE_HEAPS #endif //MULTIPLE_HEAPS -#ifdef FEATURE_STRUCTALIGN // // The word with left child, right child, and align info is laid out as follows: // @@ -7555,70 +3920,6 @@ static VOLATILE(BOOL) s_fScanRequired; // by adding the alignment iff the misalignment is non-zero and less than min_obj_size. // -// The number of bits in a brick. -#if defined (TARGET_AMD64) -#define brick_bits (12) -#else -#define brick_bits (11) -#endif //TARGET_AMD64 -static_assert(brick_size == (1 << brick_bits)); - -// The number of bits needed to represent the offset to a child node. -// "brick_bits + 1" allows us to represent a signed offset within a brick. -#define child_bits (brick_bits + 1 - LOG2_PTRSIZE) - -// The number of bits in each of the pad hi, pad lo fields. 
-#define pad_bits (sizeof(short) * 8 - child_bits) - -#define child_from_short(w) (((signed short)(w) / (1 << (pad_bits - LOG2_PTRSIZE))) & ~((1 << LOG2_PTRSIZE) - 1)) -#define pad_mask ((1 << pad_bits) - 1) -#define pad_from_short(w) ((size_t)(w) & pad_mask) -#else // FEATURE_STRUCTALIGN -#define child_from_short(w) (w) -#endif // FEATURE_STRUCTALIGN - -inline -short node_left_child(uint8_t* node) -{ - return child_from_short(((plug_and_pair*)node)[-1].m_pair.left); -} - -inline -void set_node_left_child(uint8_t* node, ptrdiff_t val) -{ - assert (val > -(ptrdiff_t)brick_size); - assert (val < (ptrdiff_t)brick_size); - assert (Aligned (val)); -#ifdef FEATURE_STRUCTALIGN - size_t pad = pad_from_short(((plug_and_pair*)node)[-1].m_pair.left); - ((plug_and_pair*)node)[-1].m_pair.left = ((short)val << (pad_bits - LOG2_PTRSIZE)) | (short)pad; -#else // FEATURE_STRUCTALIGN - ((plug_and_pair*)node)[-1].m_pair.left = (short)val; -#endif // FEATURE_STRUCTALIGN - assert (node_left_child (node) == val); -} - -inline -short node_right_child(uint8_t* node) -{ - return child_from_short(((plug_and_pair*)node)[-1].m_pair.right); -} - -inline -void set_node_right_child(uint8_t* node, ptrdiff_t val) -{ - assert (val > -(ptrdiff_t)brick_size); - assert (val < (ptrdiff_t)brick_size); - assert (Aligned (val)); -#ifdef FEATURE_STRUCTALIGN - size_t pad = pad_from_short(((plug_and_pair*)node)[-1].m_pair.right); - ((plug_and_pair*)node)[-1].m_pair.right = ((short)val << (pad_bits - LOG2_PTRSIZE)) | (short)pad; -#else // FEATURE_STRUCTALIGN - ((plug_and_pair*)node)[-1].m_pair.right = (short)val; -#endif // FEATURE_STRUCTALIGN - assert (node_right_child (node) == val); -} - #ifdef FEATURE_STRUCTALIGN void node_aligninfo (uint8_t* node, int& requiredAlignment, ptrdiff_t& pad) { @@ -7683,39 +3984,6 @@ void set_node_aligninfo (uint8_t* node, int requiredAlignment, ptrdiff_t pad) } #endif // FEATURE_STRUCTALIGN -inline -void loh_set_node_relocation_distance(uint8_t* node, ptrdiff_t val) -{ - 
ptrdiff_t* place = &(((loh_obj_and_pad*)node)[-1].reloc); - *place = val; -} - -inline -ptrdiff_t loh_node_relocation_distance(uint8_t* node) -{ - return (((loh_obj_and_pad*)node)[-1].reloc); -} - -inline -ptrdiff_t node_relocation_distance (uint8_t* node) -{ - return (((plug_and_reloc*)(node))[-1].reloc & ~3); -} - -inline -void set_node_relocation_distance(uint8_t* node, ptrdiff_t val) -{ - assert (val == (val & ~3)); - ptrdiff_t* place = &(((plug_and_reloc*)node)[-1].reloc); - //clear the left bit and the relocation field - *place &= 1; - *place |= val; -} - -#define node_left_p(node) (((plug_and_reloc*)(node))[-1].reloc & 2) - -#define set_node_left(node) ((plug_and_reloc*)(node))[-1].reloc |= 2; - #ifndef FEATURE_STRUCTALIGN void set_node_realigned(uint8_t* node) { @@ -7732,25 +4000,6 @@ void clear_node_realigned(uint8_t* node) } #endif // FEATURE_STRUCTALIGN -inline -size_t node_gap_size (uint8_t* node) -{ - return ((plug_and_gap *)node)[-1].gap; -} - -void set_gap_size (uint8_t* node, size_t size) -{ - assert (Aligned (size)); - - // clear the 2 uint32_t used by the node. 
- ((plug_and_gap *)node)[-1].reloc = 0; - ((plug_and_gap *)node)[-1].lr =0; - ((plug_and_gap *)node)[-1].gap = size; - - assert ((size == 0 )||(size >= sizeof(plug_and_reloc))); - -} - /***************************** Called after compact phase to fix all generation gaps ********************************/ @@ -7784,47 +4033,6 @@ void heap_segment::thread_free_obj (uint8_t* obj, size_t s) #endif //USE_REGIONS -inline -uint8_t* tree_search (uint8_t* tree, uint8_t* old_address) -{ - uint8_t* candidate = 0; - int cn; - while (1) - { - if (tree < old_address) - { - if ((cn = node_right_child (tree)) != 0) - { - assert (candidate < tree); - candidate = tree; - tree = tree + cn; - Prefetch (&((plug_and_pair*)tree)[-1].m_pair.left); - continue; - } - else - break; - } - else if (tree > old_address) - { - if ((cn = node_left_child (tree)) != 0) - { - tree = tree + cn; - Prefetch (&((plug_and_pair*)tree)[-1].m_pair.left); - continue; - } - else - break; - } else - break; - } - if (tree <= old_address) - return tree; - else if (candidate) - return candidate; - else - return tree; -} - #ifdef MULTIPLE_HEAPS @@ -7886,9 +4094,6 @@ gc_heap::bgc_suspend_EE () } #endif //MULTIPLE_HEAPS -#ifdef BGC_SERVO_TUNING - -#endif //BGC_SERVO_TUNING #endif //BACKGROUND_GC //because of heap expansion, computing end is complicated. 
@@ -8120,21 +4325,6 @@ void StressHeapDummy (); #endif // STRESS_HEAP #endif // !FEATURE_NATIVEAOT -#ifdef FEATURE_PREMORTEM_FINALIZATION -#define REGISTER_FOR_FINALIZATION(_object, _size) \ - hp->finalize_queue->RegisterForFinalization (0, (_object), (_size)) -#else // FEATURE_PREMORTEM_FINALIZATION -#define REGISTER_FOR_FINALIZATION(_object, _size) true -#endif // FEATURE_PREMORTEM_FINALIZATION - -#define CHECK_ALLOC_AND_POSSIBLY_REGISTER_FOR_FINALIZATION(_object, _size, _register) do { \ - if ((_object) == NULL || ((_register) && !REGISTER_FOR_FINALIZATION(_object, _size))) \ - { \ - STRESS_LOG_OOM_STACK(_size); \ - return NULL; \ - } \ -} while (false) - #if defined(WRITE_BARRIER_CHECK) && !defined (SERVER_GC) // This code is designed to catch the failure to update the write barrier // The way it works is to copy the whole heap right after every GC. The write @@ -8482,68 +4672,6 @@ void PopulateDacVars(GcDacVars *gcDacVars) } } -inline -BOOL gc_heap::ephemeral_pointer_p (uint8_t* o) -{ -#ifdef USE_REGIONS - int gen_num = object_gennum ((uint8_t*)o); - assert (gen_num >= 0); - return (gen_num < max_generation); -#else - return ((o >= ephemeral_low) && (o < ephemeral_high)); -#endif //USE_REGIONS -} - -// This needs to check the range that's covered by bookkeeping because find_object will -// need to look at the brick table. 
-inline -bool gc_heap::is_in_find_object_range (uint8_t* o) -{ - if (o == nullptr) - { - return false; - } -#if defined(USE_REGIONS) && defined(FEATURE_CONSERVATIVE_GC) - return ((o >= g_gc_lowest_address) && (o < bookkeeping_covered_committed)); -#else //USE_REGIONS && FEATURE_CONSERVATIVE_GC - if ((o >= g_gc_lowest_address) && (o < g_gc_highest_address)) - { -#ifdef USE_REGIONS - assert ((o >= g_gc_lowest_address) && (o < bookkeeping_covered_committed)); -#endif //USE_REGIONS - return true; - } - else - { - return false; - } -#endif //USE_REGIONS && FEATURE_CONSERVATIVE_GC -} - -#ifdef USE_REGIONS - -// This assumes o is guaranteed to be in a region. -inline -bool gc_heap::is_in_condemned_gc (uint8_t* o) -{ - assert ((o >= g_gc_lowest_address) && (o < g_gc_highest_address)); - - int condemned_gen = settings.condemned_generation; - if (condemned_gen < max_generation) - { - int gen = get_region_gen_num (o); - if (gen > condemned_gen) - { - return false; - } - } - - return true; -} - -#endif //USE_REGIONS - - #if defined (_MSC_VER) && defined (TARGET_X86) #pragma optimize("y", on) // Small critical routines, don't put in EBP frame #endif //_MSC_VER && TARGET_X86 @@ -8631,36 +4759,6 @@ gc_heap* seg_mapping_table_heap_of_gc (uint8_t* o) } #endif //MULTIPLE_HEAPS -#if !defined(_DEBUG) && !defined(__GNUC__) -inline // This causes link errors if global optimization is off -#endif //!_DEBUG && !__GNUC__ -gc_heap* gc_heap::heap_of (uint8_t* o) -{ -#ifdef MULTIPLE_HEAPS - if (o == 0) - return g_heaps [0]; - gc_heap* hp = seg_mapping_table_heap_of (o); - return (hp ? hp : g_heaps[0]); -#else //MULTIPLE_HEAPS - UNREFERENCED_PARAMETER(o); - return __this; -#endif //MULTIPLE_HEAPS -} - -inline -gc_heap* gc_heap::heap_of_gc (uint8_t* o) -{ -#ifdef MULTIPLE_HEAPS - if (o == 0) - return g_heaps [0]; - gc_heap* hp = seg_mapping_table_heap_of_gc (o); - return (hp ? 
hp : g_heaps[0]); -#else //MULTIPLE_HEAPS - UNREFERENCED_PARAMETER(o); - return __this; -#endif //MULTIPLE_HEAPS -} - // will find all heap objects (large and small) // // Callers of this method need to guarantee the interior pointer is within the heap range. @@ -8746,17 +4844,6 @@ size_t gc_heap::get_generation_start_size (int gen_number) #endif //!USE_REGIONS } -inline -int gc_heap::get_num_heaps() -{ -#ifdef MULTIPLE_HEAPS - return n_heaps; -#else - return 1; -#endif //MULTIPLE_HEAPS -} - - void stomp_write_barrier_resize(bool is_runtime_suspended, bool requires_upper_bounds_check) { WriteBarrierParameters args = {}; @@ -8782,25 +4869,4 @@ void stomp_write_barrier_resize(bool is_runtime_suspended, bool requires_upper_b GCToEEInterface::StompWriteBarrier(&args); } -// Category-specific gc_heap method files -#include "region_allocator.cpp" -#include "region_free_list.cpp" -#include "finalization.cpp" -#include "interface.cpp" -#include "allocation.cpp" -#include "mark_phase.cpp" -#include "plan_phase.cpp" -#include "relocate_compact.cpp" -#include "sweep.cpp" -#include "background.cpp" -#include "regions_segments.cpp" -#include "card_table.cpp" -#include "memory.cpp" -#include "diagnostics.cpp" -#include "dynamic_tuning.cpp" -#include "no_gc.cpp" -#include "dynamic_heap_count.cpp" -#include "init.cpp" -#include "collect.cpp" - } diff --git a/src/coreclr/gc/gceesvr.cpp b/src/coreclr/gc/gceesvr.cpp deleted file mode 100644 index 9b37a77b0ae697..00000000000000 --- a/src/coreclr/gc/gceesvr.cpp +++ /dev/null @@ -1,7 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -#if defined(FEATURE_SVR_GC) -#define SERVER_GC 1 -#include "gcee.cpp" -#endif // FEATURE_SVR_GC diff --git a/src/coreclr/gc/gceewks.cpp b/src/coreclr/gc/gceewks.cpp deleted file mode 100644 index d0e275be2bb7ba..00000000000000 --- a/src/coreclr/gc/gceewks.cpp +++ /dev/null @@ -1,8 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#ifdef SERVER_GC -#undef SERVER_GC -#endif - -#include "gcee.cpp" diff --git a/src/coreclr/gc/gcinternal.h b/src/coreclr/gc/gcinternal.h new file mode 100644 index 00000000000000..8e33a84168dcb1 --- /dev/null +++ b/src/coreclr/gc/gcinternal.h @@ -0,0 +1,4448 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef GC_INTERNAL_H +#define GC_INTERNAL_H + +#include "common.h" +#include "gcenv.h" + +#include "gc.h" +#include "gcscan.h" +#include "gcdesc.h" +#include "softwarewritewatch.h" +#include "handletable.h" +#include "handletable.inl" +#include "gcenv.inl" +#include "gceventstatus.h" +#include + +// If FEATURE_INTERPRETER is set, always enable the GC side of FEATURE_CONSERVATIVE_GC +#ifdef FEATURE_INTERPRETER +#ifndef FEATURE_CONSERVATIVE_GC +#define FEATURE_CONSERVATIVE_GC +#endif +#endif // FEATURE_INTERPRETER + +#ifdef __INTELLISENSE__ +#if defined(FEATURE_SVR_GC) + +#define SERVER_GC 1 + +#else // defined(FEATURE_SVR_GC) + +#ifdef SERVER_GC +#undef SERVER_GC +#endif + +#endif // defined(FEATURE_SVR_GC) +#endif // __INTELLISENSE__ + +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#include "vxsort/do_vxsort.h" +#define USE_VXSORT +#else +#define USE_INTROSORT +#endif // TARGET_AMD64 || TARGET_ARM64 +#include "introsort.h" + +#ifdef SERVER_GC +namespace SVR { +#else // SERVER_GC +namespace WKS { +#endif // SERVER_GC + +#include "gcimpl.h" +#include "gcpriv.h" + +#ifdef DACCESS_COMPILE +#error this source file should not be compiled 
with DACCESS_COMPILE! +#endif //DACCESS_COMPILE + +// We just needed a simple random number generator for testing. +class gc_rand +{ +public: + static uint64_t x; + + static uint64_t get_rand() + { + x = (314159269*x+278281) & 0x7FFFFFFF; + return x; + } + + // obtain random number in the range 0 .. r-1 + static uint64_t get_rand(uint64_t r) + { + // require r >= 0 + uint64_t x = (uint64_t)((get_rand() * r) >> 31); + return x; + } +}; + +#define MAX_PTR ((uint8_t*)(~(ptrdiff_t)0)) +#define MAX_YP_SPIN_COUNT_UNIT 32768 +#define MIN_SOH_CROSS_GEN_REFS (400) +#define MIN_LOH_CROSS_GEN_REFS (800) + +#ifdef SERVER_GC +#define partial_size_th 100 +#define num_partial_refs 64 +#else //SERVER_GC +#define partial_size_th 100 +#define num_partial_refs 32 +#endif //SERVER_GC + +#ifdef HOST_64BIT +#define MARK_STACK_INITIAL_LENGTH 1024 +#else +#define MARK_STACK_INITIAL_LENGTH 128 +#endif // HOST_64BIT + +extern uint32_t yp_spin_count_unit; +extern uint32_t original_spin_count_unit; + +#ifdef GC_CONFIG_DRIVEN +extern int compact_ratio; +#define gc_config_log_buffer_size (1*1024) // TEMP +extern FILE* gc_config_log; +extern uint8_t* gc_config_log_buffer; +#endif //GC_CONFIG_DRIVEN + +#ifdef WRITE_WATCH +extern uint8_t* g_addresses [array_size+2]; +#endif //WRITE_WATCH + +#define lowbits(wrd, bits) ((wrd) & ((1 << (bits))-1)) +#define highbits(wrd, bits) ((wrd) & ~((1 << (bits))-1)) + +#if defined(BACKGROUND_GC) && defined(FEATURE_EVENT_TRACE) +extern BOOL bgc_heap_walk_for_etw_p; +#endif //BACKGROUND_GC && FEATURE_EVENT_TRACE + +extern const char* const str_root_kinds[]; + +#ifdef MH_SC_MARK +extern const int max_snoop_level; +#endif //MH_SC_MARK + +// This needs to check the range that's covered by bookkeeping because find_object will +// need to look at the brick table. 
+inline +bool gc_heap::is_in_find_object_range (uint8_t* o) +{ + if (o == nullptr) + { + return false; + } +#if defined(USE_REGIONS) && defined(FEATURE_CONSERVATIVE_GC) + return ((o >= g_gc_lowest_address) && (o < bookkeeping_covered_committed)); +#else //USE_REGIONS && FEATURE_CONSERVATIVE_GC + if ((o >= g_gc_lowest_address) && (o < g_gc_highest_address)) + { +#ifdef USE_REGIONS + assert ((o >= g_gc_lowest_address) && (o < bookkeeping_covered_committed)); +#endif //USE_REGIONS + return true; + } + else + { + return false; + } +#endif //USE_REGIONS && FEATURE_CONSERVATIVE_GC +} + +#ifdef USE_REGIONS + +// This assumes o is guaranteed to be in a region. +inline +bool gc_heap::is_in_condemned_gc (uint8_t* o) +{ + assert ((o >= g_gc_lowest_address) && (o < g_gc_highest_address)); + + int condemned_gen = settings.condemned_generation; + if (condemned_gen < max_generation) + { + int gen = get_region_gen_num (o); + if (gen > condemned_gen) + { + return false; + } + } + + return true; +} + +#endif //USE_REGIONS + +#ifdef MULTIPLE_HEAPS +extern uint32_t g_num_active_processors; + +// Note that when a join is no longer used we still keep the values here because +// tooling already recognized them as having the meaning they were assigned originally. +// It doesn't break tooling if we stop using them but does if we assign a new meaning +// to them. 
+enum gc_join_stage +{ + gc_join_init_cpu_mapping = 0, + gc_join_done = 1, + gc_join_generation_determined = 2, + gc_join_begin_mark_phase = 3, + gc_join_scan_dependent_handles = 4, + gc_join_rescan_dependent_handles = 5, + gc_join_scan_sizedref_done = 6, + gc_join_null_dead_short_weak = 7, + gc_join_scan_finalization = 8, + gc_join_null_dead_long_weak = 9, + gc_join_null_dead_syncblk = 10, + gc_join_decide_on_compaction = 11, + gc_join_rearrange_segs_compaction = 12, + gc_join_adjust_handle_age_compact = 13, + gc_join_adjust_handle_age_sweep = 14, + gc_join_begin_relocate_phase = 15, + gc_join_relocate_phase_done = 16, + gc_join_verify_objects_done = 17, + gc_join_start_bgc = 18, + gc_join_restart_ee = 19, + gc_join_concurrent_overflow = 20, + gc_join_suspend_ee = 21, + gc_join_bgc_after_ephemeral = 22, + gc_join_allow_fgc = 23, + gc_join_bgc_sweep = 24, + gc_join_suspend_ee_verify = 25, + gc_join_restart_ee_verify = 26, + gc_join_set_state_free = 27, + gc_r_join_update_card_bundle = 28, + gc_join_after_absorb = 29, + gc_join_verify_copy_table = 30, + gc_join_after_reset = 31, + gc_join_after_ephemeral_sweep = 32, + gc_join_after_profiler_heap_walk = 33, + gc_join_minimal_gc = 34, + gc_join_after_commit_soh_no_gc = 35, + gc_join_expand_loh_no_gc = 36, + gc_join_final_no_gc = 37, + // No longer in use but do not remove, see comments for this enum. 
+ gc_join_disable_software_write_watch = 38, + gc_join_merge_temp_fl = 39, + gc_join_bridge_processing = 40, + gc_join_max = 41 +}; + +enum gc_join_flavor +{ + join_flavor_server_gc = 0, + join_flavor_bgc = 1 +}; + +#define first_thread_arrived 2 +#pragma warning(push) +#pragma warning(disable:4324) // don't complain if DECLSPEC_ALIGN actually pads +struct DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) join_structure +{ + // Shared non volatile keep on separate line to prevent eviction + int n_threads; + + // Keep polling/wait structures on separate line write once per join + DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) + GCEvent joined_event[3]; // the last event in the array is only used for first_thread_arrived. + Volatile lock_color; + VOLATILE(BOOL) wait_done; + VOLATILE(BOOL) joined_p; + + // Keep volatile counted locks on separate cache line write many per join + DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) + VOLATILE(int) join_lock; + VOLATILE(int) r_join_lock; + +}; +#pragma warning(pop) + +enum join_type +{ + type_last_join = 0, + type_join = 1, + type_restart = 2, + type_first_r_join = 3, + type_r_join = 4 +}; + +enum join_time +{ + time_start = 0, + time_end = 1 +}; + +enum join_heap_index +{ + join_heap_restart = 100, + join_heap_r_restart = 200 +}; + +class t_join +{ + join_structure join_struct; + + int id; + gc_join_flavor flavor; + +#ifdef JOIN_STATS + uint64_t start[MAX_SUPPORTED_CPUS], end[MAX_SUPPORTED_CPUS], start_seq; + // remember join id and last thread to arrive so restart can use these + int thd; + // we want to print statistics every 10 seconds - this is to remember the start of the 10 sec interval + uint64_t start_tick; + // counters for joins, in 1000's of clock cycles + uint64_t elapsed_total[gc_join_max], wake_total[gc_join_max], seq_loss_total[gc_join_max], par_loss_total[gc_join_max], in_join_total[gc_join_max]; +#endif //JOIN_STATS + +public: + BOOL init (int n_th, gc_join_flavor f) + { + dprintf (JOIN_LOG, ("Initializing join structure")); + 
join_struct.n_threads = n_th; + join_struct.lock_color = 0; + for (int i = 0; i < 3; i++) + { + if (!join_struct.joined_event[i].IsValid()) + { + join_struct.joined_p = FALSE; + dprintf (JOIN_LOG, ("Creating join event %d", i)); + // TODO - changing this to a non OS event + // because this is also used by BGC threads which are + // managed threads and WaitEx does not allow you to wait + // for an OS event on a managed thread. + // But we are not sure if this plays well in the hosting + // environment. + //join_struct.joined_event[i].CreateOSManualEventNoThrow(FALSE); + if (!join_struct.joined_event[i].CreateManualEventNoThrow(FALSE)) + return FALSE; + } + } + join_struct.join_lock = join_struct.n_threads; + join_struct.r_join_lock = join_struct.n_threads; + join_struct.wait_done = FALSE; + flavor = f; + +#ifdef JOIN_STATS + start_tick = GCToOSInterface::GetLowPrecisionTimeStamp(); +#endif //JOIN_STATS + + return TRUE; + } + + void update_n_threads(int n_th) + { + join_struct.n_threads = n_th; + join_struct.join_lock = n_th; + join_struct.r_join_lock = n_th; + } + + int get_num_threads() + { + return join_struct.n_threads; + } + + // This is for instrumentation only. 
+ int get_join_lock() + { + return VolatileLoadWithoutBarrier (&join_struct.join_lock); + } + + void destroy () + { + dprintf (JOIN_LOG, ("Destroying join structure")); + for (int i = 0; i < 3; i++) + { + if (join_struct.joined_event[i].IsValid()) + join_struct.joined_event[i].CloseEvent(); + } + } + + inline void fire_event (int heap, join_time time, join_type type, int join_id) + { + FIRE_EVENT(GCJoin_V2, heap, time, type, join_id); + } + + void join (gc_heap* gch, int join_id) + { +#ifdef JOIN_STATS + // parallel execution ends here + end[gch->heap_number] = get_ts(); +#endif //JOIN_STATS + + assert (!join_struct.joined_p); + int color = join_struct.lock_color.LoadWithoutBarrier(); + + if (Interlocked::Decrement(&join_struct.join_lock) != 0) + { + dprintf (JOIN_LOG, ("join%d(%d): Join() Waiting...join_lock is now %d", + flavor, join_id, (int32_t)(join_struct.join_lock))); + + fire_event (gch->heap_number, time_start, type_join, join_id); + + //busy wait around the color + if (color == join_struct.lock_color.LoadWithoutBarrier()) + { +respin: + int spin_count = 128 * yp_spin_count_unit; + for (int j = 0; j < spin_count; j++) + { + if (color != join_struct.lock_color.LoadWithoutBarrier()) + { + break; + } + YieldProcessor(); // indicate to the processor that we are spinning + } + + // we've spun, and if color still hasn't changed, fall into hard wait + if (color == join_struct.lock_color.LoadWithoutBarrier()) + { + dprintf (JOIN_LOG, ("join%d(%d): Join() hard wait on reset event %d, join_lock is now %d", + flavor, join_id, color, (int32_t)(join_struct.join_lock))); + + uint32_t dwJoinWait = join_struct.joined_event[color].Wait(INFINITE, FALSE); + + if (dwJoinWait != WAIT_OBJECT_0) + { + STRESS_LOG1 (LF_GC, LL_FATALERROR, "joined event wait failed with code: %zx", dwJoinWait); + FATAL_GC_ERROR (); + } + } + + // avoid race due to the thread about to reset the event (occasionally) being preempted before ResetEvent() + if (color == 
join_struct.lock_color.LoadWithoutBarrier()) + { + dprintf (9999, ("---h%d %d j%d %d - respin!!! (c:%d-%d)", + gch->heap_number, join_id, join_struct.n_threads, color, join_struct.lock_color.LoadWithoutBarrier())); + goto respin; + } + + dprintf (JOIN_LOG, ("join%d(%d): Join() done, join_lock is %d", + flavor, join_id, (int32_t)(join_struct.join_lock))); + } + + fire_event (gch->heap_number, time_end, type_join, join_id); + +#ifdef JOIN_STATS + // parallel execution starts here + start[gch->heap_number] = get_ts(); + Interlocked::ExchangeAdd(&in_join_total[join_id], (start[gch->heap_number] - end[gch->heap_number])); +#endif //JOIN_STATS + } + else + { + fire_event (gch->heap_number, time_start, type_last_join, join_id); + + join_struct.joined_p = TRUE; + dprintf (JOIN_LOG, ("join%d(%d): Last thread to complete the join, setting id", flavor, join_id)); + join_struct.joined_event[!color].Reset(); + id = join_id; +#ifdef JOIN_STATS + // remember the join id, the last thread arriving, the start of the sequential phase, + // and keep track of the cycles spent waiting in the join + thd = gch->heap_number; + start_seq = get_ts(); + Interlocked::ExchangeAdd(&in_join_total[join_id], (start_seq - end[gch->heap_number])); +#endif //JOIN_STATS + } + } + + // Reverse join - first thread gets here does the work; other threads will only proceed + // after the work is done. + // Note that you cannot call this twice in a row on the same thread. Plus there's no + // need to call it twice in row - you should just merge the work. 
+ BOOL r_join (gc_heap* gch, int join_id) + { + + if (join_struct.n_threads == 1) + { + return TRUE; + } + + if (Interlocked::CompareExchange(&join_struct.r_join_lock, 0, join_struct.n_threads) == 0) + { + fire_event (gch->heap_number, time_start, type_join, join_id); + + dprintf (JOIN_LOG, ("r_join() Waiting...")); + + //busy wait around the color +respin: + int spin_count = 256 * yp_spin_count_unit; + for (int j = 0; j < spin_count; j++) + { + if (join_struct.wait_done) + { + break; + } + YieldProcessor(); // indicate to the processor that we are spinning + } + + // we've spun, and if color still hasn't changed, fall into hard wait + if (!join_struct.wait_done) + { + dprintf (JOIN_LOG, ("Join() hard wait on reset event %d", first_thread_arrived)); + uint32_t dwJoinWait = join_struct.joined_event[first_thread_arrived].Wait(INFINITE, FALSE); + if (dwJoinWait != WAIT_OBJECT_0) + { + STRESS_LOG1 (LF_GC, LL_FATALERROR, "joined event wait failed with code: %zx", dwJoinWait); + FATAL_GC_ERROR (); + } + } + + // avoid race due to the thread about to reset the event (occasionally) being preempted before ResetEvent() + if (!join_struct.wait_done) + { + goto respin; + } + + dprintf (JOIN_LOG, ("r_join() done")); + + fire_event (gch->heap_number, time_end, type_join, join_id); + + return FALSE; + } + else + { + fire_event (gch->heap_number, time_start, type_first_r_join, join_id); + return TRUE; + } + } + +#ifdef JOIN_STATS + uint64_t get_ts() + { + return GCToOSInterface::QueryPerformanceCounter(); + } + + void start_ts (gc_heap* gch) + { + // parallel execution ends here + start[gch->heap_number] = get_ts(); + } +#endif //JOIN_STATS + + void restart() + { +#ifdef JOIN_STATS + uint64_t elapsed_seq = get_ts() - start_seq; + uint64_t max = 0, sum = 0, wake = 0; + uint64_t min_ts = start[0]; + for (int i = 1; i < join_struct.n_threads; i++) + { + if(min_ts > start[i]) min_ts = start[i]; + } + + for (int i = 0; i < join_struct.n_threads; i++) + { + uint64_t wake_delay = 
start[i] - min_ts; + uint64_t elapsed = end[i] - start[i]; + if (max < elapsed) + max = elapsed; + sum += elapsed; + wake += wake_delay; + } + uint64_t seq_loss = (join_struct.n_threads - 1)*elapsed_seq; + uint64_t par_loss = join_struct.n_threads*max - sum; + double efficiency = 0.0; + if (max > 0) + efficiency = sum*100.0/(join_struct.n_threads*max); + + const double ts_scale = 1e-6; + + // enable this printf to get statistics on each individual join as it occurs + //printf("join #%3d seq_loss = %5g par_loss = %5g efficiency = %3.0f%%\n", join_id, ts_scale*seq_loss, ts_scale*par_loss, efficiency); + + elapsed_total[id] += sum; + wake_total[id] += wake; + seq_loss_total[id] += seq_loss; + par_loss_total[id] += par_loss; + + // every 10 seconds, print a summary of the time spent in each type of join + if (GCToOSInterface::GetLowPrecisionTimeStamp() - start_tick > 10*1000) + { + printf("**** summary *****\n"); + for (int i = 0; i < 16; i++) + { + printf("join #%3d elapsed_total = %8g wake_loss = %8g seq_loss = %8g par_loss = %8g in_join_total = %8g\n", + i, + ts_scale*elapsed_total[i], + ts_scale*wake_total[i], + ts_scale*seq_loss_total[i], + ts_scale*par_loss_total[i], + ts_scale*in_join_total[i]); + elapsed_total[i] = wake_total[i] = seq_loss_total[i] = par_loss_total[i] = in_join_total[i] = 0; + } + start_tick = GCToOSInterface::GetLowPrecisionTimeStamp(); + } +#endif //JOIN_STATS + + fire_event (join_heap_restart, time_start, type_restart, -1); + assert (join_struct.joined_p); + join_struct.joined_p = FALSE; + join_struct.join_lock = join_struct.n_threads; + dprintf (JOIN_LOG, ("join%d(%d): Restarting from join: join_lock is %d", flavor, id, (int32_t)(join_struct.join_lock))); + int color = join_struct.lock_color.LoadWithoutBarrier(); + join_struct.lock_color = !color; + join_struct.joined_event[color].Set(); + + fire_event (join_heap_restart, time_end, type_restart, -1); + +#ifdef JOIN_STATS + start[thd] = get_ts(); +#endif //JOIN_STATS + } + + BOOL joined() + 
    {
        dprintf (JOIN_LOG, ("join%d(%d): joined, join_lock is %d", flavor, id, (int32_t)(join_struct.join_lock)));
        return join_struct.joined_p;
    }

    void r_restart()
    {
        if (join_struct.n_threads != 1)
        {
            fire_event (join_heap_r_restart, time_start, type_restart, -1);
            join_struct.wait_done = TRUE;
            join_struct.joined_event[first_thread_arrived].Set();
            fire_event (join_heap_r_restart, time_end, type_restart, -1);
        }
    }

    void r_init()
    {
        if (join_struct.n_threads != 1)
        {
            join_struct.r_join_lock = join_struct.n_threads;
            join_struct.wait_done = FALSE;
            join_struct.joined_event[first_thread_arrived].Reset();
        }
    }
};

extern t_join gc_t_join;
#ifdef BACKGROUND_GC
extern t_join bgc_t_join;
#endif //BACKGROUND_GC
#endif //MULTIPLE_HEAPS

// Write a value with a full interlocked exchange so the store is globally visible.
inline
void c_write (uint32_t& place, uint32_t value)
{
    Interlocked::Exchange (&place, value);
}

// Spin up to count_to_spin iterations waiting for expr; if it still doesn't hold,
// yield the thread once and fall through (caller typically loops via goto).
#define spin_and_switch(count_to_spin, expr) \
{ \
    for (int j = 0; j < count_to_spin; j++) \
    { \
        if (expr) \
        { \
            break;\
        } \
        YieldProcessor(); \
    } \
    if (!(expr)) \
    { \
        GCToOSInterface::YieldThread(0); \
    } \
}

// Like spin_and_switch, but keeps spinning/yielding until expr holds.
#define spin_and_wait(count_to_spin, expr) \
{ \
    while (!expr) \
    { \
        for (int j = 0; j < count_to_spin; j++) \
        { \
            if (expr) \
            { \
                break; \
            } \
            YieldProcessor (); \
        } \
        if (!(expr)) \
        { \
            GCToOSInterface::YieldThread (0); \
        } \
    } \
}

#ifdef BACKGROUND_GC
#define max_pending_allocs 64

extern float bgc_uoh_inc_ratio_alloc_normal;
extern float bgc_uoh_inc_ratio_alloc_wait;

// Lock-free coordination between the background GC marker and UOH allocators:
// the marker publishes the object it's about to mark (rwp_object) while allocators
// publish objects being allocated (alloc_objects); each side spins if it sees a
// conflict. needs_checking is the interlocked gate protecting both publications.
class exclusive_sync
{
    VOLATILE(uint8_t*) rwp_object;
    VOLATILE(int32_t) needs_checking;

    int spin_count;

    uint8_t cache_separator[HS_CACHE_LINE_SIZE - (sizeof (spin_count) + sizeof (needs_checking) + sizeof (rwp_object))];

    // TODO - perhaps each object should be on its own cache line...
    VOLATILE(uint8_t*) alloc_objects[max_pending_allocs];

    // Returns the index of a free slot in alloc_objects, or -1 if all are taken.
    int find_free_index ()
    {
        for (int i = 0; i < max_pending_allocs; i++)
        {
            if (alloc_objects [i] == (uint8_t*)0)
            {
                return i;
            }
        }

        return -1;
    }

public:
    void init()
    {
        spin_count = 32 * (g_num_processors - 1);
        rwp_object = 0;
        needs_checking = 0;
        for (int i = 0; i < max_pending_allocs; i++)
        {
            alloc_objects [i] = (uint8_t*)0;
        }
    }

    // Debug validation: no allocation slot should still be published.
    void check()
    {
        for (int i = 0; i < max_pending_allocs; i++)
        {
            if (alloc_objects [i] != (uint8_t*)0)
            {
                FATAL_GC_ERROR();
            }
        }
    }

    // Marker side: publish obj as the object being marked; spins while any
    // allocator has the same object published.
    void bgc_mark_set (uint8_t* obj)
    {
        dprintf (3, ("cm: probing %p", obj));
retry:
        if (Interlocked::CompareExchange(&needs_checking, 1, 0) == 0)
        {
            // If we spend too much time scanning all the allocs,
            // consider adding a high water mark and scan up
            // to that; we'll need to interlock in done when
            // we update the high watermark.
            for (int i = 0; i < max_pending_allocs; i++)
            {
                if (obj == alloc_objects[i])
                {
                    needs_checking = 0;
                    dprintf (3, ("cm: will spin"));
                    spin_and_switch (spin_count, (obj != alloc_objects[i]));
                    goto retry;
                }
            }

            rwp_object = obj;
            needs_checking = 0;
            dprintf (3, ("cm: set %p", obj));
            return;
        }
        else
        {
            spin_and_switch (spin_count, (needs_checking == 0));
            goto retry;
        }
    }

    // Allocator side: publish obj in a free slot and return its index (the
    // "cookie"), or -1 when concurrent mark isn't in progress. Spins if the
    // marker currently holds the same object or no slot is free.
    int uoh_alloc_set (uint8_t* obj)
    {
        if (!gc_heap::cm_in_progress)
        {
            return -1;
        }

retry:
        dprintf (3, ("uoh alloc: probing %p", obj));

        if (Interlocked::CompareExchange(&needs_checking, 1, 0) == 0)
        {
            if (obj == rwp_object)
            {
                needs_checking = 0;
                spin_and_switch (spin_count, (obj != rwp_object));
                goto retry;
            }
            else
            {
                int cookie = find_free_index();

                if (cookie != -1)
                {
                    alloc_objects[cookie] = obj;
                    needs_checking = 0;

                    dprintf (3, ("uoh alloc: set %p at %d", obj, cookie));
                    return cookie;
                }
                else
                {
                    needs_checking = 0;
                    dprintf (3, ("uoh alloc: setting %p will spin to acquire a free index", obj));
                    spin_and_switch (spin_count, (find_free_index () != -1));
                    goto retry;
                }
            }
        }
        else
        {
            dprintf (3, ("uoh alloc: will spin on checking %p", obj));
            spin_and_switch (spin_count, (needs_checking == 0));
            goto retry;
        }
    }

    void bgc_mark_done ()
    {
        dprintf (3, ("cm: release lock on %p", (uint8_t *)rwp_object));
        rwp_object = 0;
    }

    void uoh_alloc_done_with_index (int index)
    {
        dprintf (3, ("uoh alloc: release lock on %p based on %d", (uint8_t *)alloc_objects[index], index));
        assert ((index >= 0) && (index < max_pending_allocs));
        alloc_objects[index] = (uint8_t*)0;
    }

    // Release by object value when the caller didn't keep the cookie; a miss is
    // logged but not fatal (best-effort release).
    void uoh_alloc_done (uint8_t* obj)
    {
        if (!gc_heap::cm_in_progress)
        {
            return;
        }

        for (int i = 0; i < max_pending_allocs; i++)
        {
            if (alloc_objects [i] == obj)
            {
                uoh_alloc_done_with_index(i);
                return;
            }
        }
        dprintf (3, ("uoh alloc: could not release lock on %p", obj));
    }
};
#endif //BACKGROUND_GC

#ifdef FEATURE_BASICFREEZE
// The array we allocate is organized as follows:
// 0th element is the address of the last array we allocated.
// starting from the 1st element are the segment addresses, that's
// what buckets() returns.
+struct bk +{ + uint8_t* add; + size_t val; +}; + +class sorted_table +{ +private: + ptrdiff_t size; + ptrdiff_t count; + bk* slots; + bk* buckets() { return (slots + 1); } + uint8_t*& last_slot (bk* arr) { return arr[0].add; } + bk* old_slots; +public: + static sorted_table* make_sorted_table (); + BOOL insert (uint8_t* add, size_t val);; + size_t lookup (uint8_t*& add); + void remove (uint8_t* add); + void clear (); + void delete_sorted_table(); + void delete_old_slots(); + void enqueue_old_slot(bk* sl); + BOOL ensure_space_for_insert(); +}; +#endif //FEATURE_BASICFREEZE + +#ifdef FEATURE_STRUCTALIGN +BOOL IsStructAligned (uint8_t *ptr, int requiredAlignment); +#endif // FEATURE_STRUCTALIGN + +#define GC_MARKED (size_t)0x1 +#ifdef DOUBLY_LINKED_FL +// This bit indicates that we'll need to set the bgc mark bit for this object during an FGC. +// We only do this when we decide to compact. +#define BGC_MARKED_BY_FGC (size_t)0x2 +#define MAKE_FREE_OBJ_IN_COMPACT (size_t)0x4 +#define ALLOWED_SPECIAL_HEADER_BITS (GC_MARKED|BGC_MARKED_BY_FGC|MAKE_FREE_OBJ_IN_COMPACT) +#else //DOUBLY_LINKED_FL +#define ALLOWED_SPECIAL_HEADER_BITS (GC_MARKED) +#endif //!DOUBLY_LINKED_FL + +#ifdef HOST_64BIT +#define SPECIAL_HEADER_BITS (0x7) +#else +#define SPECIAL_HEADER_BITS (0x3) +#endif + +#define free_object_base_size (plug_skew + sizeof(ArrayBase)) + +#define free_list_slot(x) ((uint8_t**)(x))[2] +#define free_list_undo(x) ((uint8_t**)(x))[-1] +#define UNDO_EMPTY ((uint8_t*)1) + +#ifdef DOUBLY_LINKED_FL +#define free_list_prev(x) ((uint8_t**)(x))[3] +#define PREV_EMPTY ((uint8_t*)1) + +inline +void check_and_clear_in_free_list (uint8_t* o, size_t size) +{ + if (size >= min_free_list) + { + free_list_prev (o) = PREV_EMPTY; + } +} + +#endif //DOUBLY_LINKED_FL + +typedef void** PTR_PTR; + +class CObjectHeader : public Object +{ +public: + +#if defined(FEATURE_NATIVEAOT) || defined(BUILD_AS_STANDALONE) + // The GC expects the following methods that are provided by the Object class in the 
CLR but not provided + // by NativeAOT's version of Object. + uint32_t GetNumComponents() + { + return ((ArrayBase *)this)->GetNumComponents(); + } + + void Validate(BOOL bDeep=TRUE, BOOL bVerifyNextHeader = FALSE, BOOL bVerifySyncBlock = FALSE) + { + // declaration of extra parameters just so the call site would need no #ifdefs + UNREFERENCED_PARAMETER(bVerifyNextHeader); + UNREFERENCED_PARAMETER(bVerifySyncBlock); + + MethodTable * pMT = GetMethodTable(); + + _ASSERTE(pMT->SanityCheck()); + + bool noRangeChecks = + (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_NO_RANGE_CHECKS) == GCConfig::HEAPVERIFY_NO_RANGE_CHECKS; + + BOOL fSmallObjectHeapPtr = FALSE, fLargeObjectHeapPtr = FALSE; + if (!noRangeChecks) + { + fSmallObjectHeapPtr = g_theGCHeap->IsHeapPointer(this, TRUE); + if (!fSmallObjectHeapPtr) + fLargeObjectHeapPtr = g_theGCHeap->IsHeapPointer(this); + + _ASSERTE(fSmallObjectHeapPtr || fLargeObjectHeapPtr); + } + +#ifdef FEATURE_STRUCTALIGN + _ASSERTE(IsStructAligned((uint8_t *)this, GetMethodTable()->GetBaseAlignment())); +#endif // FEATURE_STRUCTALIGN + +#if defined(FEATURE_64BIT_ALIGNMENT) && !defined(FEATURE_NATIVEAOT) + if (pMT->RequiresAlign8()) + { + _ASSERTE((((size_t)this) & 0x7) == (pMT->IsValueType() ? 
4U : 0U)); + } +#endif // FEATURE_64BIT_ALIGNMENT + +#ifdef VERIFY_HEAP + if (bDeep && (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC)) + g_theGCHeap->ValidateObjectMember(this); +#endif + if (fSmallObjectHeapPtr) + { +#ifdef FEATURE_BASICFREEZE + _ASSERTE(!g_theGCHeap->IsLargeObject(this) || g_theGCHeap->IsInFrozenSegment(this)); +#else + _ASSERTE(!g_theGCHeap->IsLargeObject(this)); +#endif + } + } + + void ValidateHeap(BOOL bDeep) + { + Validate(bDeep); + } + +#endif //FEATURE_NATIVEAOT || BUILD_AS_STANDALONE + + ///// + // + // Header Status Information + // + + MethodTable *GetMethodTable() const + { + return( (MethodTable *) (((size_t) RawGetMethodTable()) & (~SPECIAL_HEADER_BITS))); + } + + void SetMarked() + { + _ASSERTE(RawGetMethodTable()); + RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | GC_MARKED)); + } + + BOOL IsMarked() const + { + return !!(((size_t)RawGetMethodTable()) & GC_MARKED); + } + + void SetPinned() + { + assert (!(gc_heap::settings.concurrent)); + GetHeader()->SetGCBit(); + } + + BOOL IsPinned() const + { + return !!((((CObjectHeader*)this)->GetHeader()->GetBits()) & BIT_SBLK_GC_RESERVE); + } + + // Now we set more bits should actually only clear the mark bit + void ClearMarked() + { +#ifdef DOUBLY_LINKED_FL + RawSetMethodTable ((MethodTable *)(((size_t) RawGetMethodTable()) & (~GC_MARKED))); +#else + RawSetMethodTable (GetMethodTable()); +#endif //DOUBLY_LINKED_FL + } + +#ifdef DOUBLY_LINKED_FL + void SetBGCMarkBit() + { + RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | BGC_MARKED_BY_FGC)); + } + BOOL IsBGCMarkBitSet() const + { + return !!(((size_t)RawGetMethodTable()) & BGC_MARKED_BY_FGC); + } + void ClearBGCMarkBit() + { + RawSetMethodTable((MethodTable *)(((size_t) RawGetMethodTable()) & (~BGC_MARKED_BY_FGC))); + } + + void SetFreeObjInCompactBit() + { + RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | MAKE_FREE_OBJ_IN_COMPACT)); + } + BOOL 
IsFreeObjInCompactBitSet() const + { + return !!(((size_t)RawGetMethodTable()) & MAKE_FREE_OBJ_IN_COMPACT); + } + void ClearFreeObjInCompactBit() + { +#ifdef _DEBUG + // check this looks like an object, but do NOT validate pointers to other objects + // as these may not be valid yet - we are calling this during compact_phase + Validate(FALSE); +#endif //_DEBUG + RawSetMethodTable((MethodTable *)(((size_t) RawGetMethodTable()) & (~MAKE_FREE_OBJ_IN_COMPACT))); + } +#endif //DOUBLY_LINKED_FL + + size_t ClearSpecialBits() + { + size_t special_bits = ((size_t)RawGetMethodTable()) & SPECIAL_HEADER_BITS; + if (special_bits != 0) + { + assert ((special_bits & (~ALLOWED_SPECIAL_HEADER_BITS)) == 0); + RawSetMethodTable ((MethodTable*)(((size_t)RawGetMethodTable()) & ~(SPECIAL_HEADER_BITS))); + } + return special_bits; + } + + void SetSpecialBits (size_t special_bits) + { + assert ((special_bits & (~ALLOWED_SPECIAL_HEADER_BITS)) == 0); + if (special_bits != 0) + { + RawSetMethodTable ((MethodTable*)(((size_t)RawGetMethodTable()) | special_bits)); + } + } + + CGCDesc *GetSlotMap () + { + assert (GetMethodTable()->ContainsGCPointers()); + return CGCDesc::GetCGCDescFromMT(GetMethodTable()); + } + + void SetFree(size_t size) + { + assert (size >= free_object_base_size); + + assert (g_gc_pFreeObjectMethodTable->GetBaseSize() == free_object_base_size); + assert (g_gc_pFreeObjectMethodTable->RawGetComponentSize() == 1); + + RawSetMethodTable( g_gc_pFreeObjectMethodTable ); + + size_t* numComponentsPtr = (size_t*) &((uint8_t*) this)[ArrayBase::GetOffsetOfNumComponents()]; + *numComponentsPtr = size - free_object_base_size; +#ifdef VERIFY_HEAP + //This introduces a bug in the free list management. 
+ //((void**) this)[-1] = 0; // clear the sync block, + assert (*numComponentsPtr >= 0); + if (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC) + { + memset (((uint8_t*)this)+sizeof(ArrayBase), 0xcc, *numComponentsPtr); +#ifdef DOUBLY_LINKED_FL + // However, in this case we can't leave the Next field uncleared because no one will clear it + // so it remains 0xcc and that's not good for verification + if (*numComponentsPtr > 0) + { + free_list_slot (this) = 0; + } +#endif //DOUBLY_LINKED_FL + } +#endif //VERIFY_HEAP + +#ifdef DOUBLY_LINKED_FL + // For background GC, we need to distinguish between a free object that's not on the free list + // and one that is. So we always set its prev to PREV_EMPTY to indicate that it's a free + // object that's not on the free list. If it should be on the free list, it will be set to the + // appropriate non zero value. + check_and_clear_in_free_list ((uint8_t*)this, size); +#endif //DOUBLY_LINKED_FL + } + + void UnsetFree() + { + size_t size = free_object_base_size - plug_skew; + + // since we only need to clear 2 ptr size, we do it manually + PTR_PTR m = (PTR_PTR) this; + for (size_t i = 0; i < size / sizeof(PTR_PTR); i++) + *(m++) = 0; + } + + BOOL IsFree () const + { + return (GetMethodTable() == g_gc_pFreeObjectMethodTable); + } + +#ifdef FEATURE_STRUCTALIGN + int GetRequiredAlignment () const + { + return GetMethodTable()->GetRequiredAlignment(); + } +#endif // FEATURE_STRUCTALIGN + + BOOL ContainsGCPointers() const + { + return GetMethodTable()->ContainsGCPointers(); + } + +#ifdef COLLECTIBLE_CLASS + BOOL Collectible() const + { + return GetMethodTable()->Collectible(); + } + + FORCEINLINE BOOL ContainsGCPointersOrCollectible() const + { + MethodTable *pMethodTable = GetMethodTable(); + return (pMethodTable->ContainsGCPointers() || pMethodTable->Collectible()); + } +#endif //COLLECTIBLE_CLASS + + Object* GetObjectBase() const + { + return (Object*) this; + } +}; + +#define header(i) ((CObjectHeader*)(i)) +#define 
method_table(o) ((CObjectHeader*)(o))->GetMethodTable() + +inline +BOOL is_induced (gc_reason reason) +{ + return ((reason == reason_induced) || + (reason == reason_induced_noforce) || + (reason == reason_lowmemory) || + (reason == reason_lowmemory_blocking) || + (reason == reason_induced_compacting) || + (reason == reason_induced_aggressive) || + (reason == reason_lowmemory_host) || + (reason == reason_lowmemory_host_blocking)); +} + +inline size_t my_get_size (Object* ob) +{ + MethodTable* mT = header(ob)->GetMethodTable(); + + return (mT->GetBaseSize() + + (mT->HasComponentSize() ? + ((size_t)((CObjectHeader*)ob)->GetNumComponents() * mT->RawGetComponentSize()) : 0)); +} + +#define size(i) my_get_size (header(i)) +#define marked(i) header(i)->IsMarked() +#define set_marked(i) header(i)->SetMarked() +#define clear_marked(i) header(i)->ClearMarked() +#define pinned(i) header(i)->IsPinned() +#define set_pinned(i) header(i)->SetPinned() +#define clear_pinned(i) header(i)->GetHeader()->ClrGCBit(); + +inline size_t unused_array_size(uint8_t * p) +{ + assert(((CObjectHeader*)p)->IsFree()); + + size_t* numComponentsPtr = (size_t*)(p + ArrayBase::GetOffsetOfNumComponents()); + return free_object_base_size + *numComponentsPtr; +} + +inline +size_t AlignQword (size_t nbytes) +{ +#ifdef FEATURE_STRUCTALIGN + return Align (nbytes); +#else // FEATURE_STRUCTALIGN + return (nbytes + 7) & ~7; +#endif // FEATURE_STRUCTALIGN +} + +inline +BOOL Aligned (size_t n) +{ + return (n & ALIGNCONST) == 0; +} + +//CLR_SIZE is the max amount of bytes from gen0 that is set to 0 in one chunk +#ifdef SERVER_GC +#define CLR_SIZE ((size_t)(8*1024+32)) +#else //SERVER_GC +#define CLR_SIZE ((size_t)(8*1024+32)) +#endif //SERVER_GC + +#define DECOMMIT_SIZE_PER_MILLISECOND (160*1024) + +#ifndef MULTIPLE_HEAPS +extern const int n_heaps; +#endif //MULTIPLE_HEAPS + +#ifdef MULTIPLE_HEAPS +extern bool affinity_config_specified_p; +#if defined(TARGET_AMD64) && !(defined(_MSC_VER) || defined(__GNUC__)) 
extern "C" ptrdiff_t get_cycle_count(void);
#else
ptrdiff_t get_cycle_count();
#endif

struct node_heap_count
{
    int node_no;
    int heap_count;
};

// Maps server-GC heaps to processors/NUMA nodes and picks the best heap for an
// allocating thread. When the OS can't report the current processor number, it
// falls back to timing accesses to per-heap "sniff" cache lines.
class heap_select
{
    heap_select() {}
public:
    static uint8_t* sniff_buffer;
    static unsigned n_sniff_buffers;
    static unsigned cur_sniff_index;

    static uint16_t proc_no_to_heap_no[MAX_SUPPORTED_CPUS];
    static uint16_t heap_no_to_proc_no[MAX_SUPPORTED_CPUS];
    static uint16_t heap_no_to_numa_node[MAX_SUPPORTED_CPUS];
    static uint16_t numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4];

#ifdef HEAP_BALANCE_INSTRUMENTATION
    // Note this is the total numa nodes GC heaps are on. There might be
    // more on the machine if GC threads aren't using all of them.
    static uint16_t total_numa_nodes;
    static node_heap_count heaps_on_node[MAX_SUPPORTED_NODES];
#endif

    // Time a read of the given heap's sniff line; a fast read suggests the line
    // is in this processor's cache, i.e. this heap was used here recently.
    static int access_time(uint8_t *sniff_buffer, int heap_number, unsigned sniff_index, unsigned n_sniff_buffers)
    {
        ptrdiff_t start_cycles = get_cycle_count();
        uint8_t sniff = sniff_buffer[(1 + heap_number*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE];
        assert (sniff == 0);
        ptrdiff_t elapsed_cycles = get_cycle_count() - start_cycles;
        // add sniff here just to defeat the optimizer
        elapsed_cycles += sniff;
        return (int) elapsed_cycles;
    }

public:
    static BOOL init(int n_heaps)
    {
        assert (sniff_buffer == NULL && n_sniff_buffers == 0);
        if (!GCToOSInterface::CanGetCurrentProcessorNumber())
        {
            n_sniff_buffers = n_heaps*2+1;
            size_t n_cache_lines = 1 + n_heaps * n_sniff_buffers + 1;
            size_t sniff_buf_size = n_cache_lines * HS_CACHE_LINE_SIZE;
            if (sniff_buf_size / HS_CACHE_LINE_SIZE != n_cache_lines) // check for overflow
            {
                return FALSE;
            }

            sniff_buffer = new (nothrow) uint8_t[sniff_buf_size];
            if (sniff_buffer == 0)
                return FALSE;
            memset(sniff_buffer, 0, sniff_buf_size*sizeof(uint8_t));
        }

        bool do_numa = GCToOSInterface::CanEnableGCNumaAware();

        // we want to assign heap indices such that there is a contiguous
        // range of heap numbers for each numa node

        // we do this in two passes:
        // 1. gather processor numbers and numa node numbers for all heaps
        // 2. assign heap numbers for each numa node

        // Pass 1: gather processor numbers and numa node numbers
        uint16_t proc_no[MAX_SUPPORTED_CPUS];
        uint16_t node_no[MAX_SUPPORTED_CPUS];
        uint16_t max_node_no = 0;
        uint16_t heap_num;
        for (heap_num = 0; heap_num < n_heaps; heap_num++)
        {
            if (!GCToOSInterface::GetProcessorForHeap (heap_num, &proc_no[heap_num], &node_no[heap_num]))
                break;
            assert(proc_no[heap_num] < MAX_SUPPORTED_CPUS);
            if (!do_numa || node_no[heap_num] == NUMA_NODE_UNDEFINED)
                node_no[heap_num] = 0;
            max_node_no = max(max_node_no, node_no[heap_num]);
        }

        // Pass 2: assign heap numbers by numa node
        int cur_heap_no = 0;
        for (uint16_t cur_node_no = 0; cur_node_no <= max_node_no; cur_node_no++)
        {
            for (int i = 0; i < heap_num; i++)
            {
                if (node_no[i] != cur_node_no)
                    continue;

                // we found a heap on cur_node_no
                heap_no_to_proc_no[cur_heap_no] = proc_no[i];
                heap_no_to_numa_node[cur_heap_no] = cur_node_no;

                cur_heap_no++;
            }
        }

        return TRUE;
    }

    static void init_cpu_mapping(int heap_number)
    {
        if (GCToOSInterface::CanGetCurrentProcessorNumber())
        {
            uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber();
            // For a 32-bit process running on a machine with > 64 procs,
            // even though the process can only use up to 32 procs, the processor
            // index can be >= 64; or in the cpu group case, if the process is not running in cpu group #0,
            // the GetCurrentProcessorNumber will return a number that's >= 64.
            proc_no_to_heap_no[proc_no % MAX_SUPPORTED_CPUS] = (uint16_t)heap_number;
        }
    }

    // Dirty this heap's sniff lines so a later access_time from the same
    // processor reads fast (only needed on the no-processor-number path).
    static void mark_heap(int heap_number)
    {
        if (GCToOSInterface::CanGetCurrentProcessorNumber())
            return;

        for (unsigned sniff_index = 0; sniff_index < n_sniff_buffers; sniff_index++)
            sniff_buffer[(1 + heap_number*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE] &= 1;
    }

    // Pick the heap an allocating thread should use: directly by current
    // processor number when available, otherwise by sniff-line timing.
    static int select_heap(alloc_context* acontext)
    {
#ifndef TRACE_GC
        UNREFERENCED_PARAMETER(acontext); // only referenced by dprintf
#endif //TRACE_GC

        if (GCToOSInterface::CanGetCurrentProcessorNumber())
        {
            uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber();
            // For a 32-bit process running on a machine with > 64 procs,
            // even though the process can only use up to 32 procs, the processor
            // index can be >= 64; or in the cpu group case, if the process is not running in cpu group #0,
            // the GetCurrentProcessorNumber will return a number that's >= 64.
            int adjusted_heap = proc_no_to_heap_no[proc_no % MAX_SUPPORTED_CPUS];
            // with dynamic heap count, need to make sure the value is in range.
            if (adjusted_heap >= gc_heap::n_heaps)
            {
                adjusted_heap %= gc_heap::n_heaps;
            }
            return adjusted_heap;
        }

        unsigned sniff_index = Interlocked::Increment(&cur_sniff_index);
        sniff_index %= n_sniff_buffers;

        int best_heap = 0;
        int best_access_time = 1000*1000*1000;
        int second_best_access_time = best_access_time;

        uint8_t *l_sniff_buffer = sniff_buffer;
        unsigned l_n_sniff_buffers = n_sniff_buffers;
        for (int heap_number = 0; heap_number < gc_heap::n_heaps; heap_number++)
        {
            int this_access_time = access_time(l_sniff_buffer, heap_number, sniff_index, l_n_sniff_buffers);
            if (this_access_time < best_access_time)
            {
                second_best_access_time = best_access_time;
                best_access_time = this_access_time;
                best_heap = heap_number;
            }
            else if (this_access_time < second_best_access_time)
            {
                second_best_access_time = this_access_time;
            }
        }

        // Only "claim" the heap (by dirtying its sniff line) when the best time
        // is decisively faster than the runner-up.
        if (best_access_time*2 < second_best_access_time)
        {
            sniff_buffer[(1 + best_heap*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE] &= 1;

            dprintf (3, ("select_heap yields crisp %d for context %p\n", best_heap, (void *)acontext));
        }
        else
        {
            dprintf (3, ("select_heap yields vague %d for context %p\n", best_heap, (void *)acontext ));
        }

        return best_heap;
    }

    static bool can_find_heap_fast()
    {
        return GCToOSInterface::CanGetCurrentProcessorNumber();
    }

    static uint16_t find_proc_no_from_heap_no(int heap_number)
    {
        return heap_no_to_proc_no[heap_number];
    }

    static uint16_t find_numa_node_from_heap_no(int heap_number)
    {
        return heap_no_to_numa_node[heap_number];
    }

    static void init_numa_node_to_heap_map(int nheaps)
    {
        // Called right after GCHeap::Init() for each heap
        // For each NUMA node used by the heaps, the
        // numa_node_to_heap_map[numa_node] is set to the first heap number on that node and
        // numa_node_to_heap_map[numa_node + 1] is set to the first heap number not on that node
        // Set the start of the heap number range for the first NUMA node
        numa_node_to_heap_map[heap_no_to_numa_node[0]] = 0;
#ifdef HEAP_BALANCE_INSTRUMENTATION
        total_numa_nodes = 0;
        memset (heaps_on_node, 0, sizeof (heaps_on_node));
        heaps_on_node[0].node_no = heap_no_to_numa_node[0];
        heaps_on_node[0].heap_count = 1;
#endif //HEAP_BALANCE_INSTRUMENTATION

        for (int i=1; i < nheaps; i++)
        {
            if (heap_no_to_numa_node[i] != heap_no_to_numa_node[i-1])
            {
#ifdef HEAP_BALANCE_INSTRUMENTATION
                total_numa_nodes++;
                heaps_on_node[total_numa_nodes].node_no = heap_no_to_numa_node[i];
#endif

                // Set the end of the heap number range for the previous NUMA node
                numa_node_to_heap_map[heap_no_to_numa_node[i-1] + 1] =
                // Set the start of the heap number range for the current NUMA node
                numa_node_to_heap_map[heap_no_to_numa_node[i]] = (uint16_t)i;
            }
#ifdef HEAP_BALANCE_INSTRUMENTATION
            (heaps_on_node[total_numa_nodes].heap_count)++;
#endif
        }

        // Set the end of the heap range for the last NUMA node
        numa_node_to_heap_map[heap_no_to_numa_node[nheaps-1] + 1] = (uint16_t)nheaps; //mark the end with nheaps

#ifdef HEAP_BALANCE_INSTRUMENTATION
        total_numa_nodes++;
#endif
    }

    // For processor `index`, report its proc/node numbers and the heap-number
    // range on that node; false if the processor can't be resolved.
    static bool get_info_proc (int index, uint16_t* proc_no, uint16_t* node_no, int* start_heap, int* end_heap)
    {
        if (!GCToOSInterface::GetProcessorForHeap ((uint16_t)index, proc_no, node_no))
            return false;

        if (*node_no == NUMA_NODE_UNDEFINED)
            *node_no = 0;

        *start_heap = (int)numa_node_to_heap_map[*node_no];
        *end_heap = (int)(numa_node_to_heap_map[*node_no + 1]);

        return true;
    }

    // Assign processors that have no dedicated heap to existing heaps,
    // preferring heaps on the same NUMA node.
    static void distribute_other_procs (bool distribute_all_p)
    {
        if (affinity_config_specified_p)
            return;

        if (distribute_all_p)
        {
            uint16_t current_heap_no_on_node[MAX_SUPPORTED_CPUS];
            memset (current_heap_no_on_node, 0, sizeof (current_heap_no_on_node));
            uint16_t current_heap_no = 0;

            uint16_t proc_no = 0;
            uint16_t node_no = 0;

            for (int i = gc_heap::n_heaps; i < (int)g_num_active_processors; i++)
            {
                int start_heap, end_heap;
                if (!get_info_proc (i, &proc_no, &node_no, &start_heap, &end_heap))
                    break;

                // This indicates there are heaps on this node
                if ((end_heap - start_heap) > 0)
                {
                    proc_no_to_heap_no[proc_no] = (current_heap_no_on_node[node_no] % (uint16_t)(end_heap - start_heap)) + (uint16_t)start_heap;
                    (current_heap_no_on_node[node_no])++;
                }
                else
                {
                    proc_no_to_heap_no[proc_no] = current_heap_no % gc_heap::n_heaps;
                    (current_heap_no)++;
                }
            }
        }
        else
        {
            // This is for scenarios where GCHeapCount is specified as something like
            // (g_num_active_processors - 2) to allow less randomization to the Server GC threads.
            // In this case we want to assign the right heaps to those procs, ie if they share
            // the same numa node we want to assign local heaps to those procs. Otherwise we
            // let the heap balancing mechanism take over for now.
            uint16_t proc_no = 0;
            uint16_t node_no = 0;
            int current_node_no = -1;
            int current_heap_on_node = -1;

            for (int i = gc_heap::n_heaps; i < (int)g_num_active_processors; i++)
            {
                int start_heap, end_heap;
                if (!get_info_proc (i, &proc_no, &node_no, &start_heap, &end_heap))
                    break;

                if ((end_heap - start_heap) > 0)
                {
                    if (node_no == current_node_no)
                    {
                        // We already iterated through all heaps on this node, don't add more procs to these
                        // heaps.
                        if (current_heap_on_node >= end_heap)
                        {
                            continue;
                        }
                    }
                    else
                    {
                        current_node_no = node_no;
                        current_heap_on_node = start_heap;
                    }

                    proc_no_to_heap_no[proc_no] = (uint16_t)current_heap_on_node;

                    current_heap_on_node++;
                }
            }
        }
    }

    static void get_heap_range_for_heap(int hn, int* start, int* end)
    {
        uint16_t numa_node = heap_no_to_numa_node[hn];
        *start = (int)numa_node_to_heap_map[numa_node];
        *end = (int)(numa_node_to_heap_map[numa_node+1]);
#ifdef HEAP_BALANCE_INSTRUMENTATION
        dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMPget_heap_range: %d is in numa node %d, start = %d, end = %d", hn, numa_node, *start, *end));
#endif //HEAP_BALANCE_INSTRUMENTATION
    }
};
#endif //MULTIPLE_HEAPS

// Bookkeeping for a pinned plug: the plug's address/length plus saved copies of
// the object info that the artificial pre/post gaps overwrote.
class mark
{
public:
    uint8_t* first;
    size_t len;

    // If we want to save space we can have a pool of plug_and_gap's instead of
    // always having 2 allocated for each pinned plug.
    gap_reloc_pair saved_pre_plug;
    // If we decide to not compact, we need to restore the original values.
    gap_reloc_pair saved_pre_plug_reloc;

    gap_reloc_pair saved_post_plug;

    // Supposedly Pinned objects cannot have references but we are seeing some from pinvoke
    // frames. Also if it's an artificially pinned plug created by us, it can certainly
    // have references.
    // We know these cases will be rare so we can optimize this to be only allocated on demand.
    gap_reloc_pair saved_post_plug_reloc;

    // We need to calculate this after we are done with plan phase and before compact
    // phase because compact phase will change the bricks so relocate_address will no
    // longer work.
    uint8_t* saved_pre_plug_info_reloc_start;

    // We need to save this because we will have no way to calculate it, unlike the
    // pre plug info start which is right before this plug.
    uint8_t* saved_post_plug_info_start;

#ifdef SHORT_PLUGS
    uint8_t* allocation_context_start_region;
#endif //SHORT_PLUGS

    // How the bits in these bytes are organized:
    // MSB --> LSB
    // bit to indicate whether it's a short obj | 3 bits for refs in this short obj | 2 unused bits | bit to indicate if it's collectible | last bit
    // last bit indicates if there's pre or post info associated with this plug. If it's not set all other bits will be 0.
    BOOL saved_pre_p;
    BOOL saved_post_p;

#ifdef _DEBUG
    // We are seeing this is getting corrupted for a PP with a NP after.
    // Save it when we first set it and make sure it doesn't change.
    gap_reloc_pair saved_post_plug_debug;
#endif //_DEBUG

    size_t get_max_short_bits()
    {
        return (sizeof (gap_reloc_pair) / sizeof (uint8_t*));
    }

    // pre bits
    size_t get_pre_short_start_bit ()
    {
        return (sizeof (saved_pre_p) * 8 - 1 - (sizeof (gap_reloc_pair) / sizeof (uint8_t*)));
    }

    BOOL pre_short_p()
    {
        return (saved_pre_p & (1 << (sizeof (saved_pre_p) * 8 - 1)));
    }

    void set_pre_short()
    {
        saved_pre_p |= (1 << (sizeof (saved_pre_p) * 8 - 1));
    }

    void set_pre_short_bit (size_t bit)
    {
        saved_pre_p |= 1 << (get_pre_short_start_bit() + bit);
    }

    BOOL pre_short_bit_p (size_t bit)
    {
        return (saved_pre_p & (1 << (get_pre_short_start_bit() + bit)));
    }

#ifdef COLLECTIBLE_CLASS
    void set_pre_short_collectible()
    {
        saved_pre_p |= 2;
    }

    BOOL pre_short_collectible_p()
    {
        return (saved_pre_p & 2);
    }
#endif //COLLECTIBLE_CLASS

    // post bits
    size_t get_post_short_start_bit ()
    {
        return (sizeof (saved_post_p) * 8 - 1 - (sizeof (gap_reloc_pair) / sizeof (uint8_t*)));
    }

    BOOL post_short_p()
    {
        return (saved_post_p & (1 << (sizeof (saved_post_p) * 8 - 1)));
    }

    void set_post_short()
    {
        saved_post_p |= (1 << (sizeof (saved_post_p) * 8 - 1));
    }

    void set_post_short_bit (size_t bit)
    {
        saved_post_p |= 1 << (get_post_short_start_bit() + bit);
    }

    BOOL post_short_bit_p (size_t bit)
    {
        return (saved_post_p & (1 << (get_post_short_start_bit() + bit)));
    }

#ifdef COLLECTIBLE_CLASS
    void set_post_short_collectible()
    {
        saved_post_p |= 2;
    }

    BOOL post_short_collectible_p()
    {
        return (saved_post_p & 2);
    }
#endif //COLLECTIBLE_CLASS

    uint8_t* get_plug_address() { return first; }

    BOOL has_pre_plug_info() { return saved_pre_p; }
    BOOL has_post_plug_info() { return saved_post_p; }

    gap_reloc_pair* get_pre_plug_reloc_info() { return &saved_pre_plug_reloc; }
    gap_reloc_pair* get_post_plug_reloc_info() { return &saved_post_plug_reloc; }
    void set_pre_plug_info_reloc_start (uint8_t* reloc) { saved_pre_plug_info_reloc_start = reloc; }
    uint8_t* get_post_plug_info_start() { return saved_post_plug_info_start; }

    // We need to temporarily recover the shortened plugs for compact phase so we can
    // copy over the whole plug and their related info (mark bits/cards). But we will
    // need to set the artificial gap back so compact phase can keep reading the plug info.
    // We also need to recover the saved info because we'll need to recover it later.
    //
    // So we would call swap_p*_plug_and_saved once to recover the object info; then call
    // it again to recover the artificial gap.
    void swap_pre_plug_and_saved()
    {
        gap_reloc_pair temp;
        memcpy (&temp, (first - sizeof (plug_and_gap)), sizeof (temp));
        memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug_reloc, sizeof (saved_pre_plug_reloc));
        saved_pre_plug_reloc = temp;
    }

    void swap_post_plug_and_saved()
    {
        gap_reloc_pair temp;
        memcpy (&temp, saved_post_plug_info_start, sizeof (temp));
        memcpy (saved_post_plug_info_start, &saved_post_plug_reloc, sizeof (saved_post_plug_reloc));
        saved_post_plug_reloc = temp;
    }

    // Same swaps but against the non-reloc saved copies (used on the profiler path).
    void swap_pre_plug_and_saved_for_profiler()
    {
        gap_reloc_pair temp;
        memcpy (&temp, (first - sizeof (plug_and_gap)), sizeof (temp));
        memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug, sizeof (saved_pre_plug));
        saved_pre_plug = temp;
    }

    void swap_post_plug_and_saved_for_profiler()
    {
        gap_reloc_pair temp;
        memcpy (&temp, saved_post_plug_info_start, sizeof (temp));
        memcpy (saved_post_plug_info_start, &saved_post_plug, sizeof (saved_post_plug));
        saved_post_plug = temp;
    }

    // We should think about whether it's really necessary to have to copy back the pre plug
    // info since it was already copied during compacting plugs. But if a plug doesn't move
    // by >= 3 ptr size (the size of gap_reloc_pair), it means we'd have to recover pre plug info.
    size_t recover_plug_info()
    {
        // We need to calculate the size for sweep case in order to correctly record the
        // free_obj_space - sweep would've made these artificial gaps into free objects and
        // we would need to deduct the size because now we are writing into those free objects.
        size_t recovered_sweep_size = 0;

        if (saved_pre_p)
        {
            if (gc_heap::settings.compaction)
            {
                dprintf (3, ("%p: REC Pre: %p-%p",
                    first,
                    &saved_pre_plug_reloc,
                    saved_pre_plug_info_reloc_start));
                memcpy (saved_pre_plug_info_reloc_start, &saved_pre_plug_reloc, sizeof (saved_pre_plug_reloc));
            }
            else
            {
                dprintf (3, ("%p: REC Pre: %p-%p",
                    first,
                    &saved_pre_plug,
                    (first - sizeof (plug_and_gap))));
                memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug, sizeof (saved_pre_plug));
                recovered_sweep_size += sizeof (saved_pre_plug);
            }
        }

        if (saved_post_p)
        {
            if (gc_heap::settings.compaction)
            {
                dprintf (3, ("%p: REC Post: %p-%p",
                    first,
                    &saved_post_plug_reloc,
                    saved_post_plug_info_start));
                memcpy (saved_post_plug_info_start, &saved_post_plug_reloc, sizeof (saved_post_plug_reloc));
            }
            else
            {
                dprintf (3, ("%p: REC Post: %p-%p",
                    first,
                    &saved_post_plug,
                    saved_post_plug_info_start));
                memcpy (saved_post_plug_info_start, &saved_post_plug, sizeof (saved_post_plug));
                recovered_sweep_size += sizeof (saved_post_plug);
            }
        }

        return recovered_sweep_size;
    }
};

// We don't store seg_mapping_table in card_table_info because there's only always one view.
extern seg_mapping* seg_mapping_table;

// Header describing one card table allocation and its companion tables.
class card_table_info
{
public:
    unsigned recount;
    size_t size;
    uint32_t* next_card_table;

    uint8_t* lowest_address;
    uint8_t* highest_address;
    short* brick_table;

#ifdef CARD_BUNDLE
    uint32_t* card_bundle_table;
#endif //CARD_BUNDLE

    // mark_array is always at the end of the data structure because we
    // want to be able to make one commit call for everything before it.
#ifdef BACKGROUND_GC
    uint32_t* mark_array;
#endif //BACKGROUND_GC
};

// The DAC mirrors this layout; keep these fields in sync with dac_card_table_info.
static_assert(offsetof(dac_card_table_info, size) == offsetof(card_table_info, size), "DAC card_table_info layout mismatch");
static_assert(offsetof(dac_card_table_info, next_card_table) == offsetof(card_table_info, next_card_table), "DAC card_table_info layout mismatch");

#ifdef WRITE_WATCH
#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
extern bool virtual_alloc_hardware_write_watch;
#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
extern bool hardware_write_watch_capability;

inline bool can_use_hardware_write_watch()
{
    return hardware_write_watch_capability;
}

inline bool can_use_write_watch_for_card_table()
{
#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
    return true;
#else
    return can_use_hardware_write_watch();
#endif
}
#endif //WRITE_WATCH

// Convert a byte count to whole gibibytes (truncating integer division).
inline
size_t gib (size_t num)
{
    return (num / 1024 / 1024 / 1024);
}

#ifdef HOST_64BIT
#define brick_size ((size_t)4096)
#else
#define brick_size ((size_t)2048)
#endif //HOST_64BIT

// Round an address up to the next brick boundary.
inline
uint8_t* align_on_brick (uint8_t* add)
{
    return (uint8_t*)((size_t)(add + brick_size - 1) & ~(brick_size - 1));
}

#ifdef CARD_BUNDLE
//threshold of heap size to turn on card bundles.
#define SH_TH_CARD_BUNDLE (40*1024*1024)
#define MH_TH_CARD_BUNDLE (180*1024*1024)
#endif //CARD_BUNDLE

// time in milliseconds between decommit steps
#define DECOMMIT_TIME_STEP_MILLISECONDS (100)

#if defined(HOST_64BIT)
#define MAX_ALLOWED_MEM_LOAD 85
#define MIN_YOUNGEST_GEN_DESIRED (16*1024*1024)
#endif //HOST_64BIT

extern const size_t min_segment_size_hard_limit;
extern const size_t low_latency_alloc;
extern gc_reason gc_trigger_reason;
extern double qpf_us;

uint64_t RawGetHighPrecisionTimeStamp();

#ifdef WRITE_WATCH
#ifdef BACKGROUND_GC
extern const size_t ww_reset_quantum;
#endif //BACKGROUND_GC

// Round an address (as size_t) down to a write-watch-unit boundary.
inline
size_t align_write_watch_lower_page (size_t add)
{
    return (add & ~(WRITE_WATCH_UNIT_SIZE - 1));
}

// Pointer overload. NOTE(review): this overload masks with OS_PAGE_SIZE while the
// size_t overload above uses WRITE_WATCH_UNIT_SIZE — confirm the asymmetry is intended.
inline
uint8_t* align_write_watch_lower_page (uint8_t* add)
{
    return (uint8_t*)((size_t)add & ~((size_t)OS_PAGE_SIZE - 1));
}
#endif //WRITE_WATCH

void stomp_write_barrier_ephemeral (uint8_t* ephemeral_low, uint8_t* ephemeral_high
#ifdef USE_REGIONS
                                    , gc_heap::region_info* map_region_to_generation_skewed
                                    , uint8_t region_shr
#endif //USE_REGIONS
    );

void process_sync_log_stats();
void* virtual_alloc (size_t size);
void* virtual_alloc (size_t size, bool use_large_pages_p, uint16_t numa_node = NUMA_NODE_UNDEFINED);
size_t get_valid_segment_size (BOOL large_seg = FALSE);
heap_segment* ro_segment_lookup (uint8_t* o);
heap_segment* heap_segment_rw (heap_segment* ns);
heap_segment* heap_segment_next_rw (heap_segment* seg);
heap_segment* heap_segment_prev_rw (heap_segment* begin, heap_segment* seg);
void enter_spin_lock (GCSpinLock* spin_lock);

// Return ns if it is null or already in range; otherwise walk forward to the
// first in-range segment (null if none remain).
inline
heap_segment* heap_segment_in_range (heap_segment* ns)
{
    if ((ns == 0) || heap_segment_in_range_p (ns))
    {
        return ns;
    }
    else
    {
        do
        {
            ns = heap_segment_next (ns);
        } while ((ns != 0) && !heap_segment_in_range_p (ns));
        return ns;
    }
}

inline
heap_segment* heap_segment_next_in_range (heap_segment* seg)
{
    heap_segment* ns =
        heap_segment_next (seg);
    return heap_segment_in_range (ns);
}

inline
BOOL in_range_for_segment (uint8_t* add, heap_segment* seg)
{
    return ((add >= heap_segment_mem (seg)) && (add < heap_segment_reserved (seg)));
}

// This is for methods that need to iterate through all SOH heap segments/regions.
inline
int get_start_generation_index()
{
#ifdef USE_REGIONS
    return 0;
#else
    return max_generation;
#endif //USE_REGIONS
}

inline
int get_stop_generation_index (int condemned_gen_number)
{
#ifdef USE_REGIONS
    return 0;
#else
    return condemned_gen_number;
#endif //USE_REGIONS
}

// Round an address down to the enclosing min-segment-sized chunk.
inline
uint8_t* align_lower_segment (uint8_t* add)
{
    return (uint8_t*)((size_t)(add) & ~(((size_t)1 << gc_heap::min_segment_size_shr) - 1));
}

#ifdef CARD_BUNDLE
// The card bundle keeps track of groups of card words.
static const size_t card_bundle_word_width = 32;

static const size_t card_bundle_size = (size_t)(GC_PAGE_SIZE / (sizeof(uint32_t)*card_bundle_word_width));

inline
size_t card_bundle_word (size_t cardb)
{
    return cardb / card_bundle_word_width;
}

inline
uint32_t card_bundle_bit (size_t cardb)
{
    return (uint32_t)(cardb % card_bundle_word_width);
}

size_t align_cardw_on_bundle (size_t cardw);
size_t cardw_card_bundle (size_t cardw);
size_t card_bundle_cardw (size_t cardb);
uint32_t* translate_card_bundle_table (uint32_t* cb, uint8_t* lowest_address);
#endif //CARD_BUNDLE

inline
uint8_t* align_lower_brick (uint8_t* add)
{
    return (uint8_t*)(((size_t)add) & ~(brick_size - 1));
}

size_t size_brick_of (uint8_t* from, uint8_t* end);

inline
uint8_t* align_on_card (uint8_t* add)
{
    return (uint8_t*)((size_t)(add + card_size - 1) & ~(card_size - 1 ));
}

inline
uint8_t* align_on_card_word (uint8_t* add)
{
    return (uint8_t*) ((size_t)(add + (card_size*card_word_width)-1) & ~(card_size*card_word_width - 1));
}

inline
uint8_t* align_lower_card (uint8_t* add)
{
    return (uint8_t*)((size_t)add &
                      ~(card_size-1));
}

size_t gcard_of (uint8_t*);
size_t count_card_of (uint8_t* from, uint8_t* end);
size_t size_card_of (uint8_t* from, uint8_t* end);
size_t size_seg_mapping_table_of (uint8_t* from, uint8_t* end);
heap_segment* seg_mapping_table_segment_of (uint8_t* o);
#ifdef MULTIPLE_HEAPS
gc_heap* seg_mapping_table_heap_of (uint8_t* o);
#endif //MULTIPLE_HEAPS

// Map an object address to its owning heap. Null, or an address not covered by
// the seg mapping table, maps to heap 0; single-heap builds return __this.
inline
gc_heap* gc_heap::heap_of (uint8_t* o)
{
#ifdef MULTIPLE_HEAPS
    if (o == 0)
        return g_heaps [0];

    gc_heap* hp = seg_mapping_table_heap_of (o);
    return (hp ? hp : g_heaps[0]);
#else //MULTIPLE_HEAPS
    UNREFERENCED_PARAMETER(o);
    return __this;
#endif //MULTIPLE_HEAPS
}

inline
size_t seg_mapping_word_of (uint8_t* add)
{
    return (size_t)add >> gc_heap::min_segment_size_shr;
}

// The card_table_info header sits directly before the card table; this and the
// accessors below reach its fields via a negative offset from the table pointer.
inline
unsigned& card_table_refcount (uint32_t* c_table)
{
    return *(unsigned*)((char*)c_table - sizeof (card_table_info));
}

uint32_t* translate_card_table (uint32_t* ct);
void own_card_table (uint32_t* c_table);
void release_card_table (uint32_t* c_table);
void stomp_write_barrier_resize(bool is_runtime_suspended, bool requires_upper_bounds_check);

inline
short*& card_table_brick_table (uint32_t* c_table)
{
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->brick_table;
}

#ifdef CARD_BUNDLE
inline
uint32_t*& card_table_card_bundle_table (uint32_t* c_table)
{
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->card_bundle_table;
}
#endif //CARD_BUNDLE

#ifdef BACKGROUND_GC
inline
uint32_t*& card_table_mark_array (uint32_t* c_table)
{
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->mark_array;
}

size_t size_mark_array_of (uint8_t* from, uint8_t* end);
uint32_t* translate_mark_array (uint32_t* ma);
#endif //BACKGROUND_GC

// Grow the mark stack to max(init_len, 2*len), copying the old contents.
// Returns FALSE and leaves m/len untouched when the allocation fails.
inline
BOOL grow_mark_stack (mark*& m, size_t& len, size_t init_len)
{
    size_t new_size = max (init_len, 2 * len);
    mark* tmp = new (nothrow) mark
        [new_size];
    if (tmp)
    {
        memcpy (tmp, m, len * sizeof (mark));
        delete[] m;
        m = tmp;
        len = new_size;
        return TRUE;
    }
    else
    {
        dprintf (1, ("Failed to allocate %zd bytes for mark stack", (len * sizeof (mark))));
        return FALSE;
    }
}

enum
{
    CORINFO_EXCEPTION_GC = 0xE0004743 // 'GC'
};

#define mark_stack_empty_p() (mark_stack_base == mark_stack_tos)

// m_boundary records a marked object in the mark list (overflow just bumps the
// index under GC_CONFIG_DRIVEN so the overflow amount can be reported); the
// single-heap variants also track the slow/shigh ephemeral bounds.
#ifdef MULTIPLE_HEAPS

#ifdef GC_CONFIG_DRIVEN
#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;} else {mark_list_index++;}}
#else //GC_CONFIG_DRIVEN
#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;}}
#endif //GC_CONFIG_DRIVEN

#define m_boundary_fullgc(o) {}

#else //MULTIPLE_HEAPS

#ifdef GC_CONFIG_DRIVEN
#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;} else {mark_list_index++;} if (slow > o) slow = o; if (shigh < o) shigh = o;}
#else
#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;}if (slow > o) slow = o; if (shigh < o) shigh = o;}
#endif //GC_CONFIG_DRIVEN

#define m_boundary_fullgc(o) {if (slow > o) slow = o; if (shigh < o) shigh = o;}

#endif //MULTIPLE_HEAPS

// The low two bits of a mark-stack entry encode its kind.
#define stolen 2
#define partial 1
#define partial_object 3

inline
BOOL stolen_p (uint8_t* r)
{
    return (((size_t)r & 2) && !((size_t)r & 1));
}

inline
BOOL partial_p (uint8_t* r)
{
    return (((size_t)r & 1) && !((size_t)r & 2));
}

inline
BOOL straight_ref_p (uint8_t* r)
{
    return (!stolen_p (r) && !partial_p (r));
}

inline
BOOL partial_object_p (uint8_t* r)
{
    return (((size_t)r & partial_object) == partial_object);
}

#ifdef MULTIPLE_HEAPS
extern VOLATILE(BOOL) s_fUnpromotedHandles;
extern VOLATILE(BOOL) s_fUnscannedPromotions;
extern VOLATILE(BOOL) s_fScanRequired;
#endif //MULTIPLE_HEAPS

uint8_t** make_mark_list (size_t size);

#ifdef USE_VXSORT
void
do_vxsort (uint8_t** item_array, ptrdiff_t item_count, uint8_t* range_low, uint8_t* range_high);
#endif //USE_VXSORT

uint8_t* compute_next_end (heap_segment* seg, uint8_t* low);

inline
size_t& card_table_size (uint32_t* c_table)
{
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->size;
}

#ifdef USE_REGIONS
extern region_allocator global_region_allocator;
extern uint8_t*(*initial_regions)[total_generation_count][2];
extern const size_t uninitialized_end_gen0_region_space;
size_t size_region_to_generation_table_of (uint8_t* from, uint8_t* end);

// Look up the region info (heap_segment) covering an address. A negative first
// field in the basic entry is a back-offset to the entry for the start of a
// larger, multi-basic-region region.
inline
heap_segment* get_region_info_for_address (uint8_t* address)
{
    size_t basic_region_index = (size_t)address >> gc_heap::min_segment_size_shr;
    heap_segment* basic_region_info_entry = (heap_segment*)&seg_mapping_table[basic_region_index];
    ptrdiff_t first_field = (ptrdiff_t)heap_segment_allocated (basic_region_info_entry);
    if (first_field < 0)
    {
        basic_region_index += first_field;
    }

    return ((heap_segment*)(&seg_mapping_table[basic_region_index]));
}

#ifdef DYNAMIC_HEAP_COUNT
// Poison values stamped into the fields of decommissioned heaps so stale use is
// recognizable.
#define DECOMMISSIONED_VALUE 0xdec0dec0dec0dec0
static const size_t DECOMMISSIONED_SIZE_T = DECOMMISSIONED_VALUE;
static const ptrdiff_t DECOMMISSIONED_PTRDIFF_T = (ptrdiff_t)DECOMMISSIONED_VALUE;
// NOTE(review): declared ptrdiff_t but named/cast as uint64_t — confirm intended type.
static const ptrdiff_t DECOMMISSIONED_UINT64_T = (uint64_t)DECOMMISSIONED_VALUE;
static uint8_t* const DECOMMISSIONED_UINT8_T_P = (uint8_t*)DECOMMISSIONED_VALUE;
static uint8_t** const DECOMMISSIONED_UINT8_T_PP = (uint8_t**)DECOMMISSIONED_VALUE;
static PTR_heap_segment const DECOMMISSIONED_REGION_P = (PTR_heap_segment)DECOMMISSIONED_VALUE;
static mark* const DECOMMISSIONED_MARK_P = (mark*)DECOMMISSIONED_VALUE;
static const BOOL DECOMMISSIONED_BOOL = 0xdec0dec0;
// NOTE(review): declared BOOL but named _INT and cast to int — confirm intended type.
static const BOOL DECOMMISSIONED_INT = (int)0xdec0dec0;
static const float DECOMMISSIONED_FLOAT = (float)DECOMMISSIONED_VALUE;
static const ptrdiff_t UNINITIALIZED_VALUE = 0xbaadbaadbaadbaad;
#endif //DYNAMIC_HEAP_COUNT
inline bool is_in_heap_range (uint8_t* o)
{
#ifdef FEATURE_BASICFREEZE
    assert (((g_gc_lowest_address <= o) && (o < g_gc_highest_address)) ||
            (o == nullptr) || (ro_segment_lookup (o) != nullptr));
    return ((g_gc_lowest_address <= o) && (o < g_gc_highest_address));
#else //FEATURE_BASICFREEZE
    assert ((o == nullptr) || (g_gc_lowest_address <= o) && (o < g_gc_highest_address));
    return (o != nullptr);
#endif //FEATURE_BASICFREEZE
}

// A region's first object is preceded by an aligned_plug_and_gap header;
// the region "start" points at that header, before heap_segment_mem.
inline
uint8_t* get_region_start (heap_segment* region_info)
{
    uint8_t* obj_start = heap_segment_mem (region_info);
    return (obj_start - sizeof (aligned_plug_and_gap));
}

inline
size_t get_region_size (heap_segment* region_info)
{
    return (size_t)(heap_segment_reserved (region_info) - get_region_start (region_info));
}

inline
size_t get_region_committed_size (heap_segment* region)
{
    uint8_t* start = get_region_start (region);
    uint8_t* committed = heap_segment_committed (region);
    return committed - start;
}

// "Skewed" index: not rebased to g_gc_lowest_address.
inline
size_t get_skewed_basic_region_index_for_address (uint8_t* address)
{
    assert ((g_gc_lowest_address <= address) && (address <= g_gc_highest_address));
    size_t skewed_basic_region_index = (size_t)address >> gc_heap::min_segment_size_shr;
    return skewed_basic_region_index;
}

inline
size_t get_basic_region_index_for_address (uint8_t* address)
{
    size_t skewed_basic_region_index = get_skewed_basic_region_index_for_address (address);
    return (skewed_basic_region_index - get_skewed_basic_region_index_for_address (g_gc_lowest_address));
}

// Unlike get_region_info_for_address, this assumes region_start IS the start of
// a region, so no back-offset adjustment is needed.
inline
heap_segment* get_region_info (uint8_t* region_start)
{
    size_t region_index = (size_t)region_start >> gc_heap::min_segment_size_shr;
    heap_segment* region_info_entry = (heap_segment*)&seg_mapping_table[region_index];
    dprintf (REGIONS_LOG, ("region info for region %p is at %zd, %zx (alloc: %p)",
        region_start, region_index, (size_t)region_info_entry, heap_segment_allocated (region_info_entry)));
    return
        (heap_segment*)&seg_mapping_table[region_index];
}

inline
bool is_free_region (heap_segment* region)
{
    return (heap_segment_allocated (region) == nullptr);
}

// Record the generation this region is planned to land in; updates demotion
// state, the per-generation planned-region counts, and the region->generation
// map entries for every basic region the region covers.
inline
void gc_heap::set_region_plan_gen_num (heap_segment* region, int plan_gen_num, bool replace_p)
{
    int gen_num = heap_segment_gen_num (region);
    int supposed_plan_gen_num = get_plan_gen_num (gen_num);
    dprintf (REGIONS_LOG, ("h%d setting plan gen on %p->%p(was gen%d) to %d(should be: %d) %s",
        heap_number, region,
        heap_segment_mem (region),
        gen_num, plan_gen_num,
        supposed_plan_gen_num,
        ((plan_gen_num < supposed_plan_gen_num) ? "DEMOTED" : "ND")));
    region_info region_info_bits_to_set = (region_info)(plan_gen_num << RI_PLAN_GEN_SHR);
    // A plan gen lower than expected, with pinned survivors, means demotion.
    if ((plan_gen_num < supposed_plan_gen_num) && (heap_segment_pinned_survived (region) != 0))
    {
        if (!settings.demotion)
        {
            settings.demotion = TRUE;
        }
        get_gc_data_per_heap()->set_mechanism_bit (gc_demotion_bit);
        region->flags |= heap_segment_flags_demoted;
        region_info_bits_to_set = (region_info)(region_info_bits_to_set | RI_DEMOTED);
    }
    else
    {
        region->flags &= ~heap_segment_flags_demoted;
    }

    // When re-planning, back out the count recorded for the previous plan gen.
    if (replace_p)
    {
        int original_plan_gen_num = heap_segment_plan_gen_num (region);
        planned_regions_per_gen[original_plan_gen_num]--;
    }

    planned_regions_per_gen[plan_gen_num]++;
    dprintf (REGIONS_LOG, ("h%d g%d %zx(%zx) -> g%d (total %d region planned in g%d)",
        heap_number, heap_segment_gen_num (region), (size_t)region, heap_segment_mem (region), plan_gen_num, planned_regions_per_gen[plan_gen_num], plan_gen_num));

    heap_segment_plan_gen_num (region) = plan_gen_num;

    uint8_t* region_start = get_region_start (region);
    uint8_t* region_end = heap_segment_reserved (region);

    size_t region_index_start = get_basic_region_index_for_address (region_start);
    size_t region_index_end = get_basic_region_index_for_address (region_end);
    for (size_t region_index = region_index_start; region_index < region_index_end;
         region_index++)
    {
        assert (plan_gen_num <= max_generation);
        map_region_to_generation[region_index] = (region_info)(region_info_bits_to_set | (map_region_to_generation[region_index] & ~(RI_PLAN_GEN_MASK|RI_DEMOTED)));
    }
}

// Same as set_region_plan_gen_num, but a no-op for regions swept in plan.
inline
void gc_heap::set_region_plan_gen_num_sip (heap_segment* region, int plan_gen_num)
{
    if (!heap_segment_swept_in_plan (region))
    {
        set_region_plan_gen_num (region, plan_gen_num);
    }
}
#endif //USE_REGIONS

extern const int32_t lock_free;
extern const int32_t lock_taken;
extern const int32_t lock_decommissioned;

#define demotion_plug_len_th (6*1024*1024)
#define LOH_PIN_QUEUE_LENGTH 100
#define LOH_PIN_DECAY 10

#ifdef USE_REGIONS
// Survival-ratio thresholds (percent) for sweep-in-plan.
#define sip_surv_ratio_th (90)
#define sip_old_card_surv_ratio_th (90)
#endif //USE_REGIONS

static void enter_spin_lock_noinstru (RAW_KEYWORD(volatile) int32_t* lock);

#ifdef _DEBUG
#define ASSERT_HOLDING_SPIN_LOCK(pSpinLock) \
    _ASSERTE((pSpinLock)->holding_thread == GCToEEInterface::GetThread());

// Non-blocking acquire; the debug flavor additionally records the owner thread.
inline
BOOL try_enter_spin_lock(GCSpinLock* pSpinLock)
{
    BOOL ret = (Interlocked::CompareExchange(&pSpinLock->lock, 0, -1) == -1);
    if (ret)
    {
        pSpinLock->holding_thread = GCToEEInterface::GetThread();
    }

    return ret;
}
#else // _DEBUG
#define ASSERT_HOLDING_SPIN_LOCK(pSpinLock)

inline
BOOL try_enter_spin_lock(GCSpinLock* spin_lock)
{
    return (Interlocked::CompareExchange(&spin_lock->lock, 0, -1) == -1);
}
#endif // _DEBUG

// Fast path of the more-space-lock acquire; falls back to the helper on contention.
inline
enter_msl_status gc_heap::enter_spin_lock_msl (GCSpinLock* msl)
{
    if (Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) == lock_free)
        return msl_entered;

    return enter_spin_lock_msl_helper (msl);
}

#ifdef _DEBUG
inline
void enter_spin_lock (GCSpinLock *pSpinLock)
{
    enter_spin_lock_noinstru (&pSpinLock->lock);
    assert (pSpinLock->holding_thread == (Thread*)-1);
    pSpinLock->holding_thread = GCToEEInterface::GetThread();
}
#else //_DEBUG
inline
void WaitLonger (int i
#ifdef
       SYNCHRONIZATION_STATS
                 , GCSpinLock* spin_lock
#endif //SYNCHRONIZATION_STATS
    )
{
#ifdef SYNCHRONIZATION_STATS
    (spin_lock->num_wait_longer)++;
#endif //SYNCHRONIZATION_STATS

    // Slow backoff: go preemptive, yield/sleep depending on processor count,
    // and if a GC is in progress wait for it to finish before returning.
    bool bToggleGC = GCToEEInterface::EnablePreemptiveGC();
    assert (bToggleGC);

    if (!gc_heap::gc_started)
    {
#ifdef SYNCHRONIZATION_STATS
        (spin_lock->num_switch_thread_w)++;
#endif //SYNCHRONIZATION_STATS
        if (g_num_processors > 1)
        {
            YieldProcessor();
            if (i & 0x01f)
                GCToOSInterface::YieldThread (0);
            else
                GCToOSInterface::Sleep (5);
        }
        else
            GCToOSInterface::Sleep (5);
    }

    if (gc_heap::gc_started)
    {
        gc_heap::wait_for_gc_done();
    }

    if (bToggleGC)
    {
#ifdef SYNCHRONIZATION_STATS
        (spin_lock->num_disable_preemptive_w)++;
#endif //SYNCHRONIZATION_STATS
        GCToEEInterface::DisablePreemptiveGC();
    }
}

// Contended spin-lock acquire: spin/yield, taking the WaitLonger path every
// 8th iteration (or whenever a GC has started), then retry the CAS.
inline
void enter_spin_lock (GCSpinLock* spin_lock)
{
retry:
    if (Interlocked::CompareExchange(&spin_lock->lock, lock_taken, lock_free) != lock_free)
    {
        unsigned int i = 0;
        while (spin_lock->lock != lock_free)
        {
            assert (spin_lock->lock != lock_decommissioned);
            if ((++i & 7) && !gc_heap::gc_started)
            {
                if (g_num_processors > 1)
                {
#ifndef MULTIPLE_HEAPS
                    int spin_count = 32 * yp_spin_count_unit;
#else //!MULTIPLE_HEAPS
                    int spin_count = yp_spin_count_unit;
#endif //!MULTIPLE_HEAPS
                    for (int j = 0; j < spin_count; j++)
                    {
                        if (spin_lock->lock == lock_free || gc_heap::gc_started)
                            break;
                        YieldProcessor();
                    }
                    if (spin_lock->lock != lock_free && !gc_heap::gc_started)
                    {
#ifdef SYNCHRONIZATION_STATS
                        (spin_lock->num_switch_thread)++;
#endif //SYNCHRONIZATION_STATS
                        bool cooperative_mode = gc_heap::enable_preemptive ();
                        GCToOSInterface::YieldThread(0);
                        gc_heap::disable_preemptive (cooperative_mode);
                    }
                }
                else
                    GCToOSInterface::YieldThread(0);
            }
            else
            {
                WaitLonger(i
#ifdef SYNCHRONIZATION_STATS
                        , spin_lock
#endif //SYNCHRONIZATION_STATS
                    );
            }
        }
        goto retry;
    }
}
#endif //_DEBUG

inline
+void leave_spin_lock(GCSpinLock *pSpinLock) +{ +#ifdef _DEBUG + bool gc_thread_p = GCToEEInterface::WasCurrentThreadCreatedByGC(); + pSpinLock->released_by_gc_p = gc_thread_p; + pSpinLock->holding_thread = (Thread*) -1; +#endif // _DEBUG + if (pSpinLock->lock != -1) + VolatileStore((int32_t*)&pSpinLock->lock, -1); +} + +inline +BOOL same_large_alignment_p (uint8_t* p1, uint8_t* p2) +{ +#ifdef RESPECT_LARGE_ALIGNMENT + const size_t LARGE_ALIGNMENT_MASK = 2 * DATA_ALIGNMENT - 1; + return ((((size_t)p1 ^ (size_t)p2) & LARGE_ALIGNMENT_MASK) == 0); +#else + UNREFERENCED_PARAMETER(p1); + UNREFERENCED_PARAMETER(p2); + return TRUE; +#endif // RESPECT_LARGE_ALIGNMENT +} + +inline +size_t switch_alignment_size (BOOL already_padded_p) +{ +#ifndef RESPECT_LARGE_ALIGNMENT + assert (!"Should not be called"); +#endif // RESPECT_LARGE_ALIGNMENT + + if (already_padded_p) + return DATA_ALIGNMENT; + else + return Align (min_obj_size) | DATA_ALIGNMENT; +} + +#define END_SPACE_AFTER_GC (loh_size_threshold + MAX_STRUCTALIGN) +#define END_SPACE_AFTER_GC_FL (END_SPACE_AFTER_GC + Align (min_obj_size)) + +inline +size_t round_up_power2 (size_t size) +{ + // Get the 0-based index of the most-significant bit in size-1. + // If the call failed (because size-1 is zero), size must be 1, + // so return 1 (because 1 rounds up to itself). + DWORD highest_set_bit_index; + if (0 == +#ifdef HOST_64BIT + BitScanReverse64( +#else + BitScanReverse( +#endif + &highest_set_bit_index, size - 1)) { return 1; } + + // The size == 0 case (which would have overflowed to SIZE_MAX when decremented) + // is handled below by relying on the fact that highest_set_bit_index is the maximum value + // (31 or 63, depending on sizeof(size_t)) and left-shifting a value >= 2 by that + // number of bits shifts in zeros from the right, resulting in an output of zero. 
+ return static_cast(2) << highest_set_bit_index; +} + +inline +size_t round_down_power2 (size_t size) +{ + DWORD highest_set_bit_index; + if (0 == +#ifdef HOST_64BIT + BitScanReverse64( +#else + BitScanReverse( +#endif + &highest_set_bit_index, size)) { return 0; } + + return static_cast(1) << highest_set_bit_index; +} + +extern size_t loh_size_threshold; + +inline +float mb (size_t num) +{ + return (float)((float)num / 1000.0 / 1000.0); +} + +inline +uint32_t limit_time_to_uint32 (uint64_t time) +{ + time = min (time, (uint64_t)UINT32_MAX); + return (uint32_t)time; +} + +inline +size_t align_on_page (size_t add) +{ + return ((add + OS_PAGE_SIZE - 1) & ~((size_t)OS_PAGE_SIZE - 1)); +} + +inline +uint8_t* align_on_page (uint8_t* add) +{ + return (uint8_t*)align_on_page ((size_t) add); +} + +inline +void memclr (uint8_t* mem, size_t size) +{ + dprintf (3, ("MEMCLR: %p, %zd", mem, size)); + assert ((size & (sizeof(PTR_PTR) - 1)) == 0); + assert (sizeof(PTR_PTR) == DATA_ALIGNMENT); + memset (mem, 0, size); +} + +inline +size_t align_lower_page (size_t add) +{ + return (add & ~((size_t)OS_PAGE_SIZE - 1)); +} + +inline +uint8_t* align_lower_page (uint8_t* add) +{ + return (uint8_t*)align_lower_page ((size_t)add); +} + +#ifdef HOST_64BIT +#define mark_bit_pitch ((size_t)16) +#else +#define mark_bit_pitch ((size_t)8) +#endif //HOST_64BIT +#define mark_word_width ((size_t)32) +#define mark_word_size (mark_word_width * mark_bit_pitch) + +inline +uint8_t* align_on_mark_word (uint8_t* add) +{ + return (uint8_t*)((size_t)(add + mark_word_size - 1) & ~(mark_word_size - 1)); +} + +inline +size_t mark_bit_of (uint8_t* add) +{ + return ((size_t)add / mark_bit_pitch); +} + +inline +unsigned int mark_bit_bit (size_t mark_bit) +{ + return (unsigned int)(mark_bit % mark_word_width); +} + +inline +size_t mark_bit_bit_of (uint8_t* add) +{ + return (((size_t)add / mark_bit_pitch) % mark_word_width); +} + +inline +size_t mark_bit_word (size_t mark_bit) +{ + return (mark_bit / 
            mark_word_width);
}

inline
size_t mark_word_of (uint8_t* add)
{
    return ((size_t)add) / mark_word_size;
}

inline
uint8_t* mark_word_address (size_t wd)
{
    return (uint8_t*)(wd * mark_word_size);
}

#ifdef BACKGROUND_GC
// Per-thread slot in the bpromoted counters; the *16 spreads entries to avoid
// adjacent slots (presumably false-sharing mitigation — TODO confirm).
inline
size_t& gc_heap::bpromoted_bytes (int thread)
{
#ifdef MULTIPLE_HEAPS
    return g_bpromoted [thread * 16];
#else //MULTIPLE_HEAPS
    UNREFERENCED_PARAMETER(thread);
    return g_bpromoted;
#endif //MULTIPLE_HEAPS
}

inline
unsigned int gc_heap::mark_array_marked (uint8_t* add)
{
    return mark_array [mark_word_of (add)] & (1 << mark_bit_bit_of (add));
}

// Set the background-mark bit; interlocked when multiple heaps may race.
inline
void gc_heap::mark_array_set_marked (uint8_t* add)
{
    size_t index = mark_word_of (add);
    uint32_t val = (1 << mark_bit_bit_of (add));
#ifdef MULTIPLE_HEAPS
    Interlocked::Or (&(mark_array [index]), val);
#else
    mark_array [index] |= val;
#endif
}

inline
void gc_heap::mark_array_clear_marked (uint8_t* add)
{
    mark_array [mark_word_of (add)] &= ~(1 << mark_bit_bit_of (add));
}

#ifdef FEATURE_BASICFREEZE
// Clear the BGC mark-array bits covering a (frozen) SOH segment, if any.
inline
void gc_heap::seg_clear_mark_array_bits_soh (heap_segment* seg)
{
    uint8_t* range_beg = 0;
    uint8_t* range_end = 0;
    if (bgc_mark_array_range (seg, FALSE, &range_beg, &range_end))
    {
        clear_mark_array (range_beg, align_on_mark_word (range_end));
    }
}
#endif //FEATURE_BASICFREEZE
#endif //BACKGROUND_GC

inline
BOOL gc_heap::is_mark_set (uint8_t* o)
{
    return marked (o);
}

// Mark an object pinned (o is the object, ppObject the root that pins it) and
// fire the ETW pin event when tracing is on.
inline
void gc_heap::pin_object (uint8_t* o, uint8_t** ppObject)
{
    dprintf (3, ("Pinning %zx->%zx", (size_t)ppObject, (size_t)o));
    set_pinned (o);

#ifdef FEATURE_EVENT_TRACE
    if (EVENT_ENABLED(PinObjectAtGCTime))
    {
        fire_etw_pin_object_event (o, ppObject);
    }
#endif // FEATURE_EVENT_TRACE

    num_pinned_objects++;
}

#define contain_pointers(i) header(i)->ContainsGCPointers()
#ifdef COLLECTIBLE_CLASS
#define contain_pointers_or_collectible(i) header(i)->ContainsGCPointersOrCollectible()
#define get_class_object(i)
GCToEEInterface::GetLoaderAllocatorObjectForGC((Object *)i)
#define is_collectible(i) method_table(i)->Collectible()
#else //COLLECTIBLE_CLASS
#define contain_pointers_or_collectible(i) header(i)->ContainsGCPointers()
#endif //COLLECTIBLE_CLASS

inline
uint8_t*& card_table_lowest_address (uint32_t* c_table)
{
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->lowest_address;
}

inline
uint8_t*& card_table_highest_address (uint32_t* c_table)
{
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->highest_address;
}

inline
uint32_t*& card_table_next (uint32_t* c_table)
{
    // NOTE: The dac takes a dependency on card_table_info being right before c_table.
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->next_card_table;
}

void destroy_card_table (uint32_t* c_table);

#define new_start() {if (ppstop <= start) {break;} else {parm = start}}
#define ignore_start 0
#define use_start 1

// Walk every pointer slot of object o (method table mt, total size bytes),
// evaluating exp with parm bound to each uint8_t** slot. start/start_useful let
// the walk begin partway into the object; the negative-series (cnt < 0) branch
// handles arrays of value types described by a repeating val_serie pattern.
#define go_through_object(mt,o,size,parm,start,start_useful,limit,exp)      \
{                                                                           \
    CGCDesc* map = CGCDesc::GetCGCDescFromMT((MethodTable*)(mt));           \
    CGCDescSeries* cur = map->GetHighestSeries();                           \
    ptrdiff_t cnt = (ptrdiff_t) map->GetNumSeries();                        \
                                                                            \
    if (cnt >= 0)                                                           \
    {                                                                       \
        CGCDescSeries* last = map->GetLowestSeries();                       \
        uint8_t** parm = 0;                                                 \
        do                                                                  \
        {                                                                   \
            assert (parm <= (uint8_t**)((o) + cur->GetSeriesOffset()));     \
            parm = (uint8_t**)((o) + cur->GetSeriesOffset());               \
            uint8_t** ppstop =                                              \
                (uint8_t**)((uint8_t*)parm + cur->GetSeriesSize() + (size));\
            if (!start_useful || (uint8_t*)ppstop > (start))                \
            {                                                               \
                if (start_useful && (uint8_t*)parm < (start)) parm = (uint8_t**)(start);\
                while (parm < ppstop)                                       \
                {                                                           \
                    {exp}                                                   \
                    parm++;                                                 \
                }                                                           \
            }                                                               \
            cur--;                                                          \
                                                                            \
        } while (cur >= last);                                              \
    }                                                                       \
    else                                                                    \
    {                                                                       \
        /* Handle the repeating case - array of valuetypes */               \
        uint8_t** parm = (uint8_t**)((o) + cur->startoffset);               \
        if (start_useful && start > (uint8_t*)parm)                         \
        {                                                                   \
            ptrdiff_t cs = mt->RawGetComponentSize();                       \
            parm = (uint8_t**)((uint8_t*)parm + (((start) - (uint8_t*)parm)/cs)*cs); \
        }                                                                   \
        while ((uint8_t*)parm < ((o)+(size)-plug_skew))                     \
        {                                                                   \
            for (ptrdiff_t __i = 0; __i > cnt; __i--)                       \
            {                                                               \
                HALF_SIZE_T skip = (cur->val_serie + __i)->skip;            \
                HALF_SIZE_T nptrs = (cur->val_serie + __i)->nptrs;          \
                uint8_t** ppstop = parm + nptrs;                            \
                if (!start_useful || (uint8_t*)ppstop > (start))            \
                {                                                           \
                    if (start_useful && (uint8_t*)parm < (start)) parm = (uint8_t**)(start); \
                    do                                                      \
                    {                                                       \
                        {exp}                                               \
                        parm++;                                             \
                    } while (parm < ppstop);                                \
                }                                                           \
                parm = (uint8_t**)((uint8_t*)ppstop + skip);                \
            }                                                               \
        }                                                                   \
    }                                                                       \
}

#define go_through_object_nostart(mt,o,size,parm,exp) {go_through_object(mt,o,size,parm,o,ignore_start,(o + size),exp); }

// Like go_through_object_nostart, but skips objects with no GC pointers; the
// collectible flavor first visits the class (loader allocator) object.
#ifndef COLLECTIBLE_CLASS
#define go_through_object_cl(mt,o,size,parm,exp)                            \
{                                                                           \
    if (header(o)->ContainsGCPointers())                                    \
    {                                                                       \
        go_through_object_nostart(mt,o,size,parm,exp);                      \
    }                                                                       \
}
#else //COLLECTIBLE_CLASS
#define go_through_object_cl(mt,o,size,parm,exp)                            \
{                                                                           \
    if (header(o)->Collectible())                                           \
    {                                                                       \
        uint8_t* class_obj = get_class_object (o);                          \
        uint8_t** parm = &class_obj;                                        \
        do {exp} while (false);                                             \
    }                                                                       \
    if (header(o)->ContainsGCPointers())                                    \
    {                                                                       \
        go_through_object_nostart(mt,o,size,parm,exp);                      \
    }                                                                       \
}
#endif //COLLECTIBLE_CLASS

inline BOOL
gc_heap::dt_high_memory_load_p()
{
    return ((settings.entry_memory_load >= high_memory_load_th) || g_low_memory_status);
}

#if defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64) || defined(TARGET_RISCV64)
#ifndef PREFETCH
#define PREFETCH
#endif
#endif

#ifdef PREFETCH
inline void Prefetch(void* addr)
{
#ifdef TARGET_WINDOWS

#if defined(TARGET_AMD64) || defined(TARGET_X86)

#ifndef _MM_HINT_T0
#define _MM_HINT_T0 1
#endif
    _mm_prefetch((const char*)addr, _MM_HINT_T0);
#elif defined(TARGET_ARM64)
    __prefetch((const char*)addr);
#endif //defined(TARGET_AMD64) || defined(TARGET_X86)
+#elif defined(TARGET_UNIX) + __builtin_prefetch(addr); +#else //!(TARGET_WINDOWS || TARGET_UNIX) + UNREFERENCED_PARAMETER(addr); +#endif //TARGET_WINDOWS +} +#else //PREFETCH +inline void Prefetch (void* addr) +{ + UNREFERENCED_PARAMETER(addr); +} +#endif //PREFETCH + +#ifdef BACKGROUND_GC +inline +void gc_heap::bgc_track_uoh_alloc() +{ + if (current_c_gc_state == c_gc_state_planning) + { + Interlocked::Increment (&uoh_alloc_thread_count); + dprintf (3, ("h%d: inc lc: %d", heap_number, (int32_t)uoh_alloc_thread_count)); + } +} + +inline +void gc_heap::bgc_untrack_uoh_alloc() +{ + if (current_c_gc_state == c_gc_state_planning) + { + Interlocked::Decrement (&uoh_alloc_thread_count); + dprintf (3, ("h%d: dec lc: %d", heap_number, (int32_t)uoh_alloc_thread_count)); + } +} + +#endif //BACKGROUND_GC + +inline +BOOL gc_heap::ephemeral_pointer_p (uint8_t* o) +{ +#ifdef USE_REGIONS + int gen_num = object_gennum ((uint8_t*)o); + assert (gen_num >= 0); + return (gen_num < max_generation); +#else + return ((o >= ephemeral_low) && (o < ephemeral_high)); +#endif //USE_REGIONS +} + +// Get the 0-based index of the most-significant bit in the value. +// Returns -1 if the input value is zero (i.e. has no set bits). +inline +int index_of_highest_set_bit (size_t value) +{ + // Get the 0-based index of the most-significant bit in the value. + // If the call failed (because value is zero), return -1. + DWORD highest_set_bit_index; + return (0 == +#ifdef HOST_64BIT + BitScanReverse64( +#else + BitScanReverse( +#endif + &highest_set_bit_index, value)) ? -1 : static_cast(highest_set_bit_index); +} + +inline +size_t gc_heap::generation_allocator_efficiency_percent (generation* inst) +{ +#ifdef DYNAMIC_HEAP_COUNT + if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) + { + uint64_t total_plan_allocated = generation_total_plan_allocated (inst); + uint64_t condemned_allocated = generation_condemned_allocated (inst); + return ((total_plan_allocated == 0) ? 
0 : (100 * (total_plan_allocated - condemned_allocated) / total_plan_allocated)); + } + else +#endif //DYNAMIC_HEAP_COUNT + { + uint64_t free_obj_space = generation_free_obj_space (inst); + uint64_t free_list_allocated = generation_free_list_allocated (inst); + if ((free_list_allocated + free_obj_space) == 0) + { + return 0; + } + else + { + return (size_t)((100 * free_list_allocated) / (free_list_allocated + free_obj_space)); + } + } +} + +inline +size_t gc_heap::generation_unusable_fragmentation (generation* inst, int hn) +{ +#ifdef DYNAMIC_HEAP_COUNT + if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) + { + uint64_t total_plan_allocated = generation_total_plan_allocated (inst); + uint64_t condemned_allocated = generation_condemned_allocated (inst); + uint64_t unusable_frag = 0; + size_t fo_space = (((ptrdiff_t)generation_free_obj_space (inst) < 0) ? 0 : generation_free_obj_space (inst)); + + if (total_plan_allocated != 0) + { + unusable_frag = fo_space + (condemned_allocated * generation_free_list_space (inst) / total_plan_allocated); + } + + dprintf (3, ("h%d g%d FLa: %Id, ESa: %Id, Ca: %Id | FO: %Id, FL %Id, fl effi %.3f, unusable fl is %Id", + hn, inst->gen_num, + generation_free_list_allocated (inst), generation_end_seg_allocated (inst), (size_t)condemned_allocated, + fo_space, generation_free_list_space (inst), + ((total_plan_allocated == 0) ? 
1.0 : ((float)(total_plan_allocated - condemned_allocated) / (float)total_plan_allocated)), + (size_t)unusable_frag)); + + return (size_t)unusable_frag; + } + else +#endif //DYNAMIC_HEAP_COUNT + { + uint64_t free_obj_space = generation_free_obj_space (inst); + uint64_t free_list_allocated = generation_free_list_allocated (inst); + uint64_t free_list_space = generation_free_list_space (inst); + if ((free_list_allocated + free_obj_space) == 0) + return 0; + return (size_t)(free_obj_space + (free_obj_space * free_list_space) / (free_list_allocated + free_obj_space)); + } +} + +inline +void gc_heap::check_loh_compact_mode (BOOL all_heaps_compacted_p) +{ + if (settings.loh_compaction && (loh_compaction_mode == loh_compaction_once)) + { + if (all_heaps_compacted_p) + { + // If the compaction mode says to compact once and we are going to compact LOH, + // we need to revert it back to no compaction. + loh_compaction_mode = loh_compaction_default; + } + } +} + +inline +gc_history_global* gc_heap::get_gc_data_global() +{ +#ifdef BACKGROUND_GC + return (settings.concurrent ? &bgc_data_global : &gc_data_global); +#else + return &gc_data_global; +#endif //BACKGROUND_GC +} + +inline +gc_history_per_heap* gc_heap::get_gc_data_per_heap() +{ +#ifdef BACKGROUND_GC + return (settings.concurrent ? 
&bgc_data_per_heap : &gc_data_per_heap); +#else + return &gc_data_per_heap; +#endif //BACKGROUND_GC +} + +inline +size_t gc_heap::brick_of (uint8_t* add) +{ + return (size_t)(add - lowest_address) / brick_size; +} + +inline +uint8_t* gc_heap::brick_address (size_t brick) +{ + return lowest_address + (brick_size * brick); +} + +//codes for the brick entries: +//entry == 0 -> not assigned +//entry >0 offset is entry-1 +//entry <0 jump back entry bricks +inline +void gc_heap::set_brick (size_t index, ptrdiff_t val) +{ + if (val < -32767) + { + val = -32767; + } + assert (val < 32767); + if (val >= 0) + brick_table [index] = (short)val+1; + else + brick_table [index] = (short)val; + + dprintf (3, ("set brick[%zx] to %d\n", index, (short)val)); +} + +inline +int gc_heap::get_brick_entry (size_t index) +{ +#ifdef MULTIPLE_HEAPS + return VolatileLoadWithoutBarrier(&brick_table [index]); +#else + return brick_table[index]; +#endif +} + +inline +uint8_t* gc_heap::card_address (size_t card) +{ + return (uint8_t*) (card_size * card); +} + +inline +size_t gc_heap::card_of ( uint8_t* object) +{ + return (size_t)(object) / card_size; +} + +inline +void gc_heap::clear_card (size_t card) +{ + card_table [card_word (card)] = + (card_table [card_word (card)] & ~(1 << card_bit (card))); + dprintf (3,("Cleared card %zx [%zx, %zx[", card, (size_t)card_address (card), + (size_t)card_address (card+1))); +} + +inline +void gc_heap::set_card (size_t card) +{ + size_t word = card_word (card); + card_table[word] = (card_table [word] | (1 << card_bit (card))); + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + // Also set the card bundle that corresponds to the card + size_t bundle_to_set = cardw_card_bundle(word); + + card_bundle_set(bundle_to_set); + + dprintf (3,("Set card %zx [%zx, %zx[ and bundle %zx", card, (size_t)card_address (card), (size_t)card_address (card+1), bundle_to_set)); +#endif +} + +inline +BOOL gc_heap::card_set_p (size_t card) +{ + return ( card_table [ card_word (card) ] 
& (1 << card_bit (card))); +} + +inline +int gc_heap::get_num_heaps() +{ +#ifdef MULTIPLE_HEAPS + return n_heaps; +#else + return 1; +#endif //MULTIPLE_HEAPS +} + +inline +ptrdiff_t gc_heap::get_desired_allocation (int gen_number) +{ + return dd_desired_allocation (dynamic_data_of (gen_number)); +} + +inline +ptrdiff_t gc_heap::get_new_allocation (int gen_number) +{ + return dd_new_allocation (dynamic_data_of (gen_number)); +} + +//return the amount allocated so far in gen_number +inline +ptrdiff_t gc_heap::get_allocation (int gen_number) +{ + dynamic_data* dd = dynamic_data_of (gen_number); + + return dd_desired_allocation (dd) - dd_new_allocation (dd); +} + +#ifdef BACKGROUND_GC +inline +BOOL gc_heap::background_marked (uint8_t* o) +{ + return mark_array_marked (o); +} + +inline +BOOL gc_heap::background_mark1 (uint8_t* o) +{ + BOOL to_mark = !mark_array_marked (o); + + dprintf (3, ("b*%zx*b(%d)", (size_t)o, (to_mark ? 1 : 0))); + if (to_mark) + { + mark_array_set_marked (o); + dprintf (4, ("n*%zx*n", (size_t)o)); + return TRUE; + } + else + return FALSE; +} + +// TODO: we could consider filtering out NULL's here instead of going to +// look for it on other heaps +inline +BOOL gc_heap::background_mark (uint8_t* o, uint8_t* low, uint8_t* high) +{ + BOOL marked = FALSE; + if ((o >= low) && (o < high)) + marked = background_mark1 (o); +#ifdef MULTIPLE_HEAPS + else if (o) + { + gc_heap* hp = heap_of (o); + assert (hp); + if ((o >= hp->background_saved_lowest_address) && (o < hp->background_saved_highest_address)) + marked = background_mark1 (o); + } +#endif //MULTIPLE_HEAPS + return marked; +} + +#endif //BACKGROUND_GC + +inline +size_t size_mark_array_of (uint8_t* from, uint8_t* end) +{ + assert (((size_t)from & ((mark_word_size)-1)) == 0); + assert (((size_t)end & ((mark_word_size)-1)) == 0); + return sizeof (uint32_t)*(((end - from) / mark_word_size)); +} + +inline +mark* gc_heap::pinned_plug_of (size_t bos) +{ + return &mark_stack_array [ bos ]; +} + +inline 
+mark* gc_heap::oldest_pin () +{ + return pinned_plug_of (mark_stack_bos); +} + +inline +BOOL gc_heap::pinned_plug_que_empty_p () +{ + return (mark_stack_bos == mark_stack_tos); +} + +#ifdef FEATURE_LOH_COMPACTION +inline +BOOL gc_heap::loh_pinned_plug_que_empty_p() +{ + return (loh_pinned_queue_bos == loh_pinned_queue_tos); +} +#endif // FEATURE_LOH_COMPACTION + +inline +mark* gc_heap::loh_pinned_plug_of (size_t bos) +{ + return &loh_pinned_queue[bos]; +} + +#ifdef USE_REGIONS +inline bool gc_heap::is_in_gc_range (uint8_t* o) +{ +#ifdef FEATURE_BASICFREEZE + // we may have frozen objects in read only segments + // outside of the reserved address range of the gc heap + assert (((g_gc_lowest_address <= o) && (o < g_gc_highest_address)) || + (o == nullptr) || (ro_segment_lookup (o) != nullptr)); +#else //FEATURE_BASICFREEZE + // without frozen objects, every non-null pointer must be + // within the heap + assert ((o == nullptr) || (g_gc_lowest_address <= o) && (o < g_gc_highest_address)); +#endif //FEATURE_BASICFREEZE + return ((gc_low <= o) && (o < gc_high)); +} +#endif //USE_REGIONS + +#ifdef FEATURE_EVENT_TRACE +inline +void gc_heap::record_mark_time (uint64_t& mark_time, + uint64_t& current_mark_time, + uint64_t& last_mark_time) +{ + if (informational_event_enabled_p) + { + current_mark_time = GetHighPrecisionTimeStamp(); + mark_time = limit_time_to_uint32 (current_mark_time - last_mark_time); + dprintf (3, ("%zd - %zd = %zd", + current_mark_time, last_mark_time, (current_mark_time - last_mark_time))); + last_mark_time = current_mark_time; + } +} +#endif //FEATURE_EVENT_TRACE + +inline +void gc_heap::init_alloc_info (generation* gen, heap_segment* seg) +{ + generation_allocation_segment (gen) = seg; + generation_allocation_pointer (gen) = heap_segment_mem (seg); + generation_allocation_limit (gen) = generation_allocation_pointer (gen); + generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen); +} + +inline +uint8_t* pinned_plug 
(mark* m) +{ + return m->first; +} + +inline +size_t& pinned_len (mark* m) +{ + return m->len; +} + +inline +void set_new_pin_info (mark* m, uint8_t* pin_free_space_start) +{ + m->len = pinned_plug (m) - pin_free_space_start; +#ifdef SHORT_PLUGS + m->allocation_context_start_region = pin_free_space_start; +#endif //SHORT_PLUGS +} + +inline +void gc_heap::update_oldest_pinned_plug() +{ + oldest_pinned_plug = (pinned_plug_que_empty_p() ? 0 : pinned_plug (oldest_pin())); +} + +#if !defined(_MSC_VER) && !defined(__cdecl) +#if defined(TARGET_X86) +#define __cdecl __attribute__((cdecl)) +#else // TARGET_X86 +#define __cdecl +#endif // TARGET_X86 +#endif // !_MSC_VER && !__cdecl + +#ifdef FEATURE_STRUCTALIGN +#if defined (TARGET_AMD64) +#define brick_bits (12) +#else +#define brick_bits (11) +#endif //TARGET_AMD64 +static_assert(brick_size == (1 << brick_bits)); + +#define child_bits (brick_bits + 1 - LOG2_PTRSIZE) +#define pad_bits (sizeof(short) * 8 - child_bits) + +#define child_from_short(w) (((signed short)(w) / (1 << (pad_bits - LOG2_PTRSIZE))) & ~((1 << LOG2_PTRSIZE) - 1)) +#define pad_mask ((1 << pad_bits) - 1) +#define pad_from_short(w) ((size_t)(w) & pad_mask) +#else // FEATURE_STRUCTALIGN +#define child_from_short(w) (w) +#endif // FEATURE_STRUCTALIGN + +inline +short node_left_child(uint8_t* node) +{ + return child_from_short(((plug_and_pair*)node)[-1].m_pair.left); +} + +inline +short node_right_child(uint8_t* node) +{ + return child_from_short(((plug_and_pair*)node)[-1].m_pair.right); +} + +inline +size_t node_gap_size (uint8_t* node) +{ + return ((plug_and_gap*)node)[-1].gap; +} + +inline +ptrdiff_t loh_node_relocation_distance(uint8_t* node) +{ + return (((loh_obj_and_pad*)node)[-1].reloc); +} + +inline +void loh_set_node_relocation_distance(uint8_t* node, ptrdiff_t val) +{ + ptrdiff_t* place = &(((loh_obj_and_pad*)node)[-1].reloc); + *place = val; +} + +inline +ptrdiff_t node_relocation_distance (uint8_t* node) +{ + return 
(((plug_and_reloc*)(node))[-1].reloc & ~3); +} + +inline +void set_node_relocation_distance(uint8_t* node, ptrdiff_t val) +{ + assert (val == (val & ~3)); + ptrdiff_t* place = &(((plug_and_reloc*)node)[-1].reloc); + *place &= 1; + *place |= val; +} + +#define node_left_p(node) (((plug_and_reloc*)(node))[-1].reloc & 2) +#define set_node_left(node) ((plug_and_reloc*)(node))[-1].reloc |= 2 + +inline +void set_node_left_child(uint8_t* node, ptrdiff_t val) +{ + assert (val > -(ptrdiff_t)brick_size); + assert (val < (ptrdiff_t)brick_size); + assert (Aligned (val)); +#ifdef FEATURE_STRUCTALIGN + size_t pad = pad_from_short(((plug_and_pair*)node)[-1].m_pair.left); + ((plug_and_pair*)node)[-1].m_pair.left = ((short)val << (pad_bits - LOG2_PTRSIZE)) | (short)pad; +#else // FEATURE_STRUCTALIGN + ((plug_and_pair*)node)[-1].m_pair.left = (short)val; +#endif // FEATURE_STRUCTALIGN + assert (node_left_child (node) == val); +} + +inline +void set_node_right_child(uint8_t* node, ptrdiff_t val) +{ + assert (val > -(ptrdiff_t)brick_size); + assert (val < (ptrdiff_t)brick_size); + assert (Aligned (val)); +#ifdef FEATURE_STRUCTALIGN + size_t pad = pad_from_short(((plug_and_pair*)node)[-1].m_pair.right); + ((plug_and_pair*)node)[-1].m_pair.right = ((short)val << (pad_bits - LOG2_PTRSIZE)) | (short)pad; +#else // FEATURE_STRUCTALIGN + ((plug_and_pair*)node)[-1].m_pair.right = (short)val; +#endif // FEATURE_STRUCTALIGN + assert (node_right_child (node) == val); +} + +inline +void set_gap_size (uint8_t* node, size_t size) +{ + assert (Aligned (size)); + + ((plug_and_gap *)node)[-1].reloc = 0; + ((plug_and_gap *)node)[-1].lr = 0; + ((plug_and_gap *)node)[-1].gap = size; + + assert ((size == 0) || (size >= sizeof(plug_and_reloc))); +} + +inline +uint8_t* tree_search (uint8_t* tree, uint8_t* old_address) +{ + uint8_t* candidate = 0; + int cn; + while (1) + { + if (tree < old_address) + { + if ((cn = node_right_child (tree)) != 0) + { + assert (candidate < tree); + candidate = tree; + tree = 
tree + cn; + Prefetch (&((plug_and_pair*)tree)[-1].m_pair.left); + continue; + } + else + { + break; + } + } + else if (tree > old_address) + { + if ((cn = node_left_child (tree)) != 0) + { + tree = tree + cn; + Prefetch (&((plug_and_pair*)tree)[-1].m_pair.left); + continue; + } + else + { + break; + } + } + else + { + break; + } + } + + if (tree <= old_address) + return tree; + else if (candidate) + return candidate; + else + return tree; +} + +#ifdef DOUBLY_LINKED_FL +inline +BOOL is_plug_bgc_mark_bit_set (uint8_t* node) +{ + return header(node)->IsBGCMarkBitSet(); +} + +inline +void clear_plug_bgc_mark_bit (uint8_t* node) +{ + header(node)->ClearBGCMarkBit(); +} + +inline +BOOL is_free_obj_in_compact_bit_set (uint8_t* node) +{ + return header(node)->IsFreeObjInCompactBitSet(); +} + +inline +void clear_free_obj_in_compact_bit (uint8_t* node) +{ + header(node)->ClearFreeObjInCompactBit(); +} + +inline +BOOL is_on_free_list (uint8_t* o, size_t size) +{ + if (size >= min_free_list) + { + if (header(o)->GetMethodTable() == g_gc_pFreeObjectMethodTable) + { + return (free_list_prev (o) != PREV_EMPTY); + } + } + + return FALSE; +} +#endif //DOUBLY_LINKED_FL + +#ifdef SHORT_PLUGS +inline +void clear_plug_padded (uint8_t* node) +{ + header(node)->ClearMarked(); +} +#else //SHORT_PLUGS +inline +void clear_plug_padded (uint8_t* node) +{ + UNREFERENCED_PARAMETER(node); +} +#endif //SHORT_PLUGS + +inline +heap_segment* heap_segment_non_sip (heap_segment* ns) +{ +#ifdef USE_REGIONS + if ((ns == 0) || !heap_segment_swept_in_plan (ns)) + { + return ns; + } + else + { + do + { + if (heap_segment_swept_in_plan (ns)) + { + dprintf (REGIONS_LOG, ("region %p->%p SIP", + heap_segment_mem (ns), heap_segment_allocated (ns))); + } + + ns = heap_segment_next (ns); + } while ((ns != 0) && heap_segment_swept_in_plan (ns)); + return ns; + } +#else //USE_REGIONS + return ns; +#endif //USE_REGIONS +} + +inline +heap_segment* heap_segment_next_non_sip (heap_segment* seg) +{ + heap_segment* ns = 
heap_segment_next (seg); +#ifdef USE_REGIONS + return heap_segment_non_sip (ns); +#else + return ns; +#endif //USE_REGIONS +} + +inline +static void safe_switch_to_thread() +{ + bool cooperative_mode = gc_heap::enable_preemptive(); + + GCToOSInterface::YieldThread(0); + + gc_heap::disable_preemptive(cooperative_mode); +} + +void WaitLongerNoInstru (int i); + +extern const int32_t lock_free; +extern const int32_t lock_taken; +extern const int32_t lock_decommissioned; + +// +// We need the following methods to have volatile arguments, so that they can accept +// raw pointers in addition to the results of the & operator on Volatile. +// this will never be used for the more_space_lock_xxx, which is why +// "lock_decommissioned" cannot happen. +inline +static void enter_spin_lock_noinstru (RAW_KEYWORD(volatile) int32_t* lock) +{ +retry: + + if (Interlocked::CompareExchange(lock, lock_taken, lock_free) != lock_free) + { + unsigned int i = 0; + while (VolatileLoad(lock) != lock_free) + { + // will never be used for more_space_lock_xxx + assert (VolatileLoad(lock) != lock_decommissioned); + if ((++i & 7) && !IsGCInProgress()) + { + if (g_num_processors > 1) + { +#ifndef MULTIPLE_HEAPS + int spin_count = 32 * yp_spin_count_unit; +#else //!MULTIPLE_HEAPS + int spin_count = yp_spin_count_unit; +#endif //!MULTIPLE_HEAPS + for (int j = 0; j < spin_count; j++) + { + if (VolatileLoad(lock) == lock_free || IsGCInProgress()) + break; + YieldProcessor(); // indicate to the processor that we are spinning + } + if (VolatileLoad(lock) != lock_free && !IsGCInProgress()) + { + safe_switch_to_thread(); + } + } + else + { + safe_switch_to_thread(); + } + } + else + { + WaitLongerNoInstru(i); + } + } + goto retry; + } +} + +inline +static BOOL try_enter_spin_lock_noinstru(RAW_KEYWORD(volatile) int32_t* lock) +{ + return (Interlocked::CompareExchange(&*lock, lock_taken, lock_free) == lock_free); +} + +inline +static void leave_spin_lock_noinstru (RAW_KEYWORD(volatile) int32_t* lock) +{ + 
VolatileStore((int32_t*)lock, lock_free); +} + +inline +BOOL power_of_two_p (size_t integer) +{ + return !(integer & (integer-1)); +} + +#ifdef FEATURE_STRUCTALIGN +void set_node_aligninfo (uint8_t *node, int requiredAlignment, ptrdiff_t pad); +void clear_node_aligninfo (uint8_t *node); +#else // FEATURE_STRUCTALIGN +#define node_realigned(node) (((plug_and_reloc*)(node))[-1].reloc & 1) +void set_node_realigned (uint8_t* node); +void clear_node_realigned(uint8_t* node); +#endif // FEATURE_STRUCTALIGN + +#define OBJECT_ALIGNMENT_OFFSET (sizeof(MethodTable *)) + +#ifdef FEATURE_STRUCTALIGN +#define MAX_STRUCTALIGN OS_PAGE_SIZE +#else // FEATURE_STRUCTALIGN +#define MAX_STRUCTALIGN 0 +#endif // FEATURE_STRUCTALIGN + +#ifdef FEATURE_STRUCTALIGN +inline +ptrdiff_t AdjustmentForMinPadSize(ptrdiff_t pad, int requiredAlignment) +{ + // The resulting alignpad must be either 0 or at least min_obj_size. + // Note that by computing the following difference on unsigned types, + // we can do the range check 0 < alignpad < min_obj_size with a + // single conditional branch. + if ((size_t)(pad - DATA_ALIGNMENT) < Align (min_obj_size) - DATA_ALIGNMENT) + { + return requiredAlignment; + } + return 0; +} + + +inline +ptrdiff_t ComputeStructAlignPad (uint8_t* plug, int requiredAlignment, size_t alignmentOffset=OBJECT_ALIGNMENT_OFFSET) +{ + return StructAlign (plug, requiredAlignment, alignmentOffset) - plug; +} + +inline +BOOL IsStructAligned (uint8_t *ptr, int requiredAlignment) +{ + return StructAlign (ptr, requiredAlignment) == ptr; +} + +inline +ptrdiff_t ComputeMaxStructAlignPad (int requiredAlignment) +{ + if (requiredAlignment == DATA_ALIGNMENT) + return 0; + // Since a non-zero alignment padding cannot be less than min_obj_size (so we can fit the + // alignment padding object), the worst-case alignment padding is correspondingly larger + // than the required alignment. 
+ return requiredAlignment + Align (min_obj_size) - DATA_ALIGNMENT; +} + +inline +ptrdiff_t ComputeMaxStructAlignPadLarge (int requiredAlignment) +{ + if (requiredAlignment <= get_alignment_constant (TRUE)+1) + return 0; + // This is the same as ComputeMaxStructAlignPad, except that in addition to leaving space + // for padding before the actual object, it also leaves space for filling a gap after the + // actual object. This is needed on the large object heap, as the outer allocation functions + // don't operate on an allocation context (which would have left space for the final gap). + return requiredAlignment + Align (min_obj_size) * 2 - DATA_ALIGNMENT; +} + +#else // FEATURE_STRUCTALIGN +#define ComputeMaxStructAlignPad(requiredAlignment) 0 +#define ComputeMaxStructAlignPadLarge(requiredAlignment) 0 +#endif // FEATURE_STRUCTALIGN + +#ifndef FEATURE_STRUCTALIGN +#define node_realigned(node) (((plug_and_reloc*)(node))[-1].reloc & 1) +void set_node_realigned (uint8_t* node); +#endif // FEATURE_STRUCTALIGN + +#define commit_min_th (16*OS_PAGE_SIZE) +#define UOH_ALLOCATION_RETRY_MAX_COUNT 2 + +#ifdef TRACE_GC +extern const char* const allocation_state_str[]; +#endif //TRACE_GC + +extern const size_t etw_allocation_tick; +extern const size_t fgn_check_quantum; + +#ifdef BACKGROUND_GC +extern uint32_t bgc_alloc_spin_count; +extern uint32_t bgc_alloc_spin; +#endif //BACKGROUND_GC + +#define check_msl_status(msg, size) if (msl_status == msl_retry_different_heap) \ + { \ + dprintf (5555, ("h%d RETRY %s(%Id)", heap_number, msg, size)); \ + return a_state_retry_allocate; \ + } + +#ifdef DOUBLY_LINKED_FL +inline +void set_plug_bgc_mark_bit (uint8_t* node) +{ + header(node)->SetBGCMarkBit(); +} + +inline +void set_free_obj_in_compact_bit (uint8_t* node) +{ + header(node)->SetFreeObjInCompactBit(); +} +#endif //DOUBLY_LINKED_FL + +#ifdef SHORT_PLUGS +inline +void set_plug_padded (uint8_t* node) +{ + header(node)->SetMarked(); +} + +inline +BOOL is_plug_padded (uint8_t* node) 
+{ + return header(node)->IsMarked(); +} +#else //SHORT_PLUGS +inline +void set_plug_padded (uint8_t* node) +{ + UNREFERENCED_PARAMETER(node); +} + +inline +BOOL is_plug_padded (uint8_t* node) +{ + UNREFERENCED_PARAMETER(node); + return FALSE; +} +#endif //SHORT_PLUGS + +#ifdef SHORT_PLUGS +inline +uint8_t*& pin_allocation_context_start_region (mark* m) +{ + return m->allocation_context_start_region; +} + +inline +uint8_t* get_plug_start_in_saved (uint8_t* old_loc, mark* pinned_plug_entry) +{ + uint8_t* saved_pre_plug_info = (uint8_t*)(pinned_plug_entry->get_pre_plug_reloc_info()); + uint8_t* plug_start_in_saved = saved_pre_plug_info + (old_loc - (pinned_plug (pinned_plug_entry) - sizeof (plug_and_gap))); + dprintf (2, ("EP: %p(%p), %p", old_loc, pinned_plug (pinned_plug_entry), plug_start_in_saved)); + return plug_start_in_saved; +} +#endif //SHORT_PLUGS + +#ifndef USE_REGIONS +class seg_free_spaces +{ + struct seg_free_space + { + BOOL is_plug; + void* start; + }; + + struct free_space_bucket + { + seg_free_space* free_space; + ptrdiff_t count_add; + ptrdiff_t count_fit; + }; + + void move_bucket (int old_power2, int new_power2) + { + assert (old_power2 >= 0); + assert (old_power2 >= new_power2); + + if (old_power2 == new_power2) + { + return; + } + + seg_free_space* src_index = free_space_buckets[old_power2].free_space; + for (int i = old_power2; i > new_power2; i--) + { + seg_free_space** dest = &(free_space_buckets[i].free_space); + (*dest)++; + + seg_free_space* dest_index = free_space_buckets[i - 1].free_space; + if (i > (new_power2 + 1)) + { + seg_free_space temp = *src_index; + *src_index = *dest_index; + *dest_index = temp; + } + src_index = dest_index; + } + + free_space_buckets[old_power2].count_fit--; + free_space_buckets[new_power2].count_fit++; + } + +#ifdef _DEBUG + void dump_free_space (seg_free_space* item) + { + uint8_t* addr = 0; + size_t len = 0; + + if (item->is_plug) + { + mark* m = (mark*)(item->start); + len = pinned_len (m); + addr = 
pinned_plug (m) - len; + } + else + { + heap_segment* seg = (heap_segment*)(item->start); + addr = heap_segment_plan_allocated (seg); + len = heap_segment_committed (seg) - addr; + } + + dprintf (SEG_REUSE_LOG_1, ("[%d]0x%p %zd", heap_num, addr, len)); + } + + void dump() + { + seg_free_space* item = NULL; + int i = 0; + + dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------\nnow the free spaces look like:", heap_num)); + for (i = 0; i < (free_space_bucket_count - 1); i++) + { + dprintf (SEG_REUSE_LOG_1, ("[%d]Free spaces for 2^%d bucket:", heap_num, (base_power2 + i))); + dprintf (SEG_REUSE_LOG_1, ("[%d]%s %s", heap_num, "start", "len")); + item = free_space_buckets[i].free_space; + while (item < free_space_buckets[i + 1].free_space) + { + dump_free_space (item); + item++; + } + dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------", heap_num)); + } + + dprintf (SEG_REUSE_LOG_1, ("[%d]Free spaces for 2^%d bucket:", heap_num, (base_power2 + i))); + dprintf (SEG_REUSE_LOG_1, ("[%d]%s %s", heap_num, "start", "len")); + item = free_space_buckets[i].free_space; + + while (item <= &seg_free_space_array[free_space_item_count - 1]) + { + dump_free_space (item); + item++; + } + dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------", heap_num)); + } +#endif //_DEBUG + + free_space_bucket* free_space_buckets; + seg_free_space* seg_free_space_array; + ptrdiff_t free_space_bucket_count; + ptrdiff_t free_space_item_count; + int base_power2; + int heap_num; +#ifdef _DEBUG + BOOL has_end_of_seg; +#endif //_DEBUG + +public: + seg_free_spaces (int h_number) + { + heap_num = h_number; + } + + BOOL alloc () + { + size_t total_prealloc_size = + MAX_NUM_BUCKETS * sizeof (free_space_bucket) + + MAX_NUM_FREE_SPACES * sizeof (seg_free_space); + + free_space_buckets = (free_space_bucket*) new (nothrow) uint8_t[total_prealloc_size]; + + return (!!free_space_buckets); + } + + void add_buckets (int base, size_t* ordered_free_spaces, int bucket_count, 
size_t item_count) + { + assert (free_space_buckets); + assert (item_count <= (size_t)MAX_PTR); + + free_space_bucket_count = bucket_count; + free_space_item_count = item_count; + base_power2 = base; +#ifdef _DEBUG + has_end_of_seg = FALSE; +#endif //_DEBUG + + ptrdiff_t total_item_count = 0; + ptrdiff_t i = 0; + + seg_free_space_array = (seg_free_space*)(free_space_buckets + free_space_bucket_count); + + for (i = 0; i < (ptrdiff_t)item_count; i++) + { + seg_free_space_array[i].start = 0; + seg_free_space_array[i].is_plug = FALSE; + } + + for (i = 0; i < bucket_count; i++) + { + free_space_buckets[i].count_add = ordered_free_spaces[i]; + free_space_buckets[i].count_fit = ordered_free_spaces[i]; + free_space_buckets[i].free_space = &seg_free_space_array[total_item_count]; + total_item_count += free_space_buckets[i].count_add; + } + + assert (total_item_count == (ptrdiff_t)item_count); + } + + void add (void* start, BOOL plug_p, BOOL first_p) + { + size_t size = (plug_p ? + pinned_len ((mark*)start) : + (heap_segment_committed ((heap_segment*)start) - + heap_segment_plan_allocated ((heap_segment*)start))); + + if (plug_p) + { + dprintf (SEG_REUSE_LOG_1, ("[%d]Adding a free space before plug: %zd", heap_num, size)); + } + else + { + dprintf (SEG_REUSE_LOG_1, ("[%d]Adding a free space at end of seg: %zd", heap_num, size)); +#ifdef _DEBUG + has_end_of_seg = TRUE; +#endif //_DEBUG + } + + if (first_p) + { + size_t eph_gen_starts = gc_heap::eph_gen_starts_size; + size -= eph_gen_starts; + if (plug_p) + { + mark* m = (mark*)(start); + pinned_len (m) -= eph_gen_starts; + } + else + { + heap_segment* seg = (heap_segment*)start; + heap_segment_plan_allocated (seg) += eph_gen_starts; + } + } + + int bucket_power2 = index_of_highest_set_bit (size); + if (bucket_power2 < base_power2) + { + return; + } + + free_space_bucket* bucket = &free_space_buckets[bucket_power2 - base_power2]; + + seg_free_space* bucket_free_space = bucket->free_space; + assert (plug_p || (!plug_p && 
bucket->count_add)); + + if (bucket->count_add == 0) + { + dprintf (SEG_REUSE_LOG_1, ("[%d]Already have enough of 2^%d", heap_num, bucket_power2)); + return; + } + + ptrdiff_t index = bucket->count_add - 1; + + dprintf (SEG_REUSE_LOG_1, ("[%d]Building free spaces: adding %p; len: %zd (2^%d)", + heap_num, + (plug_p ? + (pinned_plug ((mark*)start) - pinned_len ((mark*)start)) : + heap_segment_plan_allocated ((heap_segment*)start)), + size, + bucket_power2)); + + if (plug_p) + { + bucket_free_space[index].is_plug = TRUE; + } + + bucket_free_space[index].start = start; + bucket->count_add--; + } + +#ifdef _DEBUG + void check() + { + ptrdiff_t i = 0; + int end_of_seg_count = 0; + + for (i = 0; i < free_space_item_count; i++) + { + assert (seg_free_space_array[i].start); + if (!(seg_free_space_array[i].is_plug)) + { + end_of_seg_count++; + } + } + + if (has_end_of_seg) + { + assert (end_of_seg_count == 1); + } + else + { + assert (end_of_seg_count == 0); + } + + for (i = 0; i < free_space_bucket_count; i++) + { + assert (free_space_buckets[i].count_add == 0); + } + } +#endif //_DEBUG + + uint8_t* fit (uint8_t* old_loc, + size_t plug_size + REQD_ALIGN_AND_OFFSET_DCL) + { + if (old_loc) + { +#ifdef SHORT_PLUGS + assert (!is_plug_padded (old_loc)); +#endif //SHORT_PLUGS + assert (!node_realigned (old_loc)); + } + + size_t saved_plug_size = plug_size; + +#ifdef FEATURE_STRUCTALIGN + _ASSERTE(requiredAlignment == DATA_ALIGNMENT && false); +#endif // FEATURE_STRUCTALIGN + + size_t plug_size_to_fit = plug_size; + +#ifdef RESPECT_LARGE_ALIGNMENT + plug_size_to_fit += switch_alignment_size(FALSE); +#endif //RESPECT_LARGE_ALIGNMENT + + int plug_power2 = index_of_highest_set_bit (round_up_power2 (plug_size_to_fit + Align(min_obj_size))); + ptrdiff_t i; + uint8_t* new_address = 0; + + if (plug_power2 < base_power2) + { + plug_power2 = base_power2; + } + + int chosen_power2 = plug_power2 - base_power2; + retry: + for (i = chosen_power2; i < free_space_bucket_count; i++) + { + if 
(free_space_buckets[i].count_fit != 0) + { + break; + } + chosen_power2++; + } + + dprintf (SEG_REUSE_LOG_1, ("[%d]Fitting plug len %zd (2^%d) using 2^%d free space", + heap_num, + plug_size, + plug_power2, + (chosen_power2 + base_power2))); + + assert (i < free_space_bucket_count); + + seg_free_space* bucket_free_space = free_space_buckets[chosen_power2].free_space; + ptrdiff_t free_space_count = free_space_buckets[chosen_power2].count_fit; + size_t new_free_space_size = 0; + BOOL can_fit = FALSE; + size_t pad = 0; + + for (i = 0; i < free_space_count; i++) + { + size_t free_space_size = 0; + pad = 0; + + if (bucket_free_space[i].is_plug) + { + mark* m = (mark*)(bucket_free_space[i].start); + uint8_t* plug_free_space_start = pinned_plug (m) - pinned_len (m); + + if (!((old_loc == 0) || same_large_alignment_p (old_loc, plug_free_space_start))) + { + pad = switch_alignment_size (FALSE); + } + + plug_size = saved_plug_size + pad; + + free_space_size = pinned_len (m); + new_address = pinned_plug (m) - pinned_len (m); + + if (free_space_size >= (plug_size + Align (min_obj_size)) || + free_space_size == plug_size) + { + new_free_space_size = free_space_size - plug_size; + pinned_len (m) = new_free_space_size; +#ifdef SIMPLE_DPRINTF + dprintf (SEG_REUSE_LOG_0, ("[%d]FP: 0x%p->0x%p(%zx)(%zx), [0x%p (2^%d) -> [0x%p (2^%d)", + heap_num, + old_loc, + new_address, + (plug_size - pad), + pad, + pinned_plug (m), + index_of_highest_set_bit (free_space_size), + (pinned_plug (m) - pinned_len (m)), + index_of_highest_set_bit (new_free_space_size))); +#endif //SIMPLE_DPRINTF + + if (pad != 0) + { + set_node_realigned (old_loc); + } + + can_fit = TRUE; + } + } + else + { + heap_segment* seg = (heap_segment*)(bucket_free_space[i].start); + free_space_size = heap_segment_committed (seg) - heap_segment_plan_allocated (seg); + + if (!((old_loc == 0) || same_large_alignment_p (old_loc, heap_segment_plan_allocated (seg)))) + { + pad = switch_alignment_size (FALSE); + } + + plug_size = 
saved_plug_size + pad; + + if (free_space_size >= (plug_size + Align (min_obj_size)) || + free_space_size == plug_size) + { + new_address = heap_segment_plan_allocated (seg); + new_free_space_size = free_space_size - plug_size; + heap_segment_plan_allocated (seg) = new_address + plug_size; +#ifdef SIMPLE_DPRINTF + dprintf (SEG_REUSE_LOG_0, ("[%d]FS: 0x%p-> 0x%p(%zd) (2^%d) -> 0x%p (2^%d)", + heap_num, + old_loc, + new_address, + (plug_size - pad), + index_of_highest_set_bit (free_space_size), + heap_segment_plan_allocated (seg), + index_of_highest_set_bit (new_free_space_size))); +#endif //SIMPLE_DPRINTF + + if (pad != 0) + set_node_realigned (old_loc); + + can_fit = TRUE; + } + } + + if (can_fit) + { + break; + } + } + + if (!can_fit) + { + assert (chosen_power2 == 0); + chosen_power2 = 1; + goto retry; + } + + new_address += pad; + assert ((chosen_power2 && (i == 0)) || + ((!chosen_power2) && (i < free_space_count))); + + int new_bucket_power2 = index_of_highest_set_bit (new_free_space_size); + + if (new_bucket_power2 < base_power2) + { + new_bucket_power2 = base_power2; + } + + move_bucket (chosen_power2, new_bucket_power2 - base_power2); + + return new_address; + } + + void cleanup () + { + if (free_space_buckets) + { + delete [] free_space_buckets; + } + if (seg_free_space_array) + { + delete [] seg_free_space_array; + } + } +}; +#endif //!USE_REGIONS + +#ifdef FEATURE_PREMORTEM_FINALIZATION +#define REGISTER_FOR_FINALIZATION(_object, _size) \ + hp->finalize_queue->RegisterForFinalization (0, (_object), (_size)) +#else // FEATURE_PREMORTEM_FINALIZATION +#define REGISTER_FOR_FINALIZATION(_object, _size) true +#endif // FEATURE_PREMORTEM_FINALIZATION + +#define CHECK_ALLOC_AND_POSSIBLY_REGISTER_FOR_FINALIZATION(_object, _size, _register) do { \ + if ((_object) == NULL || ((_register) && !REGISTER_FOR_FINALIZATION(_object, _size))) \ + { \ + STRESS_LOG_OOM_STACK(_size); \ + return NULL; \ + } \ +} while (false) + +extern uint64_t qpf; +extern double qpf_ms; 
+extern double qpf_us; + +#ifdef FEATURE_BASICFREEZE +heap_segment* ro_segment_lookup (uint8_t* o); +#endif //FEATURE_BASICFREEZE + +struct imemory_data +{ + uint8_t* memory_base; +}; + +struct numa_reserved_block +{ + uint8_t* memory_base; + size_t block_size; + + numa_reserved_block() : memory_base(nullptr), block_size(0) { } +}; + +struct initial_memory_details +{ + imemory_data *initial_memory; + imemory_data *initial_normal_heap; // points into initial_memory_array + imemory_data *initial_large_heap; // points into initial_memory_array + imemory_data *initial_pinned_heap; // points into initial_memory_array + + size_t block_size_normal; + size_t block_size_large; + size_t block_size_pinned; + + int block_count; // # of blocks in each + int current_block_normal; + int current_block_large; + int current_block_pinned; + + enum + { + ALLATONCE = 1, + EACH_GENERATION, + EACH_BLOCK, + ALLATONCE_SEPARATED_POH, + EACH_NUMA_NODE + }; + + size_t allocation_pattern; + + size_t block_size(int i) + { + switch (i / block_count) + { + case 0: return block_size_normal; + case 1: return block_size_large; + case 2: return block_size_pinned; + default: UNREACHABLE(); + } + }; + + void* get_initial_memory (int gen, int h_number) + { + switch (gen) + { + case soh_gen0: + case soh_gen1: + case soh_gen2: return initial_normal_heap[h_number].memory_base; + case loh_generation: return initial_large_heap[h_number].memory_base; + case poh_generation: return initial_pinned_heap[h_number].memory_base; + default: UNREACHABLE(); + } + }; + + size_t get_initial_size (int gen) + { + switch (gen) + { + case soh_gen0: + case soh_gen1: + case soh_gen2: return block_size_normal; + case loh_generation: return block_size_large; + case poh_generation: return block_size_pinned; + default: UNREACHABLE(); + } + }; + + int numa_reserved_block_count; + numa_reserved_block* numa_reserved_block_table; +}; + +extern initial_memory_details memory_details; + +#if defined(BACKGROUND_GC) && 
!defined(USE_REGIONS) +#define SEGMENT_INITIAL_COMMIT (2*OS_PAGE_SIZE) +#else +#define SEGMENT_INITIAL_COMMIT (OS_PAGE_SIZE) +#endif //BACKGROUND_GC && !USE_REGIONS + +// min size to decommit to make the OS call worthwhile +#define MIN_DECOMMIT_SIZE (100*OS_PAGE_SIZE) + +#ifdef SERVER_GC + +#ifdef HOST_64BIT + +#define INITIAL_ALLOC ((size_t)((size_t)4*1024*1024*1024)) +#define LHEAP_ALLOC ((size_t)(1024*1024*256)) + +#else + +#define INITIAL_ALLOC ((size_t)(1024*1024*64)) +#define LHEAP_ALLOC ((size_t)(1024*1024*32)) + +#endif // HOST_64BIT + +#else //SERVER_GC + +#ifdef HOST_64BIT + +#define INITIAL_ALLOC ((size_t)(1024*1024*256)) +#define LHEAP_ALLOC ((size_t)(1024*1024*128)) + +#else + +#define INITIAL_ALLOC ((size_t)(1024*1024*16)) +#define LHEAP_ALLOC ((size_t)(1024*1024*16)) + +#endif // HOST_64BIT + +#endif //SERVER_GC + +} // namespace WKS/SVR + +#endif // GC_INTERNAL_H diff --git a/src/coreclr/gc/gcsvr.cpp b/src/coreclr/gc/gcsvr.cpp deleted file mode 100644 index 4d54ca2db58aa2..00000000000000 --- a/src/coreclr/gc/gcsvr.cpp +++ /dev/null @@ -1,7 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#ifdef FEATURE_SVR_GC -#define SERVER_GC 1 -#include "gc.cpp" -#endif // FEATURE_SVR_GC diff --git a/src/coreclr/gc/gcwks.cpp b/src/coreclr/gc/gcwks.cpp deleted file mode 100644 index 886e199a29efb4..00000000000000 --- a/src/coreclr/gc/gcwks.cpp +++ /dev/null @@ -1,8 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#ifdef SERVER_GC -#undef SERVER_GC -#endif - -#include "gc.cpp" diff --git a/src/coreclr/gc/init.cpp b/src/coreclr/gc/init.cpp index ccf0b35b3d312c..2be60b63ab0a35 100644 --- a/src/coreclr/gc/init.cpp +++ b/src/coreclr/gc/init.cpp @@ -1,6 +1,14 @@ // Licensed to the .NET Foundation under one or more agreements. 
// The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR { +#else // SERVER_GC +namespace WKS { +#endif // SERVER_GC + #ifdef WRITE_WATCH void hardware_write_watch_api_supported() { @@ -1242,6 +1250,11 @@ size_t gc_heap::get_gen0_min_size() return gen0size; } +#ifndef HOST_64BIT +// Max size of heap hard limit (2^31) to be able to be aligned and rounded up on power of 2 and not overflow +const size_t max_heap_hard_limit = (size_t)2 * (size_t)1024 * (size_t)1024 * (size_t)1024; +#endif //!HOST_64BIT + bool gc_heap::compute_hard_limit_from_heap_limits() { #ifndef HOST_64BIT @@ -1552,3 +1565,5 @@ int gc_heap::refresh_memory_limit() return (int)status; } + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/interface.cpp b/src/coreclr/gc/interface.cpp index 40fcc1f46b51ae..91691a4694d802 100644 --- a/src/coreclr/gc/interface.cpp +++ b/src/coreclr/gc/interface.cpp @@ -1,6 +1,16 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + class NoGCRegionLockHolder { public: @@ -14,6 +24,42 @@ class NoGCRegionLockHolder leave_spin_lock_noinstru(&g_no_gc_lock); } }; + +inline +CObjectHeader* gc_heap::allocate (size_t jsize, alloc_context* acontext, uint32_t flags) +{ + size_t size = Align (jsize); + assert (size >= Align (min_obj_size)); + { + retry: + uint8_t* result = acontext->alloc_ptr; + acontext->alloc_ptr+=size; + if (acontext->alloc_ptr <= acontext->alloc_limit) + { + CObjectHeader* obj = (CObjectHeader*)result; + assert (obj != 0); + return obj; + } + else + { + acontext->alloc_ptr -= size; + +#ifdef _MSC_VER +#pragma inline_depth(0) +#endif //_MSC_VER + + if (! 
allocate_more_space (acontext, size, flags, 0)) + return 0; + +#ifdef _MSC_VER +#pragma inline_depth(20) +#endif //_MSC_VER + + goto retry; + } + } +} + void GCHeap::Shutdown() { // This does not work for standalone GC on Windows because windows closed the file @@ -2736,3 +2782,5 @@ int GCHeap::RefreshMemoryLimit() { return gc_heap::refresh_memory_limit(); } + +} // namespace SVR/WKS diff --git a/src/coreclr/gc/mark_phase.cpp b/src/coreclr/gc/mark_phase.cpp index 9948265b6b763f..eb355e76154a4a 100644 --- a/src/coreclr/gc/mark_phase.cpp +++ b/src/coreclr/gc/mark_phase.cpp @@ -1,6 +1,20 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + +#ifdef MULTIPLE_HEAPS +gc_heap* seg_mapping_table_heap_of_gc (uint8_t* o); +#endif //MULTIPLE_HEAPS + inline size_t clear_special_bits (uint8_t* node) { @@ -97,24 +111,6 @@ size_t gc_heap::deque_pinned_plug () return m; } -inline -mark* gc_heap::pinned_plug_of (size_t bos) -{ - return &mark_stack_array [ bos ]; -} - -inline -mark* gc_heap::oldest_pin () -{ - return pinned_plug_of (mark_stack_bos); -} - -inline -BOOL gc_heap::pinned_plug_que_empty_p () -{ - return (mark_stack_bos == mark_stack_tos); -} - inline mark* gc_heap::before_oldest_pin() { @@ -143,18 +139,22 @@ void gc_heap::make_mark_stack (mark* arr) #endif //MH_SC_MARK } -#ifdef BACKGROUND_GC inline -size_t& gc_heap::bpromoted_bytes(int thread) +gc_heap* gc_heap::heap_of_gc (uint8_t* o) { #ifdef MULTIPLE_HEAPS - return g_bpromoted [thread*16]; + if (o == 0) + return g_heaps [0]; + + gc_heap* hp = seg_mapping_table_heap_of_gc (o); + return (hp ? 
hp : g_heaps[0]); #else //MULTIPLE_HEAPS - UNREFERENCED_PARAMETER(thread); - return g_bpromoted; + UNREFERENCED_PARAMETER(o); + return __this; #endif //MULTIPLE_HEAPS } +#ifdef BACKGROUND_GC void gc_heap::make_background_mark_stack (uint8_t** arr) { background_mark_stack_array = arr; @@ -169,36 +169,12 @@ void gc_heap::make_c_mark_list (uint8_t** arr) c_mark_list_length = 1 + (OS_PAGE_SIZE / MIN_OBJECT_SIZE); } -inline -unsigned int gc_heap::mark_array_marked(uint8_t* add) -{ - return mark_array [mark_word_of (add)] & (1 << mark_bit_bit_of (add)); -} - inline BOOL gc_heap::is_mark_bit_set (uint8_t* add) { return (mark_array [mark_word_of (add)] & (1 << mark_bit_bit_of (add))); } -inline -void gc_heap::mark_array_set_marked (uint8_t* add) -{ - size_t index = mark_word_of (add); - uint32_t val = (1 << mark_bit_bit_of (add)); -#ifdef MULTIPLE_HEAPS - Interlocked::Or (&(mark_array [index]), val); -#else - mark_array [index] |= val; -#endif -} - -inline -void gc_heap::mark_array_clear_marked (uint8_t* add) -{ - mark_array [mark_word_of (add)] &= ~(1 << mark_bit_bit_of (add)); -} - #ifdef FEATURE_BASICFREEZE // end must be page aligned addresses. 
void gc_heap::clear_mark_array (uint8_t* from, uint8_t* end) @@ -919,17 +895,6 @@ void gc_heap::grow_mark_list () #ifdef BACKGROUND_GC #ifdef FEATURE_BASICFREEZE -inline -void gc_heap::seg_clear_mark_array_bits_soh (heap_segment* seg) -{ - uint8_t* range_beg = 0; - uint8_t* range_end = 0; - if (bgc_mark_array_range (seg, FALSE, &range_beg, &range_end)) - { - clear_mark_array (range_beg, align_on_mark_word (range_end)); - } -} - inline void gc_heap::seg_set_mark_array_bits_soh (heap_segment* seg) { @@ -1011,13 +976,7 @@ void gc_heap::bgc_clear_batch_mark_array_bits (uint8_t* start, uint8_t* end) #endif //BACKGROUND_GC -inline -BOOL gc_heap::is_mark_set (uint8_t* o) -{ - return marked (o); -} - -inline +/*inline*/ size_t gc_heap::get_promoted_bytes() { #ifdef USE_REGIONS @@ -1477,24 +1436,6 @@ BOOL gc_heap::gc_mark1 (uint8_t* o) return marked; } -#ifdef USE_REGIONS -inline bool gc_heap::is_in_gc_range (uint8_t* o) -{ -#ifdef FEATURE_BASICFREEZE - // we may have frozen objects in read only segments - // outside of the reserved address range of the gc heap - assert (((g_gc_lowest_address <= o) && (o < g_gc_highest_address)) || - (o == nullptr) || (ro_segment_lookup (o) != nullptr)); -#else //FEATURE_BASICFREEZE - // without frozen objects, every non-null pointer must be - // within the heap - assert ((o == nullptr) || (g_gc_lowest_address <= o) && (o < g_gc_highest_address)); -#endif //FEATURE_BASICFREEZE - return ((gc_low <= o) && (o < gc_high)); -} - -#endif //USE_REGIONS - inline BOOL gc_heap::gc_mark (uint8_t* o, uint8_t* low, uint8_t* high, int condemned_gen) { @@ -2871,24 +2812,6 @@ void gc_heap::fire_mark_event (int root_type, size_t& current_promoted_bytes, si #endif // FEATURE_EVENT_TRACE } -#ifdef FEATURE_EVENT_TRACE -inline -void gc_heap::record_mark_time (uint64_t& mark_time, - uint64_t& current_mark_time, - uint64_t& last_mark_time) -{ - if (informational_event_enabled_p) - { - current_mark_time = GetHighPrecisionTimeStamp(); - mark_time = 
limit_time_to_uint32 (current_mark_time - last_mark_time); - dprintf (3, ("%zd - %zd = %zd", - current_mark_time, last_mark_time, (current_mark_time - last_mark_time))); - last_mark_time = current_mark_time; - } -} - -#endif //FEATURE_EVENT_TRACE - void gc_heap::mark_phase (int condemned_gen_number) { assert (settings.concurrent == FALSE); @@ -3597,22 +3520,6 @@ void gc_heap::mark_phase (int condemned_gen_number) dprintf(2,("---- End of mark phase ----")); } -inline -void gc_heap::pin_object (uint8_t* o, uint8_t** ppObject) -{ - dprintf (3, ("Pinning %zx->%zx", (size_t)ppObject, (size_t)o)); - set_pinned (o); - -#ifdef FEATURE_EVENT_TRACE - if(EVENT_ENABLED(PinObjectAtGCTime)) - { - fire_etw_pin_object_event(o, ppObject); - } -#endif // FEATURE_EVENT_TRACE - - num_pinned_objects++; -} - size_t gc_heap::get_total_pinned_objects() { #ifdef MULTIPLE_HEAPS @@ -3728,7 +3635,7 @@ void gc_heap::grow_mark_list_piece() // if the child object's region is <= condemned_gen. // cg_pointers_found means it's pointing into a lower generation so it's incremented // if the child object's region is < current_gen. -inline void +/*inline*/ void gc_heap::mark_through_cards_helper (uint8_t** poo, size_t& n_gen, size_t& cg_pointers_found, card_fn fn, uint8_t* nhigh, @@ -4202,3 +4109,5 @@ void gc_heap::mark_through_cards_for_segments (card_fn fn, BOOL relocating CARD_ n_gen, n_eph, n_card_set, total_cards_cleared, generation_skip_ratio)); } } + +} // namespace SVR/WKS diff --git a/src/coreclr/gc/memory.cpp b/src/coreclr/gc/memory.cpp index cd533a16e8036d..2f18ec90553dce 100644 --- a/src/coreclr/gc/memory.cpp +++ b/src/coreclr/gc/memory.cpp @@ -1,6 +1,16 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + bool gc_heap::virtual_alloc_commit_for_heap (void* addr, size_t size, int h_number) { #ifdef MULTIPLE_HEAPS @@ -326,6 +336,7 @@ bool gc_heap::decommit_step (uint64_t step_milliseconds) decommit_size += hp->decommit_ephemeral_segment_pages_step (); } #endif //MULTIPLE_HEAPS + return (decommit_size != 0); } @@ -482,3 +493,5 @@ size_t gc_heap::decommit_ephemeral_segment_pages_step () } #endif //MULTIPLE_HEAPS + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/no_gc.cpp b/src/coreclr/gc/no_gc.cpp index 6569f84dead1df..dfcc281e64129e 100644 --- a/src/coreclr/gc/no_gc.cpp +++ b/src/coreclr/gc/no_gc.cpp @@ -1,6 +1,14 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR { +#else // SERVER_GC +namespace WKS { +#endif // SERVER_GC + void gc_heap::update_collection_counts_for_no_gc() { assert (settings.pause_mode == pause_no_gc); @@ -929,3 +937,5 @@ enable_no_gc_region_callback_status gc_heap::enable_no_gc_callback(NoGCRegionCal return status; } + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/plan_phase.cpp b/src/coreclr/gc/plan_phase.cpp index eee724dad0892d..da04f8b4003793 100644 --- a/src/coreclr/gc/plan_phase.cpp +++ b/src/coreclr/gc/plan_phase.cpp @@ -1,6 +1,19 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + +// If every heap's gen2 or gen3 size is less than this threshold we will do a blocking GC. 
+const size_t bgc_min_per_heap = 4*1024*1024; + inline BOOL is_induced_blocking (gc_reason reason) { @@ -723,66 +736,156 @@ bool gc_heap::init_table_for_region (int gen_number, heap_segment* region) #endif //USE_REGIONS -// The following 2 methods Use integer division to prevent potential floating point exception. -// FPE may occur if we use floating point division because of speculative execution. -// -// Return the percentage of efficiency (between 0 and 100) of the allocator. -inline -size_t gc_heap::generation_allocator_efficiency_percent (generation* inst) +inline BOOL +gc_heap::dt_low_ephemeral_space_p (gc_tuning_point tp) { -#ifdef DYNAMIC_HEAP_COUNT - if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) + BOOL ret = FALSE; + + switch (tp) { - uint64_t total_plan_allocated = generation_total_plan_allocated (inst); - uint64_t condemned_allocated = generation_condemned_allocated (inst); - return ((total_plan_allocated == 0) ? 0 : (100 * (total_plan_allocated - condemned_allocated) / total_plan_allocated)); + case tuning_deciding_condemned_gen: +#ifndef USE_REGIONS + case tuning_deciding_compaction: + case tuning_deciding_expansion: +#endif //USE_REGIONS + case tuning_deciding_full_gc: + { + ret = (!ephemeral_gen_fit_p (tp)); + break; + } +#ifndef USE_REGIONS + case tuning_deciding_promote_ephemeral: + { + size_t new_gen0size = approximate_new_allocation(); + ptrdiff_t plan_ephemeral_size = total_ephemeral_size; + + dprintf (GTC_LOG, ("h%d: plan eph size is %zd, new gen0 is %zd", + heap_number, plan_ephemeral_size, new_gen0size)); + ret = ((soh_segment_size - segment_info_size) < (plan_ephemeral_size + new_gen0size)); + break; + } +#endif //USE_REGIONS + default: + { + assert (!"invalid tuning reason"); + break; + } } - else -#endif //DYNAMIC_HEAP_COUNT + + return ret; +} + +inline BOOL +gc_heap::dt_estimate_reclaim_space_p (gc_tuning_point tp, int gen_number) +{ + BOOL ret = FALSE; + + switch (tp) { - uint64_t free_obj_space = 
generation_free_obj_space (inst); - uint64_t free_list_allocated = generation_free_list_allocated (inst); - if ((free_list_allocated + free_obj_space) == 0) - return 0; - return (size_t)((100 * free_list_allocated) / (free_list_allocated + free_obj_space)); + case tuning_deciding_condemned_gen: + { + if (gen_number == max_generation) + { + size_t est_maxgen_free = estimated_reclaim (gen_number); + + uint32_t num_heaps = 1; +#ifdef MULTIPLE_HEAPS + num_heaps = gc_heap::n_heaps; +#endif //MULTIPLE_HEAPS + + size_t min_frag_th = min_reclaim_fragmentation_threshold (num_heaps); + dprintf (GTC_LOG, ("h%d, min frag is %zd", heap_number, min_frag_th)); + ret = (est_maxgen_free >= min_frag_th); + } + else + { + assert (0); + } + break; + } + + default: + break; } + + return ret; } -inline -size_t gc_heap::generation_unusable_fragmentation (generation* inst, int hn) +inline BOOL +gc_heap::dt_estimate_high_frag_p (gc_tuning_point tp, int gen_number, uint64_t available_mem) { -#ifdef DYNAMIC_HEAP_COUNT - if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) - { - uint64_t total_plan_allocated = generation_total_plan_allocated (inst); - uint64_t condemned_allocated = generation_condemned_allocated (inst); - uint64_t unusable_frag = 0; - size_t fo_space = (((ptrdiff_t)generation_free_obj_space (inst) < 0) ? 
0 : generation_free_obj_space (inst)); + BOOL ret = FALSE; - if (total_plan_allocated != 0) + switch (tp) + { + case tuning_deciding_condemned_gen: { - unusable_frag = fo_space + (condemned_allocated * generation_free_list_space (inst) / total_plan_allocated); - } + if (gen_number == max_generation) + { + dynamic_data* dd = dynamic_data_of (gen_number); + float est_frag_ratio = 0; + if (dd_current_size (dd) == 0) + { + est_frag_ratio = 1; + } + else if ((dd_fragmentation (dd) == 0) || (dd_fragmentation (dd) + dd_current_size (dd) == 0)) + { + est_frag_ratio = 0; + } + else + { + est_frag_ratio = (float)dd_fragmentation (dd) / (float)(dd_fragmentation (dd) + dd_current_size (dd)); + } - dprintf (3, ("h%d g%d FLa: %Id, ESa: %Id, Ca: %Id | FO: %Id, FL %Id, fl effi %.3f, unusable fl is %Id", - hn, inst->gen_num, - generation_free_list_allocated (inst), generation_end_seg_allocated (inst), (size_t)condemned_allocated, - fo_space, generation_free_list_space (inst), - ((total_plan_allocated == 0) ? 
1.0 : ((float)(total_plan_allocated - condemned_allocated) / (float)total_plan_allocated)), - (size_t)unusable_frag)); + size_t est_frag = (dd_fragmentation (dd) + (size_t)((dd_desired_allocation (dd) - dd_new_allocation (dd)) * est_frag_ratio)); + dprintf (GTC_LOG, ("h%d: gen%d: current_size is %zd, frag is %zd, est_frag_ratio is %d%%, estimated frag is %zd", + heap_number, + gen_number, + dd_current_size (dd), + dd_fragmentation (dd), + (int)(est_frag_ratio * 100), + est_frag)); + + uint32_t num_heaps = 1; + +#ifdef MULTIPLE_HEAPS + num_heaps = gc_heap::n_heaps; +#endif //MULTIPLE_HEAPS + uint64_t min_frag_th = min_high_fragmentation_threshold(available_mem, num_heaps); + ret = (est_frag >= min_frag_th); + } + else + { + assert (0); + } + break; + } - return (size_t)unusable_frag; + default: + break; } - else -#endif //DYNAMIC_HEAP_COUNT + + return ret; +} + +inline BOOL +gc_heap::dt_low_card_table_efficiency_p (gc_tuning_point tp) +{ + BOOL ret = FALSE; + + switch (tp) { - uint64_t free_obj_space = generation_free_obj_space (inst); - uint64_t free_list_allocated = generation_free_list_allocated (inst); - uint64_t free_list_space = generation_free_list_space (inst); - if ((free_list_allocated + free_obj_space) == 0) - return 0; - return (size_t)(free_obj_space + (free_obj_space * free_list_space) / (free_list_allocated + free_obj_space)); + case tuning_deciding_condemned_gen: + { + ret = (generation_skip_ratio < generation_skip_ratio_threshold); + break; } + + default: + break; + } + + return ret; } /* @@ -1974,12 +2077,6 @@ void gc_heap::process_ephemeral_boundaries (uint8_t* x, #endif //!USE_REGIONS #ifdef FEATURE_LOH_COMPACTION -inline -BOOL gc_heap::loh_pinned_plug_que_empty_p() -{ - return (loh_pinned_queue_bos == loh_pinned_queue_tos); -} - void gc_heap::loh_set_allocator_next_pin() { if (!(loh_pinned_plug_que_empty_p())) @@ -2005,12 +2102,6 @@ size_t gc_heap::loh_deque_pinned_plug () return m; } -inline -mark* gc_heap::loh_pinned_plug_of (size_t bos) -{ - 
return &loh_pinned_queue[bos]; -} - inline mark* gc_heap::loh_oldest_pin() { @@ -2198,20 +2289,6 @@ BOOL gc_heap::loh_compaction_requested() return (loh_compaction_always_p || (loh_compaction_mode != loh_compaction_default)); } -inline -void gc_heap::check_loh_compact_mode (BOOL all_heaps_compacted_p) -{ - if (settings.loh_compaction && (loh_compaction_mode == loh_compaction_once)) - { - if (all_heaps_compacted_p) - { - // If the compaction mode says to compact once and we are going to compact LOH, - // we need to revert it back to no compaction. - loh_compaction_mode = loh_compaction_default; - } - } -} - BOOL gc_heap::plan_loh() { #ifdef FEATURE_EVENT_TRACE @@ -2482,8 +2559,8 @@ void gc_heap::record_interesting_data_point (interesting_data_point idp) #else UNREFERENCED_PARAMETER(idp); #endif //GC_CONFIG_DRIVEN -} +} #ifdef USE_REGIONS void gc_heap::skip_pins_in_alloc_region (generation* consing_gen, int plan_gen_num) { @@ -3223,6 +3300,17 @@ inline void save_allocated(heap_segment* seg) } } +#ifdef USE_INTROSORT +#define _sort introsort::sort +#elif defined(USE_VXSORT) +// in this case we have do_vxsort which takes an additional range that +// all items to be sorted are contained in +// so do not #define _sort +#else //USE_INTROSORT +#define _sort qsort1 +void qsort1(uint8_t** low, uint8_t** high, unsigned int depth); +#endif //USE_INTROSORT + void gc_heap::plan_phase (int condemned_gen_number) { size_t old_gen2_allocated = 0; @@ -5977,23 +6065,6 @@ void gc_heap::sweep_region_in_plan (heap_segment* region, } } -inline -void gc_heap::check_demotion_helper_sip (uint8_t** pval, int parent_gen_num, uint8_t* parent_loc) -{ - uint8_t* child_object = *pval; - if (!is_in_heap_range (child_object)) - return; - assert (child_object != nullptr); - int child_object_plan_gen = get_region_plan_gen_num (child_object); - - if (child_object_plan_gen < parent_gen_num) - { - set_card (card_of (parent_loc)); - } - - dprintf (3, ("SCS %d, %d", child_object_plan_gen, parent_gen_num)); 
-} - #endif //USE_REGIONS #ifndef USE_REGIONS #ifdef SEG_REUSE_STATS @@ -8367,3 +8438,5 @@ BOOL gc_heap::should_do_sweeping_gc (BOOL compact_p) } #endif //GC_CONFIG_DRIVEN + +} // namespace SVR/WKS diff --git a/src/coreclr/gc/region_allocator.cpp b/src/coreclr/gc/region_allocator.cpp index c30493055ee204..fae6b7f4b0a51f 100644 --- a/src/coreclr/gc/region_allocator.cpp +++ b/src/coreclr/gc/region_allocator.cpp @@ -1,8 +1,18 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" #ifdef USE_REGIONS + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + bool region_allocator::init (uint8_t* start, uint8_t* end, size_t alignment, uint8_t** lowest, uint8_t** highest) { uint8_t* actual_start = start; @@ -488,4 +498,7 @@ void region_allocator::move_highest_free_regions (int64_t n, bool small_region_p current_index -= current_num_units; } } + +} // namespace WKS/SVR + #endif //USE_REGIONS diff --git a/src/coreclr/gc/region_free_list.cpp b/src/coreclr/gc/region_free_list.cpp index 24dfc127baa7e2..98eb10bbb9b545 100644 --- a/src/coreclr/gc/region_free_list.cpp +++ b/src/coreclr/gc/region_free_list.cpp @@ -1,8 +1,18 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+#include "gcinternal.h" #ifdef USE_REGIONS + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + region_free_list::region_free_list() : num_free_regions (0), size_free_regions (0), size_committed_in_free_regions (0), @@ -480,4 +490,7 @@ void region_free_list::sort_by_committed_and_age() } tail_free_region = prev; } + +} // namespace WKS/SVR + #endif //USE_REGIONS diff --git a/src/coreclr/gc/regions_segments.cpp b/src/coreclr/gc/regions_segments.cpp index 613b96468a958f..26417f2c83679c 100644 --- a/src/coreclr/gc/regions_segments.cpp +++ b/src/coreclr/gc/regions_segments.cpp @@ -1,6 +1,45 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + +inline +uint8_t* align_on_segment (uint8_t* add) +{ + return (uint8_t*)((size_t)(add + (((size_t)1 << gc_heap::min_segment_size_shr) - 1)) & ~(((size_t)1 << gc_heap::min_segment_size_shr) - 1)); +} + +#ifdef FEATURE_BASICFREEZE +inline +size_t ro_seg_begin_index (heap_segment* seg) +{ +#ifdef USE_REGIONS + size_t begin_index = (size_t)heap_segment_mem (seg) >> gc_heap::min_segment_size_shr; +#else + size_t begin_index = (size_t)seg >> gc_heap::min_segment_size_shr; +#endif //USE_REGIONS + begin_index = max (begin_index, (size_t)g_gc_lowest_address >> gc_heap::min_segment_size_shr); + return begin_index; +} + +inline +size_t ro_seg_end_index (heap_segment* seg) +{ + size_t end_index = (size_t)(heap_segment_reserved (seg) - 1) >> gc_heap::min_segment_size_shr; + end_index = min (end_index, (size_t)g_gc_highest_address >> gc_heap::min_segment_size_shr); + return end_index; +} + +#endif //FEATURE_BASICFREEZE + size_t size_seg_mapping_table_of (uint8_t* from, uint8_t* end) { from = align_lower_segment (from); @@ -17,12 +56,6 @@ size_t size_region_to_generation_table_of (uint8_t* 
from, uint8_t* end) return sizeof (uint8_t)*((size_t)(end - from) >> gc_heap::min_segment_size_shr); } -inline -size_t seg_mapping_word_of (uint8_t* add) -{ - return (size_t)add >> gc_heap::min_segment_size_shr; -} - #ifdef FEATURE_BASICFREEZE void seg_mapping_table_add_ro_segment (heap_segment* seg) { @@ -1087,7 +1120,11 @@ bool gc_heap::is_region_demoted (uint8_t* obj) return demoted_p; } -inline +#ifdef USE_REGIONS +static GCSpinLock write_barrier_spin_lock; +#endif //USE_REGIONS + +/*inline*/ void gc_heap::set_region_gen_num (heap_segment* region, int gen_num) { assert (gen_num < (1 << (sizeof (uint8_t) * 8))); @@ -1164,67 +1201,6 @@ void gc_heap::set_region_gen_num (heap_segment* region, int gen_num) } } -inline -void gc_heap::set_region_plan_gen_num (heap_segment* region, int plan_gen_num, bool replace_p) -{ - int gen_num = heap_segment_gen_num (region); - int supposed_plan_gen_num = get_plan_gen_num (gen_num); - dprintf (REGIONS_LOG, ("h%d setting plan gen on %p->%p(was gen%d) to %d(should be: %d) %s", - heap_number, region, - heap_segment_mem (region), - gen_num, plan_gen_num, - supposed_plan_gen_num, - ((plan_gen_num < supposed_plan_gen_num) ? "DEMOTED" : "ND"))); - region_info region_info_bits_to_set = (region_info)(plan_gen_num << RI_PLAN_GEN_SHR); - if ((plan_gen_num < supposed_plan_gen_num) && (heap_segment_pinned_survived (region) != 0)) - { - if (!settings.demotion) - { - settings.demotion = TRUE; - } - get_gc_data_per_heap()->set_mechanism_bit (gc_demotion_bit); - region->flags |= heap_segment_flags_demoted; - region_info_bits_to_set = (region_info)(region_info_bits_to_set | RI_DEMOTED); - } - else - { - region->flags &= ~heap_segment_flags_demoted; - } - - // If replace_p is true, it means we need to move a region from its original planned gen to this new gen. 
- if (replace_p) - { - int original_plan_gen_num = heap_segment_plan_gen_num (region); - planned_regions_per_gen[original_plan_gen_num]--; - } - - planned_regions_per_gen[plan_gen_num]++; - dprintf (REGIONS_LOG, ("h%d g%d %zx(%zx) -> g%d (total %d region planned in g%d)", - heap_number, heap_segment_gen_num (region), (size_t)region, heap_segment_mem (region), plan_gen_num, planned_regions_per_gen[plan_gen_num], plan_gen_num)); - - heap_segment_plan_gen_num (region) = plan_gen_num; - - uint8_t* region_start = get_region_start (region); - uint8_t* region_end = heap_segment_reserved (region); - - size_t region_index_start = get_basic_region_index_for_address (region_start); - size_t region_index_end = get_basic_region_index_for_address (region_end); - for (size_t region_index = region_index_start; region_index < region_index_end; region_index++) - { - assert (plan_gen_num <= max_generation); - map_region_to_generation[region_index] = (region_info)(region_info_bits_to_set | (map_region_to_generation[region_index] & ~(RI_PLAN_GEN_MASK|RI_DEMOTED))); - } -} - -inline -void gc_heap::set_region_plan_gen_num_sip (heap_segment* region, int plan_gen_num) -{ - if (!heap_segment_swept_in_plan (region)) - { - set_region_plan_gen_num (region, plan_gen_num); - } -} - void gc_heap::set_region_sweep_in_plan (heap_segment*region) { heap_segment_swept_in_plan (region) = true; @@ -2385,3 +2361,5 @@ void gc_heap::generation_delete_heap_segment (generation* gen, } #endif //BACKGROUND_GC + +} // namespace SVR/WKS diff --git a/src/coreclr/gc/relocate_compact.cpp b/src/coreclr/gc/relocate_compact.cpp index 7d8caa5e9926bb..161dc211b3e482 100644 --- a/src/coreclr/gc/relocate_compact.cpp +++ b/src/coreclr/gc/relocate_compact.cpp @@ -1,6 +1,16 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + void memcopy (uint8_t* dmem, uint8_t* smem, size_t size) { const size_t sz4ptr = sizeof(PTR_PTR)*4; @@ -53,6 +63,24 @@ bool gc_heap::should_check_brick_for_reloc (uint8_t* o) return (map_region_to_generation_skewed[skewed_basic_region_index] & (RI_SIP|RI_GEN_MASK)) <= settings.condemned_generation; } +inline +void gc_heap::check_demotion_helper_sip (uint8_t** pval, int parent_gen_num, uint8_t* parent_loc) +{ + uint8_t* child_object = *pval; + if (!is_in_heap_range (child_object)) + return; + + assert (child_object != nullptr); + int child_object_plan_gen = get_region_plan_gen_num (child_object); + + if (child_object_plan_gen < parent_gen_num) + { + set_card (card_of (parent_loc)); + } + + dprintf (3, ("SCS %d, %d", child_object_plan_gen, parent_gen_num)); +} + #endif //USE_REGIONS #ifdef FEATURE_LOH_COMPACTION @@ -848,6 +876,18 @@ void gc_heap::verify_pins_with_post_plug_info (const char* msg) #endif // _DEBUG && VERIFY_HEAP } +#ifdef COLLECTIBLE_CLASS +// We don't want to burn another ptr size space for pinned plugs to record this so just +// set the card unconditionally for collectible objects if we are demoting. +inline void gc_heap::unconditional_set_card_collectible (uint8_t* obj) +{ + if (settings.demotion) + { + set_card (card_of (obj)); + } +} +#endif //COLLECTIBLE_CLASS + void gc_heap::relocate_shortened_survivor_helper (uint8_t* plug, uint8_t* plug_end, mark* pinned_plug_entry) { uint8_t* x = plug; @@ -1013,12 +1053,6 @@ void gc_heap::relocate_survivors_in_brick (uint8_t* tree, relocate_args* args) } } -inline -void gc_heap::update_oldest_pinned_plug() -{ - oldest_pinned_plug = (pinned_plug_que_empty_p() ? 
0 : pinned_plug (oldest_pin())); -} - heap_segment* gc_heap::get_start_segment (generation* gen) { heap_segment* start_heap_segment = heap_segment_rw (generation_start_segment (gen)); @@ -2259,3 +2293,5 @@ void gc_heap::relocate_in_uoh_objects (int gen_num) } } } + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/sample/CMakeLists.txt b/src/coreclr/gc/sample/CMakeLists.txt index 34bb8526230c71..28835e250d6361 100644 --- a/src/coreclr/gc/sample/CMakeLists.txt +++ b/src/coreclr/gc/sample/CMakeLists.txt @@ -11,10 +11,29 @@ set(SOURCES ../gceventstatus.cpp ../gcconfig.cpp ../gccommon.cpp - ../gceewks.cpp + ../gcee.cpp ../gchandletable.cpp ../gcscan.cpp - ../gcwks.cpp + ../gc.cpp + ../init.cpp + ../no_gc.cpp + ../finalization.cpp + ../dynamic_tuning.cpp + ../region_free_list.cpp + ../region_allocator.cpp + ../memory.cpp + ../sweep.cpp + ../collect.cpp + ../diagnostics.cpp + ../dynamic_heap_count.cpp + ../card_table.cpp + ../relocate_compact.cpp + ../mark_phase.cpp + ../background.cpp + ../interface.cpp + ../allocation.cpp + ../plan_phase.cpp + ../regions_segments.cpp ../gcload.cpp ../handletable.cpp ../handletablecache.cpp diff --git a/src/coreclr/gc/sample/GCSample.vcxproj b/src/coreclr/gc/sample/GCSample.vcxproj index 0b7e657b35f807..198358729ea7b8 100644 --- a/src/coreclr/gc/sample/GCSample.vcxproj +++ b/src/coreclr/gc/sample/GCSample.vcxproj @@ -94,9 +94,28 @@ - + - + + + + + + + + + + + + + + + + + + + + diff --git a/src/coreclr/gc/sample/GCSample.vcxproj.filters b/src/coreclr/gc/sample/GCSample.vcxproj.filters index 9fac162f4ac83e..6477fad46f3265 100644 --- a/src/coreclr/gc/sample/GCSample.vcxproj.filters +++ b/src/coreclr/gc/sample/GCSample.vcxproj.filters @@ -41,13 +41,70 @@ Source Files - + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + 
Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + Source Files Source Files - + Source Files @@ -75,4 +132,4 @@ Source Files - \ No newline at end of file + diff --git a/src/coreclr/gc/sweep.cpp b/src/coreclr/gc/sweep.cpp index 25a1825639eb2d..65d752156b9e40 100644 --- a/src/coreclr/gc/sweep.cpp +++ b/src/coreclr/gc/sweep.cpp @@ -1,6 +1,16 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + #ifdef FEATURE_BASICFREEZE inline @@ -602,3 +612,5 @@ void gc_heap::sweep_uoh_objects (int gen_num) _ASSERTE(generation_allocation_segment(gen) != NULL); } +} // namespace WKS/SVR + diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt index 9f3a80c702358e..4042296d016a0b 100644 --- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/CMakeLists.txt @@ -1,5 +1,29 @@ set(GC_DIR ../../gc) +set(GC_WKS_SVR_SOURCES + ${GC_DIR}/gc.cpp + ${GC_DIR}/init.cpp + ${GC_DIR}/no_gc.cpp + ${GC_DIR}/finalization.cpp + ${GC_DIR}/dynamic_tuning.cpp + ${GC_DIR}/region_free_list.cpp + ${GC_DIR}/region_allocator.cpp + ${GC_DIR}/memory.cpp + ${GC_DIR}/sweep.cpp + ${GC_DIR}/collect.cpp + ${GC_DIR}/diagnostics.cpp + ${GC_DIR}/dynamic_heap_count.cpp + ${GC_DIR}/card_table.cpp + ${GC_DIR}/relocate_compact.cpp + ${GC_DIR}/mark_phase.cpp + ${GC_DIR}/background.cpp + ${GC_DIR}/interface.cpp + ${GC_DIR}/allocation.cpp + ${GC_DIR}/plan_phase.cpp + ${GC_DIR}/regions_segments.cpp + ${GC_DIR}/gcee.cpp +) + set(COMMON_RUNTIME_SOURCES allocheap.cpp rhassert.cpp @@ -44,8 +68,6 @@ set(COMMON_RUNTIME_SOURCES ${GC_DIR}/gcconfig.cpp ${GC_DIR}/gchandletable.cpp ${GC_DIR}/gccommon.cpp - ${GC_DIR}/gceewks.cpp - ${GC_DIR}/gcwks.cpp ${GC_DIR}/gcscan.cpp ${GC_DIR}/handletable.cpp 
${GC_DIR}/handletablecache.cpp @@ -62,11 +84,6 @@ if (CLR_CMAKE_TARGET_UNIX AND NOT CLR_CMAKE_TARGET_ARCH_WASM) ) endif() -set(SERVER_GC_SOURCES - ${GC_DIR}/gceesvr.cpp - ${GC_DIR}/gcsvr.cpp -) - set(STANDALONEGC_DISABLED_SOURCES clrgc.disabled.cpp ) @@ -330,7 +347,7 @@ list(APPEND COMMON_RUNTIME_SOURCES ${GC_HEADERS}) convert_to_absolute_path(COMMON_RUNTIME_SOURCES ${COMMON_RUNTIME_SOURCES}) convert_to_absolute_path(FULL_RUNTIME_SOURCES ${FULL_RUNTIME_SOURCES}) -convert_to_absolute_path(SERVER_GC_SOURCES ${SERVER_GC_SOURCES}) +convert_to_absolute_path(GC_WKS_SVR_SOURCES ${GC_WKS_SVR_SOURCES}) convert_to_absolute_path(STANDALONEGC_DISABLED_SOURCES ${STANDALONEGC_DISABLED_SOURCES}) convert_to_absolute_path(STANDALONEGC_ENABLED_SOURCES ${STANDALONEGC_ENABLED_SOURCES}) convert_to_absolute_path(RUNTIME_SOURCES_ARCH_ASM ${RUNTIME_SOURCES_ARCH_ASM}) diff --git a/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt index 74cdeca700a1ae..4dafc728c7970c 100644 --- a/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt @@ -22,11 +22,27 @@ if (CLR_CMAKE_TARGET_WIN32) endif() endif (CLR_CMAKE_TARGET_WIN32) -add_library(Runtime.WorkstationGC STATIC ${COMMON_RUNTIME_SOURCES} ${FULL_RUNTIME_SOURCES} ${RUNTIME_ARCH_ASM_OBJECTS}) +add_library(Runtime.GC.Workstation OBJECT ${GC_WKS_SVR_SOURCES}) +add_dependencies(Runtime.GC.Workstation aot_eventing_headers) + +add_library(Runtime.GC.Server OBJECT ${GC_WKS_SVR_SOURCES}) +add_dependencies(Runtime.GC.Server aot_eventing_headers) +target_compile_definitions(Runtime.GC.Server PRIVATE FEATURE_SVR_GC SERVER_GC) + +add_library(Runtime.WorkstationGC STATIC + ${COMMON_RUNTIME_SOURCES} + ${FULL_RUNTIME_SOURCES} + ${RUNTIME_ARCH_ASM_OBJECTS} + $<TARGET_OBJECTS:Runtime.GC.Workstation>) add_dependencies(Runtime.WorkstationGC aot_eventing_headers) target_link_libraries(Runtime.WorkstationGC PRIVATE aotminipal) -add_library(Runtime.ServerGC STATIC ${COMMON_RUNTIME_SOURCES}
${FULL_RUNTIME_SOURCES} ${SERVER_GC_SOURCES} ${RUNTIME_ARCH_ASM_OBJECTS}) +add_library(Runtime.ServerGC STATIC + ${COMMON_RUNTIME_SOURCES} + ${FULL_RUNTIME_SOURCES} + ${RUNTIME_ARCH_ASM_OBJECTS} + $<TARGET_OBJECTS:Runtime.GC.Workstation> + $<TARGET_OBJECTS:Runtime.GC.Server>) add_dependencies(Runtime.ServerGC aot_eventing_headers) target_link_libraries(Runtime.ServerGC PRIVATE aotminipal) @@ -44,7 +60,7 @@ if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) add_library(Runtime.VxsortDisabled STATIC ${DUMMY_VXSORT_SOURCES}) endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) -target_compile_definitions(Runtime.ServerGC PRIVATE -DFEATURE_SVR_GC) +target_compile_definitions(Runtime.ServerGC PRIVATE FEATURE_SVR_GC) if (CLR_CMAKE_TARGET_WIN32) set_target_properties(aotminipal PROPERTIES diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index 457cde2dc9e185..d9f280b3d2bf4b 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -507,11 +507,7 @@ set(GC_SOURCES_WKS ../gc/gcconfig.cpp ../gc/gccommon.cpp ../gc/gcscan.cpp - ../gc/gcsvr.cpp - ../gc/gcwks.cpp ../gc/gchandletable.cpp - ../gc/gceesvr.cpp - ../gc/gceewks.cpp ../gc/gcload.cpp ../gc/gcbridge.cpp ../gc/softwarewritewatch.cpp @@ -1038,6 +1034,42 @@ convert_to_absolute_path(VM_SOURCES_WKS_ARCH_ASM ${VM_SOURCES_WKS_ARCH_ASM}) convert_to_absolute_path(VM_SOURCES_DAC ${VM_SOURCES_DAC}) convert_to_absolute_path(VM_SOURCES_WKS_SPECIAL ${VM_SOURCES_WKS_SPECIAL}) +set(GC_WKS_SVR_SOURCES + ../gc/gcee.cpp + ../gc/gc.cpp + ../gc/init.cpp + ../gc/no_gc.cpp + ../gc/finalization.cpp + ../gc/dynamic_tuning.cpp + ../gc/region_free_list.cpp + ../gc/region_allocator.cpp + ../gc/memory.cpp + ../gc/sweep.cpp + ../gc/collect.cpp + ../gc/diagnostics.cpp + ../gc/dynamic_heap_count.cpp + ../gc/card_table.cpp + ../gc/relocate_compact.cpp + ../gc/mark_phase.cpp + ../gc/background.cpp + ../gc/interface.cpp + ../gc/allocation.cpp + ../gc/plan_phase.cpp + ../gc/regions_segments.cpp + ) + +add_library_clr(vm_gc_wks OBJECT
${GC_WKS_SVR_SOURCES}) +target_precompile_headers(vm_gc_wks PRIVATE [["common.h"]]) +target_compile_definitions(vm_gc_wks PRIVATE GC_DESCRIPTOR) +add_dependencies(vm_gc_wks eventing_headers) + +if(FEATURE_SVR_GC) + add_library_clr(vm_gc_svr OBJECT ${GC_WKS_SVR_SOURCES}) + target_precompile_headers(vm_gc_svr PRIVATE [["common.h"]]) + target_compile_definitions(vm_gc_svr PRIVATE GC_DESCRIPTOR SERVER_GC) + add_dependencies(vm_gc_svr eventing_headers) +endif() + if (NOT CLR_CMAKE_TARGET_ARCH_WASM) add_library_clr(cee_dac ${VM_SOURCES_DAC}) add_dependencies(cee_dac eventing_headers)