diff --git a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt index c0cf0a1ff4176b..3a04abb06b64cb 100644 --- a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt +++ b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt @@ -175,12 +175,14 @@ if(TARGET coreclr) $ ${CLRJIT_STATIC} ${CLRINTERPRETER_STATIC} + vm_gc_wks + $ cee_wks_core cee_wks ${FOUNDATION}) endif() -target_link_libraries(coreclr_static PUBLIC ${CORECLR_LIBRARIES} ${CORECLR_STATIC_CLRJIT_STATIC} ${CORECLR_STATIC_CLRINTERPRETER_STATIC} cee_wks_core ${CEE_WKS_STATIC} ${FOUNDATION}) +target_link_libraries(coreclr_static PUBLIC ${CORECLR_LIBRARIES} ${CORECLR_STATIC_CLRJIT_STATIC} ${CORECLR_STATIC_CLRINTERPRETER_STATIC} vm_gc_wks $ cee_wks_core ${CEE_WKS_STATIC} ${FOUNDATION}) target_compile_definitions(coreclr_static PUBLIC CORECLR_EMBEDDED) if (CLR_CMAKE_HOST_ANDROID) diff --git a/src/coreclr/gc/CMakeLists.txt b/src/coreclr/gc/CMakeLists.txt index 30ac2bb7e91ae8..1e88b637af6e0d 100644 --- a/src/coreclr/gc/CMakeLists.txt +++ b/src/coreclr/gc/CMakeLists.txt @@ -11,16 +11,12 @@ set(GC_SOURCES gcconfig.cpp gccommon.cpp gcscan.cpp - gcsvr.cpp - gcwks.cpp handletable.cpp handletablecore.cpp handletablescan.cpp objecthandle.cpp softwarewritewatch.cpp gchandletable.cpp - gceesvr.cpp - gceewks.cpp gcload.cpp gcbridge.cpp handletablecache.cpp) @@ -56,6 +52,7 @@ if (CLR_CMAKE_TARGET_WIN32) env/gcenv.windows.inl env/volatile.h gc.h + gcinternal.h gcconfig.h gcbridge.h gcdesc.h @@ -104,6 +101,29 @@ list(APPEND GC_SOURCES ${GC_HEADERS}) convert_to_absolute_path(GC_SOURCES ${GC_SOURCES}) +set(GC_WKS_SVR_SOURCES + gcee.cpp + gc.cpp + init.cpp + no_gc.cpp + finalization.cpp + dynamic_tuning.cpp + region_free_list.cpp + region_allocator.cpp + memory.cpp + sweep.cpp + collect.cpp + diagnostics.cpp + dynamic_heap_count.cpp + card_table.cpp + relocate_compact.cpp + mark_phase.cpp + background.cpp + interface.cpp + allocation.cpp + plan_phase.cpp + regions_segments.cpp) + 
if(FEATURE_STANDALONE_GC) if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) set(BUILD_EXP_GC 1) @@ -111,7 +131,20 @@ if(FEATURE_STANDALONE_GC) # clrgcexp is build with standalone+regions if (BUILD_EXP_GC) - add_library_clr(clrgcexp SHARED ${GC_SOURCES}) + add_library_clr(clrgcexp_gc_wks OBJECT ${GC_WKS_SVR_SOURCES}) + add_dependencies(clrgcexp_gc_wks eventing_headers) + target_compile_definitions(clrgcexp_gc_wks PRIVATE USE_REGIONS) + set(CLRGGCEXP_OBJECTS + $<TARGET_OBJECTS:clrgcexp_gc_wks>) + if (FEATURE_SVR_GC) + add_library_clr(clrgcexp_gc_svr OBJECT ${GC_WKS_SVR_SOURCES}) + add_dependencies(clrgcexp_gc_svr eventing_headers) + target_compile_definitions(clrgcexp_gc_svr PRIVATE SERVER_GC USE_REGIONS) + list(APPEND CLRGGCEXP_OBJECTS + $<TARGET_OBJECTS:clrgcexp_gc_svr>) + endif() + + add_library_clr(clrgcexp SHARED ${GC_SOURCES} ${CLRGGCEXP_OBJECTS}) add_dependencies(clrgcexp eventing_headers) target_link_libraries(clrgcexp PRIVATE ${GC_LINK_LIBRARIES}) target_link_libraries(clrgcexp PRIVATE gcexp_dll_wks_descriptor) @@ -122,8 +155,20 @@ if(FEATURE_STANDALONE_GC) install_clr(TARGETS clrgcexp DESTINATIONS . 
COMPONENT runtime) endif (BUILD_EXP_GC) + add_library_clr(clrgc_gc_wks OBJECT ${GC_WKS_SVR_SOURCES}) + add_dependencies(clrgc_gc_wks eventing_headers) + set(CLRGC_OBJECTS + $<TARGET_OBJECTS:clrgc_gc_wks>) + if(FEATURE_SVR_GC) + add_library_clr(clrgc_gc_svr OBJECT ${GC_WKS_SVR_SOURCES}) + add_dependencies(clrgc_gc_svr eventing_headers) + target_compile_definitions(clrgc_gc_svr PRIVATE SERVER_GC) + list(APPEND CLRGC_OBJECTS + $<TARGET_OBJECTS:clrgc_gc_svr>) + endif() + # clrgc is build with standalone+segments - add_library_clr(clrgc SHARED ${GC_SOURCES}) + add_library_clr(clrgc SHARED ${GC_SOURCES} ${CLRGC_OBJECTS}) add_dependencies(clrgc eventing_headers) target_link_libraries(clrgc PRIVATE ${GC_LINK_LIBRARIES}) target_link_libraries(clrgc PRIVATE gc_dll_wks_descriptor) diff --git a/src/coreclr/gc/allocation.cpp b/src/coreclr/gc/allocation.cpp index 947f2330de5a57..171a8286820c29 100644 --- a/src/coreclr/gc/allocation.cpp +++ b/src/coreclr/gc/allocation.cpp @@ -1,6 +1,15 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC allocator::allocator (unsigned int num_b, int fbb, alloc_list* b, int gen) { @@ -1321,27 +1330,6 @@ bool gc_heap::new_allocation_allowed (int gen_number) return TRUE; } -inline -ptrdiff_t gc_heap::get_desired_allocation (int gen_number) -{ - return dd_desired_allocation (dynamic_data_of (gen_number)); -} - -inline -ptrdiff_t gc_heap::get_new_allocation (int gen_number) -{ - return dd_new_allocation (dynamic_data_of (gen_number)); -} - -//return the amount allocated so far in gen_number -inline -ptrdiff_t gc_heap::get_allocation (int gen_number) -{ - dynamic_data* dd = dynamic_data_of (gen_number); - - return dd_desired_allocation (dd) - dd_new_allocation (dd); -} - #ifdef SHORT_PLUGS inline void set_padding_in_expand (uint8_t* old_loc, @@ -3253,29 +3241,6 @@ allocation_state gc_heap::allocate_soh (int gen_number, return soh_alloc_state; } -#ifdef BACKGROUND_GC -inline -void gc_heap::bgc_track_uoh_alloc() -{ - if (current_c_gc_state == c_gc_state_planning) - { - Interlocked::Increment (&uoh_alloc_thread_count); - dprintf (3, ("h%d: inc lc: %d", heap_number, (int32_t)uoh_alloc_thread_count)); - } -} - -inline -void gc_heap::bgc_untrack_uoh_alloc() -{ - if (current_c_gc_state == c_gc_state_planning) - { - Interlocked::Decrement (&uoh_alloc_thread_count); - dprintf (3, ("h%d: dec lc: %d", heap_number, (int32_t)uoh_alloc_thread_count)); - } -} - -#endif //BACKGROUND_GC - size_t gc_heap::get_uoh_seg_size (size_t size) { size_t default_seg_size = @@ -4551,41 +4516,6 @@ BOOL gc_heap::allocate_more_space(alloc_context* acontext, size_t size, return (status == a_state_can_allocate); } -inline -CObjectHeader* gc_heap::allocate (size_t jsize, alloc_context* acontext, uint32_t flags) -{ - size_t size = Align (jsize); - assert (size >= Align (min_obj_size)); - { - retry: - uint8_t* result = acontext->alloc_ptr; - acontext->alloc_ptr+=size; - if 
(acontext->alloc_ptr <= acontext->alloc_limit) - { - CObjectHeader* obj = (CObjectHeader*)result; - assert (obj != 0); - return obj; - } - else - { - acontext->alloc_ptr -= size; - -#ifdef _MSC_VER -#pragma inline_depth(0) -#endif //_MSC_VER - - if (! allocate_more_space (acontext, size, flags, 0)) - return 0; - -#ifdef _MSC_VER -#pragma inline_depth(20) -#endif //_MSC_VER - - goto retry; - } - } -} - void gc_heap::leave_allocation_segment (generation* gen) { adjust_limit (0, 0, gen); @@ -5417,15 +5347,6 @@ generation* gc_heap::ensure_ephemeral_heap_segment (generation* consing_gen) #endif //!USE_REGIONS -inline -void gc_heap::init_alloc_info (generation* gen, heap_segment* seg) -{ - generation_allocation_segment (gen) = seg; - generation_allocation_pointer (gen) = heap_segment_mem (seg); - generation_allocation_limit (gen) = generation_allocation_pointer (gen); - generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen); -} - inline heap_segment* gc_heap::get_next_alloc_seg (generation* gen) { @@ -5880,3 +5801,5 @@ CObjectHeader* gc_heap::allocate_uoh_object (size_t jsize, uint32_t flags, int g return obj; } + +} // namespace SVR/WKS diff --git a/src/coreclr/gc/background.cpp b/src/coreclr/gc/background.cpp index 3fc82188afa489..f151926d9309e7 100644 --- a/src/coreclr/gc/background.cpp +++ b/src/coreclr/gc/background.cpp @@ -1,6 +1,15 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC // static @@ -221,47 +230,6 @@ void gc_heap::concurrent_print_time_delta (const char* msg) } #ifdef BACKGROUND_GC -inline -BOOL gc_heap::background_marked (uint8_t* o) -{ - return mark_array_marked (o); -} - -inline -BOOL gc_heap::background_mark1 (uint8_t* o) -{ - BOOL to_mark = !mark_array_marked (o); - - dprintf (3, ("b*%zx*b(%d)", (size_t)o, (to_mark ? 1 : 0))); - if (to_mark) - { - mark_array_set_marked (o); - dprintf (4, ("n*%zx*n", (size_t)o)); - return TRUE; - } - else - return FALSE; -} - -// TODO: we could consider filtering out NULL's here instead of going to -// look for it on other heaps -inline -BOOL gc_heap::background_mark (uint8_t* o, uint8_t* low, uint8_t* high) -{ - BOOL marked = FALSE; - if ((o >= low) && (o < high)) - marked = background_mark1 (o); -#ifdef MULTIPLE_HEAPS - else if (o) - { - gc_heap* hp = heap_of (o); - assert (hp); - if ((o >= hp->background_saved_lowest_address) && (o < hp->background_saved_highest_address)) - marked = background_mark1 (o); - } -#endif //MULTIPLE_HEAPS - return marked; -} #ifdef USE_REGIONS void gc_heap::set_background_overflow_p (uint8_t* oo) @@ -3453,7 +3421,7 @@ void gc_heap::process_background_segment_end (heap_segment* seg, bgc_verify_mark_array_cleared (seg); } -inline +//inline BOOL gc_heap::fgc_should_consider_object (uint8_t* o, heap_segment* seg, BOOL consider_bgc_mark_p, @@ -4599,3 +4567,5 @@ size_t gc_heap::get_mark_array_size (heap_segment* seg) } #endif //USE_REGIONS + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/card_table.cpp b/src/coreclr/gc/card_table.cpp index 685438a68cbc35..e7e04e501eaa15 100644 --- a/src/coreclr/gc/card_table.cpp +++ b/src/coreclr/gc/card_table.cpp @@ -1,6 +1,16 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + #ifdef CARD_BUNDLE // Clear the specified card bundle @@ -130,18 +140,6 @@ BOOL gc_heap::card_bundles_enabled () #endif //CARD_BUNDLE -inline -size_t gc_heap::brick_of (uint8_t* add) -{ - return (size_t)(add - lowest_address) / brick_size; -} - -inline -uint8_t* gc_heap::brick_address (size_t brick) -{ - return lowest_address + (brick_size * brick); -} - void gc_heap::clear_brick_table (uint8_t* from, uint8_t* end) { size_t from_brick = brick_of (from); @@ -149,79 +147,6 @@ void gc_heap::clear_brick_table (uint8_t* from, uint8_t* end) memset (&brick_table[from_brick], 0, sizeof(brick_table[from_brick])*(end_brick-from_brick)); } -//codes for the brick entries: -//entry == 0 -> not assigned -//entry >0 offset is entry-1 -//entry <0 jump back entry bricks -inline -void gc_heap::set_brick (size_t index, ptrdiff_t val) -{ - if (val < -32767) - { - val = -32767; - } - assert (val < 32767); - if (val >= 0) - brick_table [index] = (short)val+1; - else - brick_table [index] = (short)val; - - dprintf (3, ("set brick[%zx] to %d\n", index, (short)val)); -} - -inline -int gc_heap::get_brick_entry (size_t index) -{ -#ifdef MULTIPLE_HEAPS - return VolatileLoadWithoutBarrier(&brick_table [index]); -#else - return brick_table[index]; -#endif -} - -inline -uint8_t* gc_heap::card_address (size_t card) -{ - return (uint8_t*) (card_size * card); -} - -inline -size_t gc_heap::card_of ( uint8_t* object) -{ - return (size_t)(object) / card_size; -} - -inline -void gc_heap::clear_card (size_t card) -{ - card_table [card_word (card)] = - (card_table [card_word (card)] & ~(1 << card_bit (card))); - dprintf (3,("Cleared card %zx [%zx, %zx[", card, (size_t)card_address (card), - (size_t)card_address (card+1))); -} - -inline -void gc_heap::set_card (size_t card) -{ - size_t word = card_word (card); - card_table[word] = (card_table [word] | (1 << card_bit (card))); - 
-#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - // Also set the card bundle that corresponds to the card - size_t bundle_to_set = cardw_card_bundle(word); - - card_bundle_set(bundle_to_set); - - dprintf (3,("Set card %zx [%zx, %zx[ and bundle %zx", card, (size_t)card_address (card), (size_t)card_address (card+1), bundle_to_set)); -#endif -} - -inline -BOOL gc_heap::card_set_p (size_t card) -{ - return ( card_table [ card_word (card) ] & (1 << card_bit (card))); -} - void gc_heap::destroy_card_table_helper (uint32_t* c_table) { uint8_t* lowest = card_table_lowest_address (c_table); @@ -1260,8 +1185,7 @@ inline void gc_heap::verify_card_bundle_bits_set(size_t first_card_word, size_t #endif } -// Verifies that any bundles that are not set represent only cards that are not set. -inline void gc_heap::verify_card_bundles() +void gc_heap::verify_card_bundles() { #ifdef _DEBUG size_t lowest_card = card_word (card_of (lowest_address)); @@ -1276,23 +1200,22 @@ inline void gc_heap::verify_card_bundles() while (cardb < end_cardb) { uint32_t* card_word = &card_table[max(card_bundle_cardw (cardb), lowest_card)]; - uint32_t* card_word_end = &card_table[min(card_bundle_cardw (cardb+1), highest_card)]; + uint32_t* card_word_end = &card_table[min(card_bundle_cardw (cardb + 1), highest_card)]; if (card_bundle_set_p (cardb) == 0) { - // Verify that no card is set while (card_word < card_word_end) { if (*card_word != 0) { dprintf (3, ("gc: %zd, Card word %zx for address %zx set, card_bundle %zx clear", dd_collection_count (dynamic_data_of (0)), - (size_t)(card_word-&card_table[0]), - (size_t)(card_address ((size_t)(card_word-&card_table[0]) * card_word_width)), + (size_t)(card_word - &card_table[0]), + (size_t)(card_address ((size_t)(card_word - &card_table[0]) * card_word_width)), cardb)); } - assert((*card_word)==0); + assert((*card_word) == 0); card_word++; } } @@ -1369,19 +1292,6 @@ void gc_heap::update_card_table_bundle() #endif //CARD_BUNDLE #endif //WRITE_WATCH -#ifdef 
COLLECTIBLE_CLASS -// We don't want to burn another ptr size space for pinned plugs to record this so just -// set the card unconditionally for collectible objects if we are demoting. -inline void -gc_heap::unconditional_set_card_collectible (uint8_t* obj) -{ - if (settings.demotion) - { - set_card (card_of (obj)); - } -} - -#endif //COLLECTIBLE_CLASS //Clear the cards [start_card, end_card[ void gc_heap::clear_cards (size_t start_card, size_t end_card) @@ -2004,3 +1914,5 @@ bool gc_heap::find_next_chunk(card_marking_enumerator& card_mark_enumerator, hea } #endif //FEATURE_CARD_MARKING_STEALING + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/collect.cpp b/src/coreclr/gc/collect.cpp index ee258ac3141b8d..a74d878f1e73b3 100644 --- a/src/coreclr/gc/collect.cpp +++ b/src/coreclr/gc/collect.cpp @@ -1,6 +1,16 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + wait_full_gc_status gc_heap::full_gc_wait (GCEvent *event, int time_out_ms) { #ifdef MULTIPLE_HEAPS @@ -1722,3 +1732,5 @@ void gc_heap::do_post_gc() mark_list_overflow = false; } } + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/diagnostics.cpp b/src/coreclr/gc/diagnostics.cpp index 17f485d6d6af62..78d3cd7304144c 100644 --- a/src/coreclr/gc/diagnostics.cpp +++ b/src/coreclr/gc/diagnostics.cpp @@ -1,6 +1,16 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + void gc_heap::add_to_history_per_heap() { #if defined(GC_HISTORY) && defined(BACKGROUND_GC) @@ -1785,3 +1795,5 @@ void gc_heap::walk_read_only_segment(heap_segment *seg, void *pvContext, object_ } #endif //FEATURE_BASICFREEZE + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/dynamic_heap_count.cpp b/src/coreclr/gc/dynamic_heap_count.cpp index 35599e1441f126..853a1c5ffe58cb 100644 --- a/src/coreclr/gc/dynamic_heap_count.cpp +++ b/src/coreclr/gc/dynamic_heap_count.cpp @@ -1,6 +1,16 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + #ifdef USE_REGIONS #ifdef DYNAMIC_HEAP_COUNT void gc_heap::check_decommissioned_heap() @@ -1535,3 +1545,5 @@ void gc_heap::add_to_bgc_hc_history (hc_record_stage stage) #endif //DYNAMIC_HEAP_COUNT #endif //USE_REGIONS + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/dynamic_tuning.cpp b/src/coreclr/gc/dynamic_tuning.cpp index d43357677984ff..9e315b5e5ca6cb 100644 --- a/src/coreclr/gc/dynamic_tuning.cpp +++ b/src/coreclr/gc/dynamic_tuning.cpp @@ -1,6 +1,14 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR { +#else // SERVER_GC +namespace WKS { +#endif // SERVER_GC + // Things we need to manually initialize: // gen0 min_size - based on cache @@ -42,48 +50,6 @@ static static_data static_data_table[latency_level_last - latency_level_first + }, }; -inline BOOL -gc_heap::dt_low_ephemeral_space_p (gc_tuning_point tp) -{ - BOOL ret = FALSE; - - switch (tp) - { - case tuning_deciding_condemned_gen: -#ifndef USE_REGIONS - case tuning_deciding_compaction: - case tuning_deciding_expansion: -#endif //USE_REGIONS - case tuning_deciding_full_gc: - { - ret = (!ephemeral_gen_fit_p (tp)); - break; - } -#ifndef USE_REGIONS - case tuning_deciding_promote_ephemeral: - { - size_t new_gen0size = approximate_new_allocation(); - ptrdiff_t plan_ephemeral_size = total_ephemeral_size; - - dprintf (GTC_LOG, ("h%d: plan eph size is %zd, new gen0 is %zd", - heap_number, plan_ephemeral_size, new_gen0size)); - // If we were in no_gc_region we could have allocated a larger than normal segment, - // and the next seg we allocate will be a normal sized seg so if we can't fit the new - // ephemeral generations there, do an ephemeral promotion. 
- ret = ((soh_segment_size - segment_info_size) < (plan_ephemeral_size + new_gen0size)); - break; - } -#endif //USE_REGIONS - default: - { - assert (!"invalid tuning reason"); - break; - } - } - - return ret; -} - BOOL gc_heap::dt_high_frag_p (gc_tuning_point tp, int gen_number, @@ -146,132 +112,6 @@ gc_heap::dt_high_frag_p (gc_tuning_point tp, return ret; } -inline BOOL -gc_heap::dt_estimate_reclaim_space_p (gc_tuning_point tp, int gen_number) -{ - BOOL ret = FALSE; - - switch (tp) - { - case tuning_deciding_condemned_gen: - { - if (gen_number == max_generation) - { - size_t est_maxgen_free = estimated_reclaim (gen_number); - - uint32_t num_heaps = 1; -#ifdef MULTIPLE_HEAPS - num_heaps = gc_heap::n_heaps; -#endif //MULTIPLE_HEAPS - - size_t min_frag_th = min_reclaim_fragmentation_threshold (num_heaps); - dprintf (GTC_LOG, ("h%d, min frag is %zd", heap_number, min_frag_th)); - ret = (est_maxgen_free >= min_frag_th); - } - else - { - assert (0); - } - break; - } - - default: - break; - } - - return ret; -} - -// DTREVIEW: Right now we only estimate gen2 fragmentation. 
-// on 64-bit though we should consider gen1 or even gen0 fragmentation as -// well -inline BOOL -gc_heap::dt_estimate_high_frag_p (gc_tuning_point tp, int gen_number, uint64_t available_mem) -{ - BOOL ret = FALSE; - - switch (tp) - { - case tuning_deciding_condemned_gen: - { - if (gen_number == max_generation) - { - dynamic_data* dd = dynamic_data_of (gen_number); - float est_frag_ratio = 0; - if (dd_current_size (dd) == 0) - { - est_frag_ratio = 1; - } - else if ((dd_fragmentation (dd) == 0) || (dd_fragmentation (dd) + dd_current_size (dd) == 0)) - { - est_frag_ratio = 0; - } - else - { - est_frag_ratio = (float)dd_fragmentation (dd) / (float)(dd_fragmentation (dd) + dd_current_size (dd)); - } - - size_t est_frag = (dd_fragmentation (dd) + (size_t)((dd_desired_allocation (dd) - dd_new_allocation (dd)) * est_frag_ratio)); - dprintf (GTC_LOG, ("h%d: gen%d: current_size is %zd, frag is %zd, est_frag_ratio is %d%%, estimated frag is %zd", - heap_number, - gen_number, - dd_current_size (dd), - dd_fragmentation (dd), - (int)(est_frag_ratio * 100), - est_frag)); - - uint32_t num_heaps = 1; - -#ifdef MULTIPLE_HEAPS - num_heaps = gc_heap::n_heaps; -#endif //MULTIPLE_HEAPS - uint64_t min_frag_th = min_high_fragmentation_threshold(available_mem, num_heaps); - //dprintf (GTC_LOG, ("h%d, min frag is %zd", heap_number, min_frag_th)); - ret = (est_frag >= min_frag_th); - } - else - { - assert (0); - } - break; - } - - default: - break; - } - - return ret; -} - -inline BOOL -gc_heap::dt_low_card_table_efficiency_p (gc_tuning_point tp) -{ - BOOL ret = FALSE; - - switch (tp) - { - case tuning_deciding_condemned_gen: - { - /* promote into max-generation if the card table has too many - * generation faults besides the n -> 0 - */ - ret = (generation_skip_ratio < generation_skip_ratio_threshold); - break; - } - - default: - break; - } - - return ret; -} - -inline BOOL -gc_heap::dt_high_memory_load_p() -{ - return ((settings.entry_memory_load >= high_memory_load_th) || 
g_low_memory_status); -} - #if defined(USE_REGIONS) bool gc_heap::near_heap_hard_limit_p() { @@ -2217,26 +2057,6 @@ size_t gc_heap::joined_youngest_desired (size_t new_allocation) #endif //HOST_64BIT -inline -gc_history_global* gc_heap::get_gc_data_global() -{ -#ifdef BACKGROUND_GC - return (settings.concurrent ? &bgc_data_global : &gc_data_global); -#else - return &gc_data_global; -#endif //BACKGROUND_GC -} - -inline -gc_history_per_heap* gc_heap::get_gc_data_per_heap() -{ -#ifdef BACKGROUND_GC - return (settings.concurrent ? &bgc_data_per_heap : &gc_data_per_heap); -#else - return &gc_data_per_heap; -#endif //BACKGROUND_GC -} - void gc_heap::compute_new_dynamic_data (int gen_number) { _ASSERTE(gen_number >= 0); @@ -2854,3 +2674,5 @@ void gc_heap::accumulate_committed_bytes(heap_segment* seg, size_t& committed_by seg = heap_segment_next_rw (seg); } } + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/finalization.cpp b/src/coreclr/gc/finalization.cpp index 46d8a187b066fc..e85ec827f48bef 100644 --- a/src/coreclr/gc/finalization.cpp +++ b/src/coreclr/gc/finalization.cpp @@ -1,6 +1,14 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR { +#else // SERVER_GC +namespace WKS { +#endif // SERVER_GC + void gc_heap::schedule_finalizer_work (FinalizerWorkItem* callback) { FinalizerWorkItem* prev; @@ -700,3 +708,5 @@ void gc_heap::walk_finalize_queue (fq_walk_fn fn) finalize_queue->WalkFReachableObjects (fn); #endif //FEATURE_PREMORTEM_FINALIZATION } + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index ca84b2c8599de5..24839af5b0d42e 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -16,47 +16,7 @@ // allocation helpers in gcscan.cpp // -#include "common.h" -#include "gcenv.h" - -#include "gc.h" -#include "gcscan.h" -#include "gcdesc.h" -#include "softwarewritewatch.h" -#include "handletable.h" -#include "handletable.inl" -#include "gcenv.inl" -#include "gceventstatus.h" -#include - -// If FEATURE_INTERPRETER is set, always enable the GC side of FEATURE_CONSERVATIVE_GC -#ifdef FEATURE_INTERPRETER -#ifndef FEATURE_CONSERVATIVE_GC -#define FEATURE_CONSERVATIVE_GC -#endif -#endif // FEATURE_INTERPRETER - -#ifdef __INTELLISENSE__ -#if defined(FEATURE_SVR_GC) - -#define SERVER_GC 1 - -#else // defined(FEATURE_SVR_GC) - -#ifdef SERVER_GC -#undef SERVER_GC -#endif - -#endif // defined(FEATURE_SVR_GC) -#endif // __INTELLISENSE__ - -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) -#include "vxsort/do_vxsort.h" -#define USE_VXSORT -#else -#define USE_INTROSORT -#endif // TARGET_AMD64 || TARGET_ARM64 -#include "introsort.h" +#include "gcinternal.h" #ifdef SERVER_GC namespace SVR { @@ -64,77 +24,12 @@ namespace SVR { namespace WKS { #endif // SERVER_GC -#include "gcimpl.h" -#include "gcpriv.h" - -#ifdef DACCESS_COMPILE -#error this source file should not be compiled with DACCESS_COMPILE! -#endif //DACCESS_COMPILE - -// We just needed a simple random number generator for testing. 
-class gc_rand -{ -public: - static uint64_t x; - - static uint64_t get_rand() - { - x = (314159269*x+278281) & 0x7FFFFFFF; - return x; - } - - // obtain random number in the range 0 .. r-1 - static uint64_t get_rand(uint64_t r) - { - // require r >= 0 - uint64_t x = (uint64_t)((get_rand() * r) >> 31); - return x; - } -}; - uint64_t gc_rand::x = 0; #if defined(BACKGROUND_GC) && defined(FEATURE_EVENT_TRACE) BOOL bgc_heap_walk_for_etw_p = FALSE; #endif //BACKGROUND_GC && FEATURE_EVENT_TRACE -#define MAX_PTR ((uint8_t*)(~(ptrdiff_t)0)) -#define commit_min_th (16*OS_PAGE_SIZE) - -#define MIN_SOH_CROSS_GEN_REFS (400) -#define MIN_LOH_CROSS_GEN_REFS (800) - -#ifdef SERVER_GC -#define partial_size_th 100 -#define num_partial_refs 64 -#else //SERVER_GC -#define partial_size_th 100 -#define num_partial_refs 32 -#endif //SERVER_GC - -#define demotion_plug_len_th (6*1024*1024) - -#ifdef USE_REGIONS -// If the survived / region_size is 90+%, we don't compact this region. -#define sip_surv_ratio_th (90) -// If the survived due to cards from old generations / region_size is 90+%, -// we don't compact this region, also we immediately promote it to gen2. 
-#define sip_old_card_surv_ratio_th (90) -#endif //USE_REGIONS - -#ifdef HOST_64BIT -#define MARK_STACK_INITIAL_LENGTH 1024 -#else -#define MARK_STACK_INITIAL_LENGTH 128 -#endif // HOST_64BIT - -#define LOH_PIN_QUEUE_LENGTH 100 -#define LOH_PIN_DECAY 10 - -#define UOH_ALLOCATION_RETRY_MAX_COUNT 2 - -#define MAX_YP_SPIN_COUNT_UNIT 32768 - uint32_t yp_spin_count_unit = 0; uint32_t original_spin_count_unit = 0; size_t loh_size_threshold = LARGE_OBJECT_SIZE; @@ -160,7 +55,7 @@ uint8_t g_build_variant = 2; VOLATILE(int32_t) g_no_gc_lock = -1; #ifdef TRACE_GC -const char * const allocation_state_str[] = { +extern const char * const allocation_state_str[] = { "start", "can_allocate", "cant_allocate", @@ -230,7 +125,7 @@ static const char* const str_gc_pause_modes[] = "no_gc" }; -static const char* const str_root_kinds[] = { +const char* const str_root_kinds[] = { "Stack", "FinalizeQueue", "Handles", @@ -244,20 +139,6 @@ static const char* const str_root_kinds[] = { }; #endif //DT_LOG || TRACE_GC -inline -BOOL is_induced (gc_reason reason) -{ - return ((reason == reason_induced) || - (reason == reason_induced_noforce) || - (reason == reason_lowmemory) || - (reason == reason_lowmemory_blocking) || - (reason == reason_induced_compacting) || - (reason == reason_induced_aggressive) || - (reason == reason_lowmemory_host) || - (reason == reason_lowmemory_host_blocking)); -} - - gc_oh_num gen_to_oh(int gen) { switch (gen) @@ -334,78 +215,6 @@ double gc_heap::bgc_tuning::ratio_correction_step = 0.0; int gc_heap::saved_bgc_tuning_reason = -1; #endif //BGC_SERVO_TUNING -inline -size_t round_up_power2 (size_t size) -{ - // Get the 0-based index of the most-significant bit in size-1. - // If the call failed (because size-1 is zero), size must be 1, - // so return 1 (because 1 rounds up to itself). 
- DWORD highest_set_bit_index; - if (0 == -#ifdef HOST_64BIT - BitScanReverse64( -#else - BitScanReverse( -#endif - &highest_set_bit_index, size - 1)) { return 1; } - - // The size == 0 case (which would have overflowed to SIZE_MAX when decremented) - // is handled below by relying on the fact that highest_set_bit_index is the maximum value - // (31 or 63, depending on sizeof(size_t)) and left-shifting a value >= 2 by that - // number of bits shifts in zeros from the right, resulting in an output of zero. - return static_cast(2) << highest_set_bit_index; -} - -inline -size_t round_down_power2 (size_t size) -{ - // Get the 0-based index of the most-significant bit in size. - // If the call failed, size must be zero so return zero. - DWORD highest_set_bit_index; - if (0 == -#ifdef HOST_64BIT - BitScanReverse64( -#else - BitScanReverse( -#endif - &highest_set_bit_index, size)) { return 0; } - - // Left-shift 1 by highest_set_bit_index to get back a value containing only - // the most-significant set bit of size, i.e. size rounded down - // to the next power-of-two value. - return static_cast(1) << highest_set_bit_index; -} - -// Get the 0-based index of the most-significant bit in the value. -// Returns -1 if the input value is zero (i.e. has no set bits). -inline -int index_of_highest_set_bit (size_t value) -{ - // Get the 0-based index of the most-significant bit in the value. - // If the call failed (because value is zero), return -1. - DWORD highest_set_bit_index; - return (0 == -#ifdef HOST_64BIT - BitScanReverse64( -#else - BitScanReverse( -#endif - &highest_set_bit_index, value)) ? 
-1 : static_cast(highest_set_bit_index); -} - - -inline -float mb (size_t num) -{ - return (float)((float)num / 1000.0 / 1000.0); -} - -inline -size_t gib (size_t num) -{ - return (num / 1024 / 1024 / 1024); -} - #ifdef BACKGROUND_GC uint32_t bgc_alloc_spin_count = 140; uint32_t bgc_alloc_spin = 2; @@ -428,15 +237,6 @@ float bgc_uoh_inc_ratio_alloc_wait = 2.0f; float bgc_uoh_inc_ratio_alloc_wait = 1.0f; #endif //USE_REGIONS -inline -void c_write (uint32_t& place, uint32_t value) -{ - Interlocked::Exchange (&place, value); -} - -// If every heap's gen2 or gen3 size is less than this threshold we will do a blocking GC. -const size_t bgc_min_per_heap = 4*1024*1024; - int gc_heap::gchist_index = 0; gc_mechanisms_store gc_heap::gchist[max_history_count]; @@ -453,7 +253,6 @@ BOOL gc_config_log_on = FALSE; FILE* gc_config_log = NULL; // we keep this much in a buffer and only flush when the buffer is full -#define gc_config_log_buffer_size (1*1024) // TEMP uint8_t* gc_config_log_buffer = 0; size_t gc_config_log_buffer_offset = 0; @@ -541,5795 +340,2582 @@ process_sync_log_stats() #ifdef MULTIPLE_HEAPS uint32_t g_num_active_processors = 0; -// Note that when a join is no longer used we still keep the values here because -// tooling already recognized them as having the meaning they were assigned originally. -// It doesn't break tooling if we stop using them but does if we assign a new meaning -// to them. 
-enum gc_join_stage -{ - gc_join_init_cpu_mapping = 0, - gc_join_done = 1, - gc_join_generation_determined = 2, - gc_join_begin_mark_phase = 3, - gc_join_scan_dependent_handles = 4, - gc_join_rescan_dependent_handles = 5, - gc_join_scan_sizedref_done = 6, - gc_join_null_dead_short_weak = 7, - gc_join_scan_finalization = 8, - gc_join_null_dead_long_weak = 9, - gc_join_null_dead_syncblk = 10, - gc_join_decide_on_compaction = 11, - gc_join_rearrange_segs_compaction = 12, - gc_join_adjust_handle_age_compact = 13, - gc_join_adjust_handle_age_sweep = 14, - gc_join_begin_relocate_phase = 15, - gc_join_relocate_phase_done = 16, - gc_join_verify_objects_done = 17, - gc_join_start_bgc = 18, - gc_join_restart_ee = 19, - gc_join_concurrent_overflow = 20, - gc_join_suspend_ee = 21, - gc_join_bgc_after_ephemeral = 22, - gc_join_allow_fgc = 23, - gc_join_bgc_sweep = 24, - gc_join_suspend_ee_verify = 25, - gc_join_restart_ee_verify = 26, - gc_join_set_state_free = 27, - gc_r_join_update_card_bundle = 28, - gc_join_after_absorb = 29, - gc_join_verify_copy_table = 30, - gc_join_after_reset = 31, - gc_join_after_ephemeral_sweep = 32, - gc_join_after_profiler_heap_walk = 33, - gc_join_minimal_gc = 34, - gc_join_after_commit_soh_no_gc = 35, - gc_join_expand_loh_no_gc = 36, - gc_join_final_no_gc = 37, - // No longer in use but do not remove, see comments for this enum. 
- gc_join_disable_software_write_watch = 38, - gc_join_merge_temp_fl = 39, - gc_join_bridge_processing = 40, - gc_join_max = 41 -}; +t_join gc_t_join; -enum gc_join_flavor -{ - join_flavor_server_gc = 0, - join_flavor_bgc = 1 -}; +#ifdef BACKGROUND_GC +t_join bgc_t_join; +#endif //BACKGROUND_GC -#define first_thread_arrived 2 -#pragma warning(push) -#pragma warning(disable:4324) // don't complain if DECLSPEC_ALIGN actually pads -struct DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) join_structure -{ - // Shared non volatile keep on separate line to prevent eviction - int n_threads; +#endif //MULTIPLE_HEAPS - // Keep polling/wait structures on separate line write once per join - DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) - GCEvent joined_event[3]; // the last event in the array is only used for first_thread_arrived. - Volatile lock_color; - VOLATILE(BOOL) wait_done; - VOLATILE(BOOL) joined_p; +void reset_memory (uint8_t* o, size_t sizeo); - // Keep volatile counted locks on separate cache line write many per join - DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) - VOLATILE(int) join_lock; - VOLATILE(int) r_join_lock; +#ifdef WRITE_WATCH -}; -#pragma warning(pop) +#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +bool virtual_alloc_hardware_write_watch = false; +#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -enum join_type -{ - type_last_join = 0, - type_join = 1, - type_restart = 2, - type_first_r_join = 3, - type_r_join = 4 -}; +bool hardware_write_watch_capability = false; -enum join_time -{ - time_start = 0, - time_end = 1 -}; +#else //WRITE_WATCH +#define mem_reserve (MEM_RESERVE) +#endif //WRITE_WATCH -enum join_heap_index +void WaitLongerNoInstru (int i) { - join_heap_restart = 100, - join_heap_r_restart = 200 -}; + // every 8th attempt: + bool bToggleGC = GCToEEInterface::EnablePreemptiveGC(); -class t_join -{ - join_structure join_struct; - - int id; - gc_join_flavor flavor; - -#ifdef JOIN_STATS - uint64_t start[MAX_SUPPORTED_CPUS], end[MAX_SUPPORTED_CPUS], start_seq; - // 
remember join id and last thread to arrive so restart can use these - int thd; - // we want to print statistics every 10 seconds - this is to remember the start of the 10 sec interval - uint64_t start_tick; - // counters for joins, in 1000's of clock cycles - uint64_t elapsed_total[gc_join_max], wake_total[gc_join_max], seq_loss_total[gc_join_max], par_loss_total[gc_join_max], in_join_total[gc_join_max]; -#endif //JOIN_STATS - -public: - BOOL init (int n_th, gc_join_flavor f) - { - dprintf (JOIN_LOG, ("Initializing join structure")); - join_struct.n_threads = n_th; - join_struct.lock_color = 0; - for (int i = 0; i < 3; i++) + // if we're waiting for gc to finish, we should block immediately + if (g_fSuspensionPending == 0) + { + if (g_num_processors > 1) { - if (!join_struct.joined_event[i].IsValid()) - { - join_struct.joined_p = FALSE; - dprintf (JOIN_LOG, ("Creating join event %d", i)); - // TODO - changing this to a non OS event - // because this is also used by BGC threads which are - // managed threads and WaitEx does not allow you to wait - // for an OS event on a managed thread. - // But we are not sure if this plays well in the hosting - // environment. 
- //join_struct.joined_event[i].CreateOSManualEventNoThrow(FALSE); - if (!join_struct.joined_event[i].CreateManualEventNoThrow(FALSE)) - return FALSE; - } + YieldProcessor(); // indicate to the processor that we are spinning + if (i & 0x01f) + GCToOSInterface::YieldThread (0); + else + GCToOSInterface::Sleep (5); } - join_struct.join_lock = join_struct.n_threads; - join_struct.r_join_lock = join_struct.n_threads; - join_struct.wait_done = FALSE; - flavor = f; - -#ifdef JOIN_STATS - start_tick = GCToOSInterface::GetLowPrecisionTimeStamp(); -#endif //JOIN_STATS - - return TRUE; + else + GCToOSInterface::Sleep (5); } - void update_n_threads(int n_th) + // If CLR is hosted, a thread may reach here while it is in preemptive GC mode, + // or it has no Thread object, in order to force a task to yield, or to triger a GC. + // It is important that the thread is going to wait for GC. Otherwise the thread + // is in a tight loop. If the thread has high priority, the perf is going to be very BAD. + if (bToggleGC) { - join_struct.n_threads = n_th; - join_struct.join_lock = n_th; - join_struct.r_join_lock = n_th; +#ifdef _DEBUG + // In debug builds, all enter_spin_lock operations go through this code. If a GC has + // started, it is important to block until the GC thread calls set_gc_done (since it is + // guaranteed to have cleared g_TrapReturningThreads by this point). This avoids livelock + // conditions which can otherwise occur if threads are allowed to spin in this function + // (and therefore starve the GC thread) between the point when the GC thread sets the + // WaitForGC event and the point when the GC thread clears g_TrapReturningThreads. + if (gc_heap::gc_started) + { + gc_heap::wait_for_gc_done(); + } +#endif // _DEBUG + GCToEEInterface::DisablePreemptiveGC(); } - - int get_num_threads() + else if (g_fSuspensionPending > 0) { - return join_struct.n_threads; + g_theGCHeap->WaitUntilGCComplete(); } +} - // This is for instrumentation only. 
- int get_join_lock() - { - return VolatileLoadWithoutBarrier (&join_struct.join_lock); - } +const int32_t lock_free = -1; +const int32_t lock_taken = 0; +const int32_t lock_decommissioned = 1; - void destroy () - { - dprintf (JOIN_LOG, ("Destroying join structure")); - for (int i = 0; i < 3; i++) - { - if (join_struct.joined_event[i].IsValid()) - join_struct.joined_event[i].CloseEvent(); - } - } - inline void fire_event (int heap, join_time time, join_type type, int join_id) +// If our heap got decommissioned, we need to try an existing heap. +//inline +bool gc_heap::should_move_heap (GCSpinLock* msl) +{ +#ifdef MULTIPLE_HEAPS + if (msl->lock == lock_decommissioned) { - FIRE_EVENT(GCJoin_V2, heap, time, type, join_id); + dprintf (5555, ("heap#%d got decommissioned! need to retry", heap_number)); } + return (msl->lock == lock_decommissioned); +#else //MULTIPLE_HEAPS + return false; +#endif //MULTIPLE_HEAPS +} - void join (gc_heap* gch, int join_id) +// All the places where we could be stopped because there was a suspension should call should_move_heap to check if we need to return +// so we can try another heap or we can continue the allocation on the same heap. 
+enter_msl_status gc_heap::enter_spin_lock_msl_helper (GCSpinLock* msl) +{ + do { -#ifdef JOIN_STATS - // parallel execution ends here - end[gch->heap_number] = get_ts(); -#endif //JOIN_STATS - - assert (!join_struct.joined_p); - int color = join_struct.lock_color.LoadWithoutBarrier(); +#ifdef DYNAMIC_HEAP_COUNT + uint64_t start = GetHighPrecisionTimeStamp(); +#endif //DYNAMIC_HEAP_COUNT - if (Interlocked::Decrement(&join_struct.join_lock) != 0) + unsigned int i = 0; + while (VolatileLoad (&msl->lock) != lock_free) { - dprintf (JOIN_LOG, ("join%d(%d): Join() Waiting...join_lock is now %d", - flavor, join_id, (int32_t)(join_struct.join_lock))); - - fire_event (gch->heap_number, time_start, type_join, join_id); - - //busy wait around the color - if (color == join_struct.lock_color.LoadWithoutBarrier()) + if (should_move_heap (msl)) { -respin: - int spin_count = 128 * yp_spin_count_unit; - for (int j = 0; j < spin_count; j++) + return msl_retry_different_heap; + } + if ((++i & 7) && !IsGCInProgress ()) + { + if (g_num_processors > 1) { - if (color != join_struct.lock_color.LoadWithoutBarrier()) +#ifndef MULTIPLE_HEAPS + int spin_count = 32 * yp_spin_count_unit; +#else //!MULTIPLE_HEAPS + int spin_count = yp_spin_count_unit; +#endif //!MULTIPLE_HEAPS + for (int j = 0; j < spin_count; j++) { - break; + if (VolatileLoad (&msl->lock) == lock_free || IsGCInProgress ()) + break; + // give the HT neighbor a chance to run + YieldProcessor (); } - YieldProcessor(); // indicate to the processor that we are spinning - } - - // we've spun, and if color still hasn't changed, fall into hard wait - if (color == join_struct.lock_color.LoadWithoutBarrier()) - { - dprintf (JOIN_LOG, ("join%d(%d): Join() hard wait on reset event %d, join_lock is now %d", - flavor, join_id, color, (int32_t)(join_struct.join_lock))); - - uint32_t dwJoinWait = join_struct.joined_event[color].Wait(INFINITE, FALSE); - - if (dwJoinWait != WAIT_OBJECT_0) + if (VolatileLoad (&msl->lock) != lock_free && 
!IsGCInProgress ()) { - STRESS_LOG1 (LF_GC, LL_FATALERROR, "joined event wait failed with code: %zx", dwJoinWait); - FATAL_GC_ERROR (); +#ifdef DYNAMIC_HEAP_COUNT + start -= GetHighPrecisionTimeStamp(); +#endif //DYNAMIC_HEAP_COUNT + safe_switch_to_thread (); +#ifdef DYNAMIC_HEAP_COUNT + start += GetHighPrecisionTimeStamp(); +#endif //DYNAMIC_HEAP_COUNT } } - - // avoid race due to the thread about to reset the event (occasionally) being preempted before ResetEvent() - if (color == join_struct.lock_color.LoadWithoutBarrier()) + else { - dprintf (9999, ("---h%d %d j%d %d - respin!!! (c:%d-%d)", - gch->heap_number, join_id, join_struct.n_threads, color, join_struct.lock_color.LoadWithoutBarrier())); - goto respin; + safe_switch_to_thread (); } - - dprintf (JOIN_LOG, ("join%d(%d): Join() done, join_lock is %d", - flavor, join_id, (int32_t)(join_struct.join_lock))); } - - fire_event (gch->heap_number, time_end, type_join, join_id); - -#ifdef JOIN_STATS - // parallel execution starts here - start[gch->heap_number] = get_ts(); - Interlocked::ExchangeAdd(&in_join_total[join_id], (start[gch->heap_number] - end[gch->heap_number])); -#endif //JOIN_STATS - } - else - { - fire_event (gch->heap_number, time_start, type_last_join, join_id); - - join_struct.joined_p = TRUE; - dprintf (JOIN_LOG, ("join%d(%d): Last thread to complete the join, setting id", flavor, join_id)); - join_struct.joined_event[!color].Reset(); - id = join_id; -#ifdef JOIN_STATS - // remember the join id, the last thread arriving, the start of the sequential phase, - // and keep track of the cycles spent waiting in the join - thd = gch->heap_number; - start_seq = get_ts(); - Interlocked::ExchangeAdd(&in_join_total[join_id], (start_seq - end[gch->heap_number])); -#endif //JOIN_STATS + else + { +#ifdef DYNAMIC_HEAP_COUNT + start -= GetHighPrecisionTimeStamp(); +#endif //DYNAMIC_HEAP_COUNT + WaitLongerNoInstru (i); +#ifdef DYNAMIC_HEAP_COUNT + start += GetHighPrecisionTimeStamp(); +#endif //DYNAMIC_HEAP_COUNT + 
} } +#ifdef DYNAMIC_HEAP_COUNT + uint64_t end = GetHighPrecisionTimeStamp(); + Interlocked::ExchangeAdd64 (&msl->msl_wait_time, end - start); + dprintf (3, ("h%d wait for msl lock wait time %zd, total wait time: %zd", heap_number, (end - start), msl->msl_wait_time)); +#endif //DYNAMIC_HEAP_COUNT } + while (Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) != lock_free); - // Reverse join - first thread gets here does the work; other threads will only proceed - // after the work is done. - // Note that you cannot call this twice in a row on the same thread. Plus there's no - // need to call it twice in row - you should just merge the work. - BOOL r_join (gc_heap* gch, int join_id) - { - - if (join_struct.n_threads == 1) - { - return TRUE; - } + return msl_entered; +} - if (Interlocked::CompareExchange(&join_struct.r_join_lock, 0, join_struct.n_threads) == 0) - { - fire_event (gch->heap_number, time_start, type_join, join_id); +#ifdef _DEBUG - dprintf (JOIN_LOG, ("r_join() Waiting...")); +#define ASSERT_NOT_HOLDING_SPIN_LOCK(pSpinLock) \ + _ASSERTE((pSpinLock)->holding_thread != GCToEEInterface::GetThread()); - //busy wait around the color -respin: - int spin_count = 256 * yp_spin_count_unit; - for (int j = 0; j < spin_count; j++) - { - if (join_struct.wait_done) - { - break; - } - YieldProcessor(); // indicate to the processor that we are spinning - } +#else //_DEBUG +#endif //_DEBUG - // we've spun, and if color still hasn't changed, fall into hard wait - if (!join_struct.wait_done) - { - dprintf (JOIN_LOG, ("Join() hard wait on reset event %d", first_thread_arrived)); - uint32_t dwJoinWait = join_struct.joined_event[first_thread_arrived].Wait(INFINITE, FALSE); - if (dwJoinWait != WAIT_OBJECT_0) - { - STRESS_LOG1 (LF_GC, LL_FATALERROR, "joined event wait failed with code: %zx", dwJoinWait); - FATAL_GC_ERROR (); - } - } +bool gc_heap::enable_preemptive () +{ + return GCToEEInterface::EnablePreemptiveGC(); +} - // avoid race due to the thread about to 
reset the event (occasionally) being preempted before ResetEvent() - if (!join_struct.wait_done) - { - goto respin; - } - - dprintf (JOIN_LOG, ("r_join() done")); - - fire_event (gch->heap_number, time_end, type_join, join_id); - - return FALSE; - } - else - { - fire_event (gch->heap_number, time_start, type_first_r_join, join_id); - return TRUE; - } - } - -#ifdef JOIN_STATS - uint64_t get_ts() +void gc_heap::disable_preemptive (bool restore_cooperative) +{ + if (restore_cooperative) { - return GCToOSInterface::QueryPerformanceCounter(); + GCToEEInterface::DisablePreemptiveGC(); } +} - void start_ts (gc_heap* gch) - { - // parallel execution ends here - start[gch->heap_number] = get_ts(); - } -#endif //JOIN_STATS +inline +ptrdiff_t round_down (ptrdiff_t add, int pitch) +{ + return ((add / pitch) * pitch); +} - void restart() - { -#ifdef JOIN_STATS - uint64_t elapsed_seq = get_ts() - start_seq; - uint64_t max = 0, sum = 0, wake = 0; - uint64_t min_ts = start[0]; - for (int i = 1; i < join_struct.n_threads; i++) - { - if(min_ts > start[i]) min_ts = start[i]; - } +#if defined(FEATURE_STRUCTALIGN) && defined(RESPECT_LARGE_ALIGNMENT) +// FEATURE_STRUCTALIGN allows the compiler to dictate the alignment, +// i.e, if a larger alignment matters or is beneficial, the compiler +// generated info tells us so. RESPECT_LARGE_ALIGNMENT is just the +// converse - it's a heuristic for the GC to use a larger alignment. 
+#error FEATURE_STRUCTALIGN should imply !RESPECT_LARGE_ALIGNMENT +#endif - for (int i = 0; i < join_struct.n_threads; i++) - { - uint64_t wake_delay = start[i] - min_ts; - uint64_t elapsed = end[i] - start[i]; - if (max < elapsed) - max = elapsed; - sum += elapsed; - wake += wake_delay; - } - uint64_t seq_loss = (join_struct.n_threads - 1)*elapsed_seq; - uint64_t par_loss = join_struct.n_threads*max - sum; - double efficiency = 0.0; - if (max > 0) - efficiency = sum*100.0/(join_struct.n_threads*max); +#if defined(FEATURE_STRUCTALIGN) && defined(FEATURE_LOH_COMPACTION) +#error FEATURE_STRUCTALIGN and FEATURE_LOH_COMPACTION are mutually exclusive +#endif - const double ts_scale = 1e-6; +// This is always power of 2. +#ifdef HOST_64BIT +const size_t min_segment_size_hard_limit = 1024*1024*16; +#else //HOST_64BIT +const size_t min_segment_size_hard_limit = 1024*1024*4; +#endif //HOST_64BIT - // enable this printf to get statistics on each individual join as it occurs - //printf("join #%3d seq_loss = %5g par_loss = %5g efficiency = %3.0f%%\n", join_id, ts_scale*seq_loss, ts_scale*par_loss, efficiency); +extern const size_t etw_allocation_tick = 100*1024; - elapsed_total[id] += sum; - wake_total[id] += wake; - seq_loss_total[id] += seq_loss; - par_loss_total[id] += par_loss; +extern const size_t low_latency_alloc = 256*1024; - // every 10 seconds, print a summary of the time spent in each type of join - if (GCToOSInterface::GetLowPrecisionTimeStamp() - start_tick > 10*1000) - { - printf("**** summary *****\n"); - for (int i = 0; i < 16; i++) - { - printf("join #%3d elapsed_total = %8g wake_loss = %8g seq_loss = %8g par_loss = %8g in_join_total = %8g\n", - i, - ts_scale*elapsed_total[i], - ts_scale*wake_total[i], - ts_scale*seq_loss_total[i], - ts_scale*par_loss_total[i], - ts_scale*in_join_total[i]); - elapsed_total[i] = wake_total[i] = seq_loss_total[i] = par_loss_total[i] = in_join_total[i] = 0; - } - start_tick = GCToOSInterface::GetLowPrecisionTimeStamp(); - } 
-#endif //JOIN_STATS +extern const size_t fgn_check_quantum = 2*1024*1024; - fire_event (join_heap_restart, time_start, type_restart, -1); - assert (join_struct.joined_p); - join_struct.joined_p = FALSE; - join_struct.join_lock = join_struct.n_threads; - dprintf (JOIN_LOG, ("join%d(%d): Restarting from join: join_lock is %d", flavor, id, (int32_t)(join_struct.join_lock))); - int color = join_struct.lock_color.LoadWithoutBarrier(); - join_struct.lock_color = !color; - join_struct.joined_event[color].Set(); +#ifdef MH_SC_MARK +const int max_snoop_level = 128; +#endif //MH_SC_MARK - fire_event (join_heap_restart, time_end, type_restart, -1); +#ifdef USE_REGIONS +void region_write_barrier_settings (WriteBarrierParameters* args, + gc_heap::region_info* map_region_to_generation_skewed, + uint8_t region_shr) +{ + switch (GCConfig::GetGCWriteBarrier()) + { + default: + case GCConfig::WRITE_BARRIER_DEFAULT: + case GCConfig::WRITE_BARRIER_REGION_BIT: + // bitwise region write barrier is the default now + args->region_to_generation_table = (uint8_t*)map_region_to_generation_skewed; + args->region_shr = region_shr; + args->region_use_bitwise_write_barrier = true; + break; -#ifdef JOIN_STATS - start[thd] = get_ts(); -#endif //JOIN_STATS - } + case GCConfig::WRITE_BARRIER_REGION_BYTE: + // bytewise region write barrier + args->region_to_generation_table = (uint8_t*)map_region_to_generation_skewed; + args->region_shr = region_shr; + assert (args->region_use_bitwise_write_barrier == false); + break; - BOOL joined() - { - dprintf (JOIN_LOG, ("join%d(%d): joined, join_lock is %d", flavor, id, (int32_t)(join_struct.join_lock))); - return join_struct.joined_p; + case GCConfig::WRITE_BARRIER_SERVER: + // server write barrier + // args should have been zero initialized + assert (args->region_use_bitwise_write_barrier == false); + assert (args->region_to_generation_table == nullptr); + assert (args->region_shr == 0); + break; } +} +#endif //USE_REGIONS - void r_restart() - { - if 
(join_struct.n_threads != 1) - { - fire_event (join_heap_r_restart, time_start, type_restart, -1); - join_struct.wait_done = TRUE; - join_struct.joined_event[first_thread_arrived].Set(); - fire_event (join_heap_r_restart, time_end, type_restart, -1); - } - } +void stomp_write_barrier_ephemeral (uint8_t* ephemeral_low, uint8_t* ephemeral_high +#ifdef USE_REGIONS + , gc_heap::region_info* map_region_to_generation_skewed + , uint8_t region_shr +#endif //USE_REGIONS + ) +{ +#ifndef USE_REGIONS + initGCShadow(); +#endif - void r_init() - { - if (join_struct.n_threads != 1) - { - join_struct.r_join_lock = join_struct.n_threads; - join_struct.wait_done = FALSE; - join_struct.joined_event[first_thread_arrived].Reset(); - } - } -}; + WriteBarrierParameters args = {}; + args.operation = WriteBarrierOp::StompEphemeral; + args.is_runtime_suspended = true; + args.ephemeral_low = ephemeral_low; + args.ephemeral_high = ephemeral_high; +#ifdef USE_REGIONS + region_write_barrier_settings (&args, map_region_to_generation_skewed, region_shr); +#endif //USE_REGIONS + GCToEEInterface::StompWriteBarrier(&args); +} -t_join gc_t_join; +void stomp_write_barrier_initialize(uint8_t* ephemeral_low, uint8_t* ephemeral_high +#ifdef USE_REGIONS + , gc_heap::region_info* map_region_to_generation_skewed + , uint8_t region_shr +#endif //USE_REGIONS + ) +{ + WriteBarrierParameters args = {}; + args.operation = WriteBarrierOp::Initialize; + args.is_runtime_suspended = true; + args.requires_upper_bounds_check = false; + args.card_table = g_gc_card_table; -#ifdef BACKGROUND_GC -t_join bgc_t_join; -#endif //BACKGROUND_GC +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + args.card_bundle_table = g_gc_card_bundle_table; +#endif -#endif //MULTIPLE_HEAPS + args.lowest_address = g_gc_lowest_address; + args.highest_address = g_gc_highest_address; + args.ephemeral_low = ephemeral_low; + args.ephemeral_high = ephemeral_high; -#define spin_and_switch(count_to_spin, expr) \ -{ \ - for (int j = 0; j < count_to_spin; 
j++) \ - { \ - if (expr) \ - { \ - break;\ - } \ - YieldProcessor(); \ - } \ - if (!(expr)) \ - { \ - GCToOSInterface::YieldThread(0); \ - } \ -} +#ifdef USE_REGIONS + region_write_barrier_settings (&args, map_region_to_generation_skewed, region_shr); +#endif //USE_REGIONS -#define spin_and_wait(count_to_spin, expr) \ -{ \ - while (!expr) \ - { \ - for (int j = 0; j < count_to_spin; j++) \ - { \ - if (expr) \ - { \ - break; \ - } \ - YieldProcessor (); \ - } \ - if (!(expr)) \ - { \ - GCToOSInterface::YieldThread (0); \ - } \ - } \ + GCToEEInterface::StompWriteBarrier(&args); } -#ifdef BACKGROUND_GC - -#define max_pending_allocs 64 +class mark; +class generation; +class heap_segment; +class CObjectHeader; +class dynamic_data; +class l_heap; +class sorted_table; +class c_synchronize; -class exclusive_sync -{ - VOLATILE(uint8_t*) rwp_object; - VOLATILE(int32_t) needs_checking; +#ifdef FEATURE_PREMORTEM_FINALIZATION +static +HRESULT AllocateCFinalize(CFinalize **pCFinalize); +#endif // FEATURE_PREMORTEM_FINALIZATION - int spin_count; +uint8_t* tree_search (uint8_t* tree, uint8_t* old_address); - uint8_t cache_separator[HS_CACHE_LINE_SIZE - (sizeof (spin_count) + sizeof (needs_checking) + sizeof (rwp_object))]; - // TODO - perhaps each object should be on its own cache line... 
- VOLATILE(uint8_t*) alloc_objects[max_pending_allocs]; +/* per heap static initialization */ +#if defined(BACKGROUND_GC) && !defined(MULTIPLE_HEAPS) +uint32_t* gc_heap::mark_array; +#endif //BACKGROUND_GC && !MULTIPLE_HEAPS - int find_free_index () - { - for (int i = 0; i < max_pending_allocs; i++) - { - if (alloc_objects [i] == (uint8_t*)0) - { - return i; - } - } +uint8_t** gc_heap::g_mark_list; +uint8_t** gc_heap::g_mark_list_copy; +size_t gc_heap::mark_list_size; +size_t gc_heap::g_mark_list_total_size; +bool gc_heap::mark_list_overflow; +#ifdef USE_REGIONS +uint8_t*** gc_heap::g_mark_list_piece; +size_t gc_heap::g_mark_list_piece_size; +size_t gc_heap::g_mark_list_piece_total_size; +#endif //USE_REGIONS - return -1; - } +seg_mapping* seg_mapping_table; -public: - void init() - { - spin_count = 32 * (g_num_processors - 1); - rwp_object = 0; - needs_checking = 0; - for (int i = 0; i < max_pending_allocs; i++) - { - alloc_objects [i] = (uint8_t*)0; - } - } +#ifdef FEATURE_BASICFREEZE +sorted_table* gc_heap::seg_table; +#endif //FEATURE_BASICFREEZE - void check() - { - for (int i = 0; i < max_pending_allocs; i++) - { - if (alloc_objects [i] != (uint8_t*)0) - { - FATAL_GC_ERROR(); - } - } - } +#ifdef MULTIPLE_HEAPS +GCEvent gc_heap::ee_suspend_event; +size_t gc_heap::min_gen0_balance_delta = 0; +size_t gc_heap::min_balance_threshold = 0; +#endif //MULTIPLE_HEAPS - void bgc_mark_set (uint8_t* obj) - { - dprintf (3, ("cm: probing %p", obj)); -retry: - if (Interlocked::CompareExchange(&needs_checking, 1, 0) == 0) - { - // If we spend too much time spending all the allocs, - // consider adding a high water mark and scan up - // to that; we'll need to interlock in done when - // we update the high watermark. 
- for (int i = 0; i < max_pending_allocs; i++) - { - if (obj == alloc_objects[i]) - { - needs_checking = 0; - dprintf (3, ("cm: will spin")); - spin_and_switch (spin_count, (obj != alloc_objects[i])); - goto retry; - } - } +VOLATILE(BOOL) gc_heap::gc_started; - rwp_object = obj; - needs_checking = 0; - dprintf (3, ("cm: set %p", obj)); - return; - } - else - { - spin_and_switch (spin_count, (needs_checking == 0)); - goto retry; - } - } +#ifdef MULTIPLE_HEAPS +GCEvent gc_heap::gc_start_event; +bool gc_heap::gc_thread_no_affinitize_p = false; +uintptr_t process_mask = 0; - int uoh_alloc_set (uint8_t* obj) - { - if (!gc_heap::cm_in_progress) - { - return -1; - } +int gc_heap::n_heaps; // current number of heaps +int gc_heap::n_max_heaps; // maximum number of heaps -retry: - dprintf (3, ("uoh alloc: probing %p", obj)); +gc_heap** gc_heap::g_heaps; - if (Interlocked::CompareExchange(&needs_checking, 1, 0) == 0) - { - if (obj == rwp_object) - { - needs_checking = 0; - spin_and_switch (spin_count, (obj != rwp_object)); - goto retry; - } - else - { - int cookie = find_free_index(); +#if !defined(USE_REGIONS) || defined(_DEBUG) +size_t* gc_heap::g_promoted; +#endif //!USE_REGIONS || _DEBUG - if (cookie != -1) - { - alloc_objects[cookie] = obj; - needs_checking = 0; - //if (cookie >= 4) - //{ - // GCToOSInterface::DebugBreak(); - //} - - dprintf (3, ("uoh alloc: set %p at %d", obj, cookie)); - return cookie; - } - else - { - needs_checking = 0; - dprintf (3, ("uoh alloc: setting %p will spin to acquire a free index", obj)); - spin_and_switch (spin_count, (find_free_index () != -1)); - goto retry; - } - } - } - else - { - dprintf (3, ("uoh alloc: will spin on checking %p", obj)); - spin_and_switch (spin_count, (needs_checking == 0)); - goto retry; - } - } +#ifdef MH_SC_MARK +int* gc_heap::g_mark_stack_busy; +#endif //MH_SC_MARK - void bgc_mark_done () - { - dprintf (3, ("cm: release lock on %p", (uint8_t *)rwp_object)); - rwp_object = 0; - } +#ifdef BACKGROUND_GC +size_t* 
gc_heap::g_bpromoted; +#endif //BACKGROUND_GC - void uoh_alloc_done_with_index (int index) - { - dprintf (3, ("uoh alloc: release lock on %p based on %d", (uint8_t *)alloc_objects[index], index)); - assert ((index >= 0) && (index < max_pending_allocs)); - alloc_objects[index] = (uint8_t*)0; - } +BOOL gc_heap::gradual_decommit_in_progress_p = FALSE; +size_t gc_heap::max_decommit_step_size = 0; +#else //MULTIPLE_HEAPS - void uoh_alloc_done (uint8_t* obj) - { - if (!gc_heap::cm_in_progress) - { - return; - } - - for (int i = 0; i < max_pending_allocs; i++) - { - if (alloc_objects [i] == obj) - { - uoh_alloc_done_with_index(i); - return; - } - } - dprintf (3, ("uoh alloc: could not release lock on %p", obj)); - } -}; +#if !defined(USE_REGIONS) || defined(_DEBUG) +size_t gc_heap::g_promoted; +#endif //!USE_REGIONS || _DEBUG +#ifdef BACKGROUND_GC +size_t gc_heap::g_bpromoted; #endif //BACKGROUND_GC -void reset_memory (uint8_t* o, size_t sizeo); - -#ifdef WRITE_WATCH +// this is just to have fewer #ifdefs in code shared between WKS and SVR +// for filling out ScanContext structs +extern const int n_heaps = 1; -#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -static bool virtual_alloc_hardware_write_watch = false; -#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +#endif //MULTIPLE_HEAPS -static bool hardware_write_watch_capability = false; +size_t gc_heap::card_table_element_layout[total_bookkeeping_elements + 1]; +uint8_t* gc_heap::bookkeeping_start = nullptr; +#ifdef USE_REGIONS +uint8_t* gc_heap::bookkeeping_covered_committed = nullptr; +size_t gc_heap::bookkeeping_sizes[total_bookkeeping_elements]; +#endif //USE_REGIONS +size_t gc_heap::reserved_memory = 0; +size_t gc_heap::reserved_memory_limit = 0; +BOOL gc_heap::g_low_memory_status; -inline bool can_use_hardware_write_watch() -{ - return hardware_write_watch_capability; -} +gc_reason gc_trigger_reason = reason_empty; +gc_latency_level gc_heap::latency_level = latency_level_default; -inline bool 
can_use_write_watch_for_card_table() -{ -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - return true; -#else - return can_use_hardware_write_watch(); -#endif -} +gc_mechanisms gc_heap::settings; -#else //WRITE_WATCH -#define mem_reserve (MEM_RESERVE) -#endif //WRITE_WATCH +gc_history_global gc_heap::gc_data_global; -void WaitLongerNoInstru (int i) -{ - // every 8th attempt: - bool bToggleGC = GCToEEInterface::EnablePreemptiveGC(); +uint64_t gc_heap::gc_last_ephemeral_decommit_time = 0; - // if we're waiting for gc to finish, we should block immediately - if (g_fSuspensionPending == 0) - { - if (g_num_processors > 1) - { - YieldProcessor(); // indicate to the processor that we are spinning - if (i & 0x01f) - GCToOSInterface::YieldThread (0); - else - GCToOSInterface::Sleep (5); - } - else - GCToOSInterface::Sleep (5); - } +CLRCriticalSection gc_heap::check_commit_cs; - // If CLR is hosted, a thread may reach here while it is in preemptive GC mode, - // or it has no Thread object, in order to force a task to yield, or to triger a GC. - // It is important that the thread is going to wait for GC. Otherwise the thread - // is in a tight loop. If the thread has high priority, the perf is going to be very BAD. - if (bToggleGC) - { -#ifdef _DEBUG - // In debug builds, all enter_spin_lock operations go through this code. If a GC has - // started, it is important to block until the GC thread calls set_gc_done (since it is - // guaranteed to have cleared g_TrapReturningThreads by this point). This avoids livelock - // conditions which can otherwise occur if threads are allowed to spin in this function - // (and therefore starve the GC thread) between the point when the GC thread sets the - // WaitForGC event and the point when the GC thread clears g_TrapReturningThreads. 
- if (gc_heap::gc_started) - { - gc_heap::wait_for_gc_done(); - } -#endif // _DEBUG - GCToEEInterface::DisablePreemptiveGC(); - } - else if (g_fSuspensionPending > 0) - { - g_theGCHeap->WaitUntilGCComplete(); - } -} +#ifdef COMMITTED_BYTES_SHADOW +CLRCriticalSection gc_heap::decommit_lock; +#endif //COMMITTED_BYTES_SHADOW -inline -static void safe_switch_to_thread() -{ - bool cooperative_mode = gc_heap::enable_preemptive(); +size_t gc_heap::current_total_committed = 0; - GCToOSInterface::YieldThread(0); +size_t gc_heap::committed_by_oh[recorded_committed_bucket_counts]; - gc_heap::disable_preemptive(cooperative_mode); -} +size_t gc_heap::current_total_committed_bookkeeping = 0; -#define check_msl_status(msg, size) if (msl_status == msl_retry_different_heap) \ - { \ - dprintf (5555, ("h%d RETRY %s(%Id)", heap_number, msg, size)); \ - return a_state_retry_allocate; \ - } +BOOL gc_heap::reset_mm_p = TRUE; -static const int32_t lock_free = -1; -static const int32_t lock_taken = 0; -static const int32_t lock_decommissioned = 1; +#ifdef FEATURE_EVENT_TRACE +bool gc_heap::informational_event_enabled_p = false; +uint64_t* gc_heap::gc_time_info = 0; -// If our heap got decommissioned, we need to try an existing heap. -//inline -bool gc_heap::should_move_heap (GCSpinLock* msl) -{ -#ifdef MULTIPLE_HEAPS - if (msl->lock == lock_decommissioned) - { - dprintf (5555, ("heap#%d got decommissioned! need to retry", heap_number)); - } - return (msl->lock == lock_decommissioned); -#else //MULTIPLE_HEAPS - return false; -#endif //MULTIPLE_HEAPS -} +#ifdef BACKGROUND_GC +uint64_t* gc_heap::bgc_time_info = 0; +#endif //BACKGROUND_GC -// All the places where we could be stopped because there was a suspension should call should_move_heap to check if we need to return -// so we can try another heap or we can continue the allocation on the same heap. 
-enter_msl_status gc_heap::enter_spin_lock_msl_helper (GCSpinLock* msl) -{ - do - { -#ifdef DYNAMIC_HEAP_COUNT - uint64_t start = GetHighPrecisionTimeStamp(); -#endif //DYNAMIC_HEAP_COUNT +size_t gc_heap::physical_memory_from_config = 0; - unsigned int i = 0; - while (VolatileLoad (&msl->lock) != lock_free) - { - if (should_move_heap (msl)) - { - return msl_retry_different_heap; - } - if ((++i & 7) && !IsGCInProgress ()) - { - if (g_num_processors > 1) - { -#ifndef MULTIPLE_HEAPS - int spin_count = 32 * yp_spin_count_unit; -#else //!MULTIPLE_HEAPS - int spin_count = yp_spin_count_unit; -#endif //!MULTIPLE_HEAPS - for (int j = 0; j < spin_count; j++) - { - if (VolatileLoad (&msl->lock) == lock_free || IsGCInProgress ()) - break; - // give the HT neighbor a chance to run - YieldProcessor (); - } - if (VolatileLoad (&msl->lock) != lock_free && !IsGCInProgress ()) - { -#ifdef DYNAMIC_HEAP_COUNT - start -= GetHighPrecisionTimeStamp(); -#endif //DYNAMIC_HEAP_COUNT - safe_switch_to_thread (); -#ifdef DYNAMIC_HEAP_COUNT - start += GetHighPrecisionTimeStamp(); -#endif //DYNAMIC_HEAP_COUNT - } - } - else - { - safe_switch_to_thread (); - } - } - else - { -#ifdef DYNAMIC_HEAP_COUNT - start -= GetHighPrecisionTimeStamp(); -#endif //DYNAMIC_HEAP_COUNT - WaitLongerNoInstru (i); -#ifdef DYNAMIC_HEAP_COUNT - start += GetHighPrecisionTimeStamp(); -#endif //DYNAMIC_HEAP_COUNT - } - } -#ifdef DYNAMIC_HEAP_COUNT - uint64_t end = GetHighPrecisionTimeStamp(); - Interlocked::ExchangeAdd64 (&msl->msl_wait_time, end - start); - dprintf (3, ("h%d wait for msl lock wait time %zd, total wait time: %zd", heap_number, (end - start), msl->msl_wait_time)); -#endif //DYNAMIC_HEAP_COUNT - } - while (Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) != lock_free); +size_t gc_heap::gen0_min_budget_from_config = 0; - return msl_entered; -} +size_t gc_heap::gen0_max_budget_from_config = 0; -inline -enter_msl_status gc_heap::enter_spin_lock_msl (GCSpinLock* msl) -{ - if 
(Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) == lock_free) - return msl_entered; +int gc_heap::high_mem_percent_from_config = 0; - return enter_spin_lock_msl_helper (msl); -} +bool gc_heap::use_frozen_segments_p = false; -// -// We need the following methods to have volatile arguments, so that they can accept -// raw pointers in addition to the results of the & operator on Volatile. -// this will never be used for the more_space_lock_xxx, which is why -// "lock_decommissioned" cannot happen. -inline -static void enter_spin_lock_noinstru (RAW_KEYWORD(volatile) int32_t* lock) -{ -retry: +#ifdef FEATURE_LOH_COMPACTION +gc_heap::etw_loh_compact_info* gc_heap::loh_compact_info; +#endif //FEATURE_LOH_COMPACTION +#endif //FEATURE_EVENT_TRACE - if (Interlocked::CompareExchange(lock, lock_taken, lock_free) != lock_free) - { - unsigned int i = 0; - while (VolatileLoad(lock) != lock_free) - { - // will never be used for more_space_lock_xxx - assert (VolatileLoad(lock) != lock_decommissioned); - if ((++i & 7) && !IsGCInProgress()) - { - if (g_num_processors > 1) - { -#ifndef MULTIPLE_HEAPS - int spin_count = 32 * yp_spin_count_unit; -#else //!MULTIPLE_HEAPS - int spin_count = yp_spin_count_unit; -#endif //!MULTIPLE_HEAPS - for (int j = 0; j < spin_count; j++) - { - if (VolatileLoad(lock) == lock_free || IsGCInProgress()) - break; - YieldProcessor(); // indicate to the processor that we are spinning - } - if (VolatileLoad(lock) != lock_free && !IsGCInProgress()) - { - safe_switch_to_thread(); - } - } - else - { - safe_switch_to_thread(); - } - } - else - { - WaitLongerNoInstru(i); - } - } - goto retry; - } -} +bool gc_heap::hard_limit_config_p = false; -inline -static BOOL try_enter_spin_lock_noinstru(RAW_KEYWORD(volatile) int32_t* lock) -{ - return (Interlocked::CompareExchange(&*lock, lock_taken, lock_free) == lock_free); -} +#if defined(SHORT_PLUGS) && !defined(USE_REGIONS) +double gc_heap::short_plugs_pad_ratio = 0; +#endif //SHORT_PLUGS && !USE_REGIONS 
-inline -static void leave_spin_lock_noinstru (RAW_KEYWORD(volatile) int32_t* lock) -{ - VolatileStore((int32_t*)lock, lock_free); -} +int gc_heap::generation_skip_ratio_threshold = 0; +int gc_heap::conserve_mem_setting = 0; +bool gc_heap::spin_count_unit_config_p = false; -#ifdef _DEBUG +uint64_t gc_heap::suspended_start_time = 0; +uint64_t gc_heap::end_gc_time = 0; +uint64_t gc_heap::total_suspended_time = 0; +uint64_t gc_heap::process_start_time = 0; +last_recorded_gc_info gc_heap::last_ephemeral_gc_info; +last_recorded_gc_info gc_heap::last_full_blocking_gc_info; -inline -static void enter_spin_lock (GCSpinLock *pSpinLock) -{ - enter_spin_lock_noinstru (&pSpinLock->lock); - assert (pSpinLock->holding_thread == (Thread*)-1); - pSpinLock->holding_thread = GCToEEInterface::GetThread(); -} +uint64_t gc_heap::last_alloc_reset_suspended_end_time = 0; +size_t gc_heap::max_peak_heap_size = 0; +VOLATILE(size_t) gc_heap::llc_size = 0; -inline -static BOOL try_enter_spin_lock(GCSpinLock *pSpinLock) -{ - BOOL ret = try_enter_spin_lock_noinstru(&pSpinLock->lock); - if (ret) - pSpinLock->holding_thread = GCToEEInterface::GetThread(); - return ret; -} +#ifdef BACKGROUND_GC +last_recorded_gc_info gc_heap::last_bgc_info[2]; +VOLATILE(bool) gc_heap::is_last_recorded_bgc = false; +VOLATILE(int) gc_heap::last_bgc_info_index = 0; +#endif //BACKGROUND_GC -inline -static void leave_spin_lock(GCSpinLock *pSpinLock) -{ - bool gc_thread_p = GCToEEInterface::WasCurrentThreadCreatedByGC(); - pSpinLock->released_by_gc_p = gc_thread_p; - pSpinLock->holding_thread = (Thread*) -1; - if (pSpinLock->lock != lock_free) - leave_spin_lock_noinstru(&pSpinLock->lock); -} +#ifdef DYNAMIC_HEAP_COUNT +size_t gc_heap::hc_change_cancelled_count_prep = 0; +#ifdef BACKGROUND_GC +int gc_heap::bgc_th_creation_hist_index = 0; +gc_heap::bgc_thread_creation_history gc_heap::bgc_th_creation_hist[max_bgc_thread_creation_count]; +size_t gc_heap::bgc_th_count_created = 0; +size_t 
gc_heap::bgc_th_count_created_th_existed = 0; +size_t gc_heap::bgc_th_count_creation_failed = 0; +size_t gc_heap::bgc_init_gc_index = 0; +VOLATILE(short) gc_heap::bgc_init_n_heaps = 0; +size_t gc_heap::hc_change_cancelled_count_bgc = 0; +#endif //BACKGROUND_GC +#endif //DYNAMIC_HEAP_COUNT -#define ASSERT_HOLDING_SPIN_LOCK(pSpinLock) \ - _ASSERTE((pSpinLock)->holding_thread == GCToEEInterface::GetThread()); +#if defined(HOST_64BIT) +// consider putting this in dynamic data - +// we may want different values for workstation +// and server GC. +size_t gc_heap::youngest_gen_desired_th; +#endif //HOST_64BIT -#define ASSERT_NOT_HOLDING_SPIN_LOCK(pSpinLock) \ - _ASSERTE((pSpinLock)->holding_thread != GCToEEInterface::GetThread()); +uint64_t gc_heap::mem_one_percent = 0; -#else //_DEBUG +uint32_t gc_heap::high_memory_load_th = 0; -//In the concurrent version, the Enable/DisablePreemptiveGC is optional because -//the gc thread call WaitLonger. -void WaitLonger (int i -#ifdef SYNCHRONIZATION_STATS - , GCSpinLock* spin_lock -#endif //SYNCHRONIZATION_STATS - ) -{ -#ifdef SYNCHRONIZATION_STATS - (spin_lock->num_wait_longer)++; -#endif //SYNCHRONIZATION_STATS +uint32_t gc_heap::m_high_memory_load_th; - // every 8th attempt: - bool bToggleGC = GCToEEInterface::EnablePreemptiveGC(); - assert (bToggleGC); +uint32_t gc_heap::v_high_memory_load_th; - // if we're waiting for gc to finish, we should block immediately - if (!gc_heap::gc_started) - { -#ifdef SYNCHRONIZATION_STATS - (spin_lock->num_switch_thread_w)++; -#endif //SYNCHRONIZATION_STATS - if (g_num_processors > 1) - { - YieldProcessor(); // indicate to the processor that we are spinning - if (i & 0x01f) - GCToOSInterface::YieldThread (0); - else - GCToOSInterface::Sleep (5); - } - else - GCToOSInterface::Sleep (5); - } +uint32_t gc_heap::almost_high_memory_load_th; - // If CLR is hosted, a thread may reach here while it is in preemptive GC mode, - // or it has no Thread object, in order to force a task to yield, or to triger 
a GC. - // It is important that the thread is going to wait for GC. Otherwise the thread - // is in a tight loop. If the thread has high priority, the perf is going to be very BAD. - if (gc_heap::gc_started) - { - gc_heap::wait_for_gc_done(); - } +bool gc_heap::is_restricted_physical_mem; - if (bToggleGC) - { -#ifdef SYNCHRONIZATION_STATS - (spin_lock->num_disable_preemptive_w)++; -#endif //SYNCHRONIZATION_STATS - GCToEEInterface::DisablePreemptiveGC(); - } -} +uint64_t gc_heap::total_physical_mem = 0; -inline -static void enter_spin_lock (GCSpinLock* spin_lock) -{ -retry: +uint64_t gc_heap::entry_available_physical_mem = 0; - if (Interlocked::CompareExchange(&spin_lock->lock, lock_taken, lock_free) != lock_free) - { - unsigned int i = 0; - while (spin_lock->lock != lock_free) - { - assert (spin_lock->lock != lock_decommissioned); - if ((++i & 7) && !gc_heap::gc_started) - { - if (g_num_processors > 1) - { -#ifndef MULTIPLE_HEAPS - int spin_count = 32 * yp_spin_count_unit; -#else //!MULTIPLE_HEAPS - int spin_count = yp_spin_count_unit; -#endif //!MULTIPLE_HEAPS - for (int j = 0; j < spin_count; j++) - { - if (spin_lock->lock == lock_free || gc_heap::gc_started) - break; - YieldProcessor(); // indicate to the processor that we are spinning - } - if (spin_lock->lock != lock_free && !gc_heap::gc_started) - { -#ifdef SYNCHRONIZATION_STATS - (spin_lock->num_switch_thread)++; -#endif //SYNCHRONIZATION_STATS - bool cooperative_mode = gc_heap::enable_preemptive (); +size_t gc_heap::heap_hard_limit = 0; - GCToOSInterface::YieldThread(0); +size_t gc_heap::heap_hard_limit_oh[total_oh_count]; - gc_heap::disable_preemptive (cooperative_mode); - } - } - else - GCToOSInterface::YieldThread(0); - } - else - { - WaitLonger(i -#ifdef SYNCHRONIZATION_STATS - , spin_lock -#endif //SYNCHRONIZATION_STATS - ); - } - } - goto retry; - } -} +#ifdef USE_REGIONS -inline -static BOOL try_enter_spin_lock(GCSpinLock* spin_lock) -{ - return (Interlocked::CompareExchange(&spin_lock->lock, 
lock_taken, lock_free) == lock_free); -} +size_t gc_heap::regions_range = 0; -inline -static void leave_spin_lock (GCSpinLock * spin_lock) -{ - spin_lock->lock = lock_free; -} +#endif //USE_REGIONS -#define ASSERT_HOLDING_SPIN_LOCK(pSpinLock) +bool affinity_config_specified_p = false; -#endif //_DEBUG +#ifdef USE_REGIONS +region_allocator global_region_allocator; +uint8_t*(*initial_regions)[total_generation_count][2] = nullptr; +size_t gc_heap::region_count = 0; -bool gc_heap::enable_preemptive () -{ - return GCToEEInterface::EnablePreemptiveGC(); -} +gc_heap::region_info* gc_heap::map_region_to_generation = nullptr; +gc_heap::region_info* gc_heap::map_region_to_generation_skewed = nullptr; -void gc_heap::disable_preemptive (bool restore_cooperative) -{ - if (restore_cooperative) - { - GCToEEInterface::DisablePreemptiveGC(); - } -} +#endif //USE_REGIONS -typedef void ** PTR_PTR; -inline -void memclr ( uint8_t* mem, size_t size) -{ - dprintf (3, ("MEMCLR: %p, %zd", mem, size)); - assert ((size & (sizeof(PTR_PTR)-1)) == 0); - assert (sizeof(PTR_PTR) == DATA_ALIGNMENT); - memset (mem, 0, size); -} +#ifdef BACKGROUND_GC +GCEvent gc_heap::bgc_start_event; +gc_mechanisms gc_heap::saved_bgc_settings; -inline -ptrdiff_t round_down (ptrdiff_t add, int pitch) -{ - return ((add / pitch) * pitch); -} +gc_history_global gc_heap::bgc_data_global; -#if defined(FEATURE_STRUCTALIGN) && defined(RESPECT_LARGE_ALIGNMENT) -// FEATURE_STRUCTALIGN allows the compiler to dictate the alignment, -// i.e, if a larger alignment matters or is beneficial, the compiler -// generated info tells us so. RESPECT_LARGE_ALIGNMENT is just the -// converse - it's a heuristic for the GC to use a larger alignment. 
-#error FEATURE_STRUCTALIGN should imply !RESPECT_LARGE_ALIGNMENT -#endif +GCEvent gc_heap::background_gc_done_event; -#if defined(FEATURE_STRUCTALIGN) && defined(FEATURE_LOH_COMPACTION) -#error FEATURE_STRUCTALIGN and FEATURE_LOH_COMPACTION are mutually exclusive -#endif +GCEvent gc_heap::ee_proceed_event; -// Returns true if two pointers have the same large (double than normal) alignment. -inline -BOOL same_large_alignment_p (uint8_t* p1, uint8_t* p2) -{ -#ifdef RESPECT_LARGE_ALIGNMENT - const size_t LARGE_ALIGNMENT_MASK = 2 * DATA_ALIGNMENT - 1; - return ((((size_t)p1 ^ (size_t)p2) & LARGE_ALIGNMENT_MASK) == 0); -#else - UNREFERENCED_PARAMETER(p1); - UNREFERENCED_PARAMETER(p2); - return TRUE; -#endif // RESPECT_LARGE_ALIGNMENT -} +bool gc_heap::gc_can_use_concurrent = false; -// Determines the padding size required to fix large alignment during relocation. -inline -size_t switch_alignment_size (BOOL already_padded_p) -{ -#ifndef RESPECT_LARGE_ALIGNMENT - assert (!"Should not be called"); -#endif // RESPECT_LARGE_ALIGNMENT +bool gc_heap::temp_disable_concurrent_p = false; - if (already_padded_p) - return DATA_ALIGNMENT; - else - return Align (min_obj_size) | DATA_ALIGNMENT; -} +uint32_t gc_heap::cm_in_progress = FALSE; -#ifdef FEATURE_STRUCTALIGN -void set_node_aligninfo (uint8_t *node, int requiredAlignment, ptrdiff_t pad); -void clear_node_aligninfo (uint8_t *node); -#else // FEATURE_STRUCTALIGN -#define node_realigned(node) (((plug_and_reloc*)(node))[-1].reloc & 1) -void set_node_realigned (uint8_t* node); -void clear_node_realigned(uint8_t* node); -#endif // FEATURE_STRUCTALIGN +BOOL gc_heap::dont_restart_ee_p = FALSE; -inline -size_t AlignQword (size_t nbytes) -{ -#ifdef FEATURE_STRUCTALIGN - // This function is used to align everything on the large object - // heap to an 8-byte boundary, to reduce the number of unaligned - // accesses to (say) arrays of doubles. 
With FEATURE_STRUCTALIGN, - // the compiler dictates the optimal alignment instead of having - // a heuristic in the GC. - return Align (nbytes); -#else // FEATURE_STRUCTALIGN - return (nbytes + 7) & ~7; -#endif // FEATURE_STRUCTALIGN -} +BOOL gc_heap::keep_bgc_threads_p = FALSE; -inline -BOOL Aligned (size_t n) -{ - return (n & ALIGNCONST) == 0; -} +GCEvent gc_heap::bgc_threads_sync_event; -#define OBJECT_ALIGNMENT_OFFSET (sizeof(MethodTable *)) +BOOL gc_heap::do_ephemeral_gc_p = FALSE; -#ifdef FEATURE_STRUCTALIGN -#define MAX_STRUCTALIGN OS_PAGE_SIZE -#else // FEATURE_STRUCTALIGN -#define MAX_STRUCTALIGN 0 -#endif // FEATURE_STRUCTALIGN +BOOL gc_heap::do_concurrent_p = FALSE; -#ifdef FEATURE_STRUCTALIGN -inline -ptrdiff_t AdjustmentForMinPadSize(ptrdiff_t pad, int requiredAlignment) -{ - // The resulting alignpad must be either 0 or at least min_obj_size. - // Note that by computing the following difference on unsigned types, - // we can do the range check 0 < alignpad < min_obj_size with a - // single conditional branch. 
- if ((size_t)(pad - DATA_ALIGNMENT) < Align (min_obj_size) - DATA_ALIGNMENT) - { - return requiredAlignment; - } - return 0; -} +size_t gc_heap::ephemeral_fgc_counts[max_generation]; +VOLATILE(c_gc_state) gc_heap::current_c_gc_state = c_gc_state_free; -inline -ptrdiff_t ComputeStructAlignPad (uint8_t* plug, int requiredAlignment, size_t alignmentOffset=OBJECT_ALIGNMENT_OFFSET) -{ - return StructAlign (plug, requiredAlignment, alignmentOffset) - plug; -} +VOLATILE(BOOL) gc_heap::gc_background_running = FALSE; +#endif //BACKGROUND_GC -BOOL IsStructAligned (uint8_t *ptr, int requiredAlignment) -{ - return StructAlign (ptr, requiredAlignment) == ptr; -} +#ifdef USE_REGIONS +#ifdef MULTIPLE_HEAPS +uint8_t* gc_heap::gc_low; +uint8_t* gc_heap::gc_high; +#endif //MULTIPLE_HEAPS +VOLATILE(uint8_t*) gc_heap::ephemeral_low; +VOLATILE(uint8_t*) gc_heap::ephemeral_high; +#endif //USE_REGIONS -inline -ptrdiff_t ComputeMaxStructAlignPad (int requiredAlignment) -{ - if (requiredAlignment == DATA_ALIGNMENT) - return 0; - // Since a non-zero alignment padding cannot be less than min_obj_size (so we can fit the - // alignment padding object), the worst-case alignment padding is correspondingly larger - // than the required alignment. - return requiredAlignment + Align (min_obj_size) - DATA_ALIGNMENT; -} +#ifndef MULTIPLE_HEAPS +#ifdef SPINLOCK_HISTORY +int gc_heap::spinlock_info_index = 0; +spinlock_info gc_heap::last_spinlock_info[max_saved_spinlock_info]; +allocation_state gc_heap::current_uoh_alloc_state = (allocation_state)-1; +#endif //SPINLOCK_HISTORY -inline -ptrdiff_t ComputeMaxStructAlignPadLarge (int requiredAlignment) -{ - if (requiredAlignment <= get_alignment_constant (TRUE)+1) - return 0; - // This is the same as ComputeMaxStructAlignPad, except that in addition to leaving space - // for padding before the actual object, it also leaves space for filling a gap after the - // actual object. 
This is needed on the large object heap, as the outer allocation functions - // don't operate on an allocation context (which would have left space for the final gap). - return requiredAlignment + Align (min_obj_size) * 2 - DATA_ALIGNMENT; -} +uint32_t gc_heap::fgn_maxgen_percent = 0; +size_t gc_heap::fgn_last_alloc = 0; -#else // FEATURE_STRUCTALIGN -#define ComputeMaxStructAlignPad(requiredAlignment) 0 -#define ComputeMaxStructAlignPadLarge(requiredAlignment) 0 -#endif // FEATURE_STRUCTALIGN +int gc_heap::generation_skip_ratio = 100; +#ifdef FEATURE_CARD_MARKING_STEALING +VOLATILE(size_t) gc_heap::n_eph_soh = 0; +VOLATILE(size_t) gc_heap::n_gen_soh = 0; +VOLATILE(size_t) gc_heap::n_eph_loh = 0; +VOLATILE(size_t) gc_heap::n_gen_loh = 0; +#endif //FEATURE_CARD_MARKING_STEALING -//CLR_SIZE is the max amount of bytes from gen0 that is set to 0 in one chunk -#ifdef SERVER_GC -#define CLR_SIZE ((size_t)(8*1024+32)) -#else //SERVER_GC -#define CLR_SIZE ((size_t)(8*1024+32)) -#endif //SERVER_GC +uint64_t gc_heap::loh_alloc_since_cg = 0; -#define END_SPACE_AFTER_GC (loh_size_threshold + MAX_STRUCTALIGN) -// When we fit into the free list we need an extra of a min obj -#define END_SPACE_AFTER_GC_FL (END_SPACE_AFTER_GC + Align (min_obj_size)) +BOOL gc_heap::elevation_requested = FALSE; -#if defined(BACKGROUND_GC) && !defined(USE_REGIONS) -#define SEGMENT_INITIAL_COMMIT (2*OS_PAGE_SIZE) -#else -#define SEGMENT_INITIAL_COMMIT (OS_PAGE_SIZE) -#endif //BACKGROUND_GC && !USE_REGIONS +BOOL gc_heap::last_gc_before_oom = FALSE; -// This is always power of 2. 
-#ifdef HOST_64BIT -const size_t min_segment_size_hard_limit = 1024*1024*16; -#else //HOST_64BIT -const size_t min_segment_size_hard_limit = 1024*1024*4; -#endif //HOST_64BIT +BOOL gc_heap::sufficient_gen0_space_p = FALSE; -#ifndef HOST_64BIT -// Max size of heap hard limit (2^31) to be able to be aligned and rounded up on power of 2 and not overflow -const size_t max_heap_hard_limit = (size_t)2 * (size_t)1024 * (size_t)1024 * (size_t)1024; -#endif //!HOST_64BIT +BOOL gc_heap::decide_promote_gen1_pins_p = TRUE; +#ifdef BACKGROUND_GC +uint8_t* gc_heap::background_saved_lowest_address = 0; +uint8_t* gc_heap::background_saved_highest_address = 0; +uint8_t* gc_heap::next_sweep_obj = 0; +uint8_t* gc_heap::current_sweep_pos = 0; +#ifdef DOUBLY_LINKED_FL +heap_segment* gc_heap::current_sweep_seg = 0; +#endif //DOUBLY_LINKED_FL +exclusive_sync* gc_heap::bgc_alloc_lock; +#endif //BACKGROUND_GC -#ifdef SERVER_GC +oom_history gc_heap::oom_info; -#ifdef HOST_64BIT +int gc_heap::oomhist_index_per_heap = 0; -#define INITIAL_ALLOC ((size_t)((size_t)4*1024*1024*1024)) -#define LHEAP_ALLOC ((size_t)(1024*1024*256)) +oom_history gc_heap::oomhist_per_heap[max_oom_history_count]; -#else +fgm_history gc_heap::fgm_result; -#define INITIAL_ALLOC ((size_t)(1024*1024*64)) -#define LHEAP_ALLOC ((size_t)(1024*1024*32)) +size_t gc_heap::allocated_since_last_gc[total_oh_count]; -#endif // HOST_64BIT +#ifndef USE_REGIONS +BOOL gc_heap::ro_segments_in_range = FALSE; +uint8_t* gc_heap::ephemeral_low; +uint8_t* gc_heap::ephemeral_high; +BOOL gc_heap::ephemeral_promotion; +uint8_t* gc_heap::saved_ephemeral_plan_start[ephemeral_generation_count]; +size_t gc_heap::saved_ephemeral_plan_start_size[ephemeral_generation_count]; +#endif //!USE_REGIONS -#else //SERVER_GC +uint8_t* gc_heap::lowest_address; -#ifdef HOST_64BIT +uint8_t* gc_heap::highest_address; -#define INITIAL_ALLOC ((size_t)(1024*1024*256)) -#define LHEAP_ALLOC ((size_t)(1024*1024*128)) +short* gc_heap::brick_table; -#else +uint32_t* 
gc_heap::card_table; -#define INITIAL_ALLOC ((size_t)(1024*1024*16)) -#define LHEAP_ALLOC ((size_t)(1024*1024*16)) +#ifdef CARD_BUNDLE +uint32_t* gc_heap::card_bundle_table; +#endif //CARD_BUNDLE -#endif // HOST_64BIT +uint8_t* gc_heap::gc_low = 0; -#endif //SERVER_GC +uint8_t* gc_heap::gc_high = 0; -const size_t etw_allocation_tick = 100*1024; +#ifndef USE_REGIONS +uint8_t* gc_heap::demotion_low; -const size_t low_latency_alloc = 256*1024; +uint8_t* gc_heap::demotion_high; -const size_t fgn_check_quantum = 2*1024*1024; +uint8_t* gc_heap::last_gen1_pin_end; +#endif //!USE_REGIONS -#ifdef MH_SC_MARK -const int max_snoop_level = 128; -#endif //MH_SC_MARK +gen_to_condemn_tuning gc_heap::gen_to_condemn_reasons; -#ifdef CARD_BUNDLE -//threshold of heap size to turn on card bundles. -#define SH_TH_CARD_BUNDLE (40*1024*1024) -#define MH_TH_CARD_BUNDLE (180*1024*1024) -#endif //CARD_BUNDLE +size_t gc_heap::etw_allocation_running_amount[total_oh_count]; -// min size to decommit to make the OS call worthwhile -#define MIN_DECOMMIT_SIZE (100*OS_PAGE_SIZE) +uint64_t gc_heap::total_alloc_bytes_soh = 0; -// max size to decommit per millisecond -#define DECOMMIT_SIZE_PER_MILLISECOND (160*1024) +uint64_t gc_heap::total_alloc_bytes_uoh = 0; -// time in milliseconds between decommit steps -#define DECOMMIT_TIME_STEP_MILLISECONDS (100) +int gc_heap::gc_policy = 0; -inline -size_t align_on_page (size_t add) -{ - return ((add + OS_PAGE_SIZE - 1) & ~((size_t)OS_PAGE_SIZE - 1)); -} +uint64_t gc_heap::allocation_running_time; -inline -uint8_t* align_on_page (uint8_t* add) -{ - return (uint8_t*)align_on_page ((size_t) add); -} +size_t gc_heap::allocation_running_amount; -inline -size_t align_lower_page (size_t add) -{ - return (add & ~((size_t)OS_PAGE_SIZE - 1)); -} +heap_segment* gc_heap::ephemeral_heap_segment = 0; -inline -uint8_t* align_lower_page (uint8_t* add) -{ - return (uint8_t*)align_lower_page ((size_t)add); -} +#ifdef USE_REGIONS +#ifdef STRESS_REGIONS +OBJECTHANDLE* 
gc_heap::pinning_handles_for_alloc = 0; +int gc_heap::ph_index_per_heap = 0; +int gc_heap::pinning_seg_interval = 2; +size_t gc_heap::num_gen0_regions = 0; +int gc_heap::sip_seg_interval = 0; +int gc_heap::sip_seg_maxgen_interval = 0; +size_t gc_heap::num_condemned_regions = 0; +#endif //STRESS_REGIONS -inline -size_t align_write_watch_lower_page (size_t add) -{ - return (add & ~(WRITE_WATCH_UNIT_SIZE - 1)); -} +region_free_list gc_heap::free_regions[count_free_region_kinds]; -inline -uint8_t* align_write_watch_lower_page (uint8_t* add) -{ - return (uint8_t*)align_lower_page ((size_t)add); -} +int gc_heap::num_regions_freed_in_sweep = 0; -inline -BOOL power_of_two_p (size_t integer) -{ - return !(integer & (integer-1)); -} +int gc_heap::regions_per_gen[max_generation + 1]; +int gc_heap::planned_regions_per_gen[max_generation + 1]; +int gc_heap::sip_maxgen_regions_per_gen[max_generation + 1]; +heap_segment* gc_heap::reserved_free_regions_sip[max_generation]; +int gc_heap::new_gen0_regions_in_plns = 0; +int gc_heap::new_regions_in_prr = 0; +int gc_heap::new_regions_in_threading = 0; -#ifdef USE_REGIONS -void region_write_barrier_settings (WriteBarrierParameters* args, - gc_heap::region_info* map_region_to_generation_skewed, - uint8_t region_shr) -{ - switch (GCConfig::GetGCWriteBarrier()) - { - default: - case GCConfig::WRITE_BARRIER_DEFAULT: - case GCConfig::WRITE_BARRIER_REGION_BIT: - // bitwise region write barrier is the default now - args->region_to_generation_table = (uint8_t*)map_region_to_generation_skewed; - args->region_shr = region_shr; - args->region_use_bitwise_write_barrier = true; - break; +size_t gc_heap::end_gen0_region_space = 0; - case GCConfig::WRITE_BARRIER_REGION_BYTE: - // bytewise region write barrier - args->region_to_generation_table = (uint8_t*)map_region_to_generation_skewed; - args->region_shr = region_shr; - assert (args->region_use_bitwise_write_barrier == false); - break; +size_t gc_heap::end_gen0_region_committed_space = 0; - case 
GCConfig::WRITE_BARRIER_SERVER: - // server write barrier - // args should have been zero initialized - assert (args->region_use_bitwise_write_barrier == false); - assert (args->region_to_generation_table == nullptr); - assert (args->region_shr == 0); - break; - } -} -#endif //USE_REGIONS +size_t gc_heap::gen0_pinned_free_space = 0; -void stomp_write_barrier_ephemeral (uint8_t* ephemeral_low, uint8_t* ephemeral_high -#ifdef USE_REGIONS - , gc_heap::region_info* map_region_to_generation_skewed - , uint8_t region_shr -#endif //USE_REGIONS - ) -{ -#ifndef USE_REGIONS - initGCShadow(); -#endif +bool gc_heap::gen0_large_chunk_found = false; - WriteBarrierParameters args = {}; - args.operation = WriteBarrierOp::StompEphemeral; - args.is_runtime_suspended = true; - args.ephemeral_low = ephemeral_low; - args.ephemeral_high = ephemeral_high; -#ifdef USE_REGIONS - region_write_barrier_settings (&args, map_region_to_generation_skewed, region_shr); -#endif //USE_REGIONS - GCToEEInterface::StompWriteBarrier(&args); -} +size_t* gc_heap::survived_per_region = nullptr; -void stomp_write_barrier_initialize(uint8_t* ephemeral_low, uint8_t* ephemeral_high -#ifdef USE_REGIONS - , gc_heap::region_info* map_region_to_generation_skewed - , uint8_t region_shr +size_t* gc_heap::old_card_survived_per_region = nullptr; #endif //USE_REGIONS - ) -{ - WriteBarrierParameters args = {}; - args.operation = WriteBarrierOp::Initialize; - args.is_runtime_suspended = true; - args.requires_upper_bounds_check = false; - args.card_table = g_gc_card_table; -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - args.card_bundle_table = g_gc_card_bundle_table; -#endif +BOOL gc_heap::blocking_collection = FALSE; - args.lowest_address = g_gc_lowest_address; - args.highest_address = g_gc_highest_address; - args.ephemeral_low = ephemeral_low; - args.ephemeral_high = ephemeral_high; +heap_segment* gc_heap::freeable_uoh_segment = 0; -#ifdef USE_REGIONS - region_write_barrier_settings (&args, 
map_region_to_generation_skewed, region_shr); -#endif //USE_REGIONS +uint64_t gc_heap::time_bgc_last = 0; - GCToEEInterface::StompWriteBarrier(&args); -} +size_t gc_heap::mark_stack_tos = 0; -//extract the low bits [0,low[ of a uint32_t -#define lowbits(wrd, bits) ((wrd) & ((1 << (bits))-1)) -//extract the high bits [high, 32] of a uint32_t -#define highbits(wrd, bits) ((wrd) & ~((1 << (bits))-1)) +size_t gc_heap::mark_stack_bos = 0; -class mark; -class generation; -class heap_segment; -class CObjectHeader; -class dynamic_data; -class l_heap; -class sorted_table; -class c_synchronize; +size_t gc_heap::mark_stack_array_length = 0; -#ifdef FEATURE_PREMORTEM_FINALIZATION -static -HRESULT AllocateCFinalize(CFinalize **pCFinalize); -#endif // FEATURE_PREMORTEM_FINALIZATION +mark* gc_heap::mark_stack_array = 0; -uint8_t* tree_search (uint8_t* tree, uint8_t* old_address); +#if defined (_DEBUG) && defined (VERIFY_HEAP) +BOOL gc_heap::verify_pinned_queue_p = FALSE; +#endif //_DEBUG && VERIFY_HEAP +uint8_t* gc_heap::oldest_pinned_plug = 0; -#ifdef USE_INTROSORT -#define _sort introsort::sort -#elif defined(USE_VXSORT) -// in this case we have do_vxsort which takes an additional range that -// all items to be sorted are contained in -// so do not #define _sort -#else //USE_INTROSORT -#define _sort qsort1 -void qsort1(uint8_t** low, uint8_t** high, unsigned int depth); -#endif //USE_INTROSORT +size_t gc_heap::num_pinned_objects = 0; -void* virtual_alloc (size_t size); -void* virtual_alloc (size_t size, bool use_large_pages_p, uint16_t numa_node = NUMA_NODE_UNDEFINED); +#ifdef FEATURE_LOH_COMPACTION +size_t gc_heap::loh_pinned_queue_tos = 0; -/* per heap static initialization */ -#if defined(BACKGROUND_GC) && !defined(MULTIPLE_HEAPS) -uint32_t* gc_heap::mark_array; -#endif //BACKGROUND_GC && !MULTIPLE_HEAPS +size_t gc_heap::loh_pinned_queue_bos = 0; -uint8_t** gc_heap::g_mark_list; -uint8_t** gc_heap::g_mark_list_copy; -size_t gc_heap::mark_list_size; -size_t 
gc_heap::g_mark_list_total_size; -bool gc_heap::mark_list_overflow; -#ifdef USE_REGIONS -uint8_t*** gc_heap::g_mark_list_piece; -size_t gc_heap::g_mark_list_piece_size; -size_t gc_heap::g_mark_list_piece_total_size; -#endif //USE_REGIONS +size_t gc_heap::loh_pinned_queue_length = 0; -seg_mapping* seg_mapping_table; +mark* gc_heap::loh_pinned_queue = 0; -#ifdef FEATURE_BASICFREEZE -sorted_table* gc_heap::seg_table; -#endif //FEATURE_BASICFREEZE +BOOL gc_heap::loh_compacted_p = FALSE; +#endif //FEATURE_LOH_COMPACTION -#ifdef MULTIPLE_HEAPS -GCEvent gc_heap::ee_suspend_event; -size_t gc_heap::min_gen0_balance_delta = 0; -size_t gc_heap::min_balance_threshold = 0; -#endif //MULTIPLE_HEAPS +#ifdef BACKGROUND_GC -VOLATILE(BOOL) gc_heap::gc_started; +EEThreadId gc_heap::bgc_thread_id; -#ifdef MULTIPLE_HEAPS -GCEvent gc_heap::gc_start_event; -bool gc_heap::gc_thread_no_affinitize_p = false; -uintptr_t process_mask = 0; +uint8_t* gc_heap::background_written_addresses [array_size+2]; -int gc_heap::n_heaps; // current number of heaps -int gc_heap::n_max_heaps; // maximum number of heaps +heap_segment* gc_heap::freeable_soh_segment = 0; -gc_heap** gc_heap::g_heaps; +size_t gc_heap::bgc_overflow_count = 0; -#if !defined(USE_REGIONS) || defined(_DEBUG) -size_t* gc_heap::g_promoted; -#endif //!USE_REGIONS || _DEBUG +size_t gc_heap::bgc_begin_uoh_size[uoh_generation_count] = {}; +size_t gc_heap::bgc_uoh_current_size[uoh_generation_count] = {}; +size_t gc_heap::end_uoh_size[uoh_generation_count] = {}; -#ifdef MH_SC_MARK -int* gc_heap::g_mark_stack_busy; -#endif //MH_SC_MARK +size_t gc_heap::uoh_a_no_bgc[uoh_generation_count] = {}; +size_t gc_heap::uoh_a_bgc_marking[uoh_generation_count] = {}; +size_t gc_heap::uoh_a_bgc_planning[uoh_generation_count] = {}; +#ifdef BGC_SERVO_TUNING +size_t gc_heap::bgc_maxgen_end_fl_size = 0; +#endif //BGC_SERVO_TUNING -#ifdef BACKGROUND_GC -size_t* gc_heap::g_bpromoted; -#endif //BACKGROUND_GC +size_t gc_heap::background_soh_size_end_mark = 0; -BOOL 
gc_heap::gradual_decommit_in_progress_p = FALSE; -size_t gc_heap::max_decommit_step_size = 0; -#else //MULTIPLE_HEAPS +size_t gc_heap::background_soh_alloc_count = 0; -#if !defined(USE_REGIONS) || defined(_DEBUG) -size_t gc_heap::g_promoted; -#endif //!USE_REGIONS || _DEBUG +uint8_t** gc_heap::background_mark_stack_tos = 0; -#ifdef BACKGROUND_GC -size_t gc_heap::g_bpromoted; -#endif //BACKGROUND_GC +uint8_t** gc_heap::background_mark_stack_array = 0; -// this is just to have fewer #ifdefs in code shared between WKS and SVR -// for filling out ScanContext structs -const int n_heaps = 1; +size_t gc_heap::background_mark_stack_array_length = 0; -#endif //MULTIPLE_HEAPS +BOOL gc_heap::processed_eph_overflow_p = FALSE; -size_t gc_heap::card_table_element_layout[total_bookkeeping_elements + 1]; -uint8_t* gc_heap::bookkeeping_start = nullptr; #ifdef USE_REGIONS -uint8_t* gc_heap::bookkeeping_covered_committed = nullptr; -size_t gc_heap::bookkeeping_sizes[total_bookkeeping_elements]; -#endif //USE_REGIONS - -size_t gc_heap::reserved_memory = 0; -size_t gc_heap::reserved_memory_limit = 0; -BOOL gc_heap::g_low_memory_status; +BOOL gc_heap::background_overflow_p = FALSE; +#else //USE_REGIONS +uint8_t* gc_heap::background_min_overflow_address =0; -static gc_reason gc_trigger_reason = reason_empty; +uint8_t* gc_heap::background_max_overflow_address =0; -gc_latency_level gc_heap::latency_level = latency_level_default; +uint8_t* gc_heap::background_min_soh_overflow_address =0; -gc_mechanisms gc_heap::settings; +uint8_t* gc_heap::background_max_soh_overflow_address =0; -gc_history_global gc_heap::gc_data_global; +heap_segment* gc_heap::saved_overflow_ephemeral_seg = 0; -uint64_t gc_heap::gc_last_ephemeral_decommit_time = 0; +heap_segment* gc_heap::saved_sweep_ephemeral_seg = 0; -CLRCriticalSection gc_heap::check_commit_cs; +uint8_t* gc_heap::saved_sweep_ephemeral_start = 0; +#endif //USE_REGIONS -#ifdef COMMITTED_BYTES_SHADOW -CLRCriticalSection gc_heap::decommit_lock; -#endif 
//COMMITTED_BYTES_SHADOW +Thread* gc_heap::bgc_thread = 0; -size_t gc_heap::current_total_committed = 0; +uint8_t** gc_heap::c_mark_list = 0; -size_t gc_heap::committed_by_oh[recorded_committed_bucket_counts]; +size_t gc_heap::c_mark_list_length = 0; -size_t gc_heap::current_total_committed_bookkeeping = 0; +size_t gc_heap::c_mark_list_index = 0; -BOOL gc_heap::reset_mm_p = TRUE; +gc_history_per_heap gc_heap::bgc_data_per_heap; -#ifdef FEATURE_EVENT_TRACE -bool gc_heap::informational_event_enabled_p = false; +BOOL gc_heap::bgc_thread_running; -uint64_t* gc_heap::gc_time_info = 0; +CLRCriticalSection gc_heap::bgc_threads_timeout_cs; -#ifdef BACKGROUND_GC -uint64_t* gc_heap::bgc_time_info = 0; #endif //BACKGROUND_GC -size_t gc_heap::physical_memory_from_config = 0; +uint8_t** gc_heap::mark_list; +uint8_t** gc_heap::mark_list_index; +uint8_t** gc_heap::mark_list_end; -size_t gc_heap::gen0_min_budget_from_config = 0; +#ifdef SNOOP_STATS +snoop_stats_data gc_heap::snoop_stat; +#endif //SNOOP_STATS -size_t gc_heap::gen0_max_budget_from_config = 0; +uint8_t* gc_heap::min_overflow_address = MAX_PTR; -int gc_heap::high_mem_percent_from_config = 0; +uint8_t* gc_heap::max_overflow_address = 0; -bool gc_heap::use_frozen_segments_p = false; +uint8_t* gc_heap::shigh = 0; -#ifdef FEATURE_LOH_COMPACTION -gc_heap::etw_loh_compact_info* gc_heap::loh_compact_info; -#endif //FEATURE_LOH_COMPACTION -#endif //FEATURE_EVENT_TRACE +uint8_t* gc_heap::slow = MAX_PTR; -bool gc_heap::hard_limit_config_p = false; +#ifndef USE_REGIONS +size_t gc_heap::ordered_free_space_indices[MAX_NUM_BUCKETS]; -#if defined(SHORT_PLUGS) && !defined(USE_REGIONS) -double gc_heap::short_plugs_pad_ratio = 0; -#endif //SHORT_PLUGS && !USE_REGIONS +size_t gc_heap::saved_ordered_free_space_indices[MAX_NUM_BUCKETS]; -int gc_heap::generation_skip_ratio_threshold = 0; -int gc_heap::conserve_mem_setting = 0; -bool gc_heap::spin_count_unit_config_p = false; +size_t gc_heap::ordered_plug_indices[MAX_NUM_BUCKETS]; -uint64_t 
gc_heap::suspended_start_time = 0; -uint64_t gc_heap::end_gc_time = 0; -uint64_t gc_heap::total_suspended_time = 0; -uint64_t gc_heap::process_start_time = 0; -last_recorded_gc_info gc_heap::last_ephemeral_gc_info; -last_recorded_gc_info gc_heap::last_full_blocking_gc_info; +size_t gc_heap::saved_ordered_plug_indices[MAX_NUM_BUCKETS]; -uint64_t gc_heap::last_alloc_reset_suspended_end_time = 0; -size_t gc_heap::max_peak_heap_size = 0; -VOLATILE(size_t) gc_heap::llc_size = 0; +BOOL gc_heap::ordered_plug_indices_init = FALSE; -#ifdef BACKGROUND_GC -last_recorded_gc_info gc_heap::last_bgc_info[2]; -VOLATILE(bool) gc_heap::is_last_recorded_bgc = false; -VOLATILE(int) gc_heap::last_bgc_info_index = 0; -#endif //BACKGROUND_GC +BOOL gc_heap::use_bestfit = FALSE; -#ifdef DYNAMIC_HEAP_COUNT -size_t gc_heap::hc_change_cancelled_count_prep = 0; -#ifdef BACKGROUND_GC -int gc_heap::bgc_th_creation_hist_index = 0; -gc_heap::bgc_thread_creation_history gc_heap::bgc_th_creation_hist[max_bgc_thread_creation_count]; -size_t gc_heap::bgc_th_count_created = 0; -size_t gc_heap::bgc_th_count_created_th_existed = 0; -size_t gc_heap::bgc_th_count_creation_failed = 0; -size_t gc_heap::bgc_init_gc_index = 0; -VOLATILE(short) gc_heap::bgc_init_n_heaps = 0; -size_t gc_heap::hc_change_cancelled_count_bgc = 0; -#endif //BACKGROUND_GC -#endif //DYNAMIC_HEAP_COUNT +uint8_t* gc_heap::bestfit_first_pin = 0; -#if defined(HOST_64BIT) -#define MAX_ALLOWED_MEM_LOAD 85 +BOOL gc_heap::commit_end_of_seg = FALSE; -// consider putting this in dynamic data - -// we may want different values for workstation -// and server GC. 
-#define MIN_YOUNGEST_GEN_DESIRED (16*1024*1024) +size_t gc_heap::max_free_space_items = 0; -size_t gc_heap::youngest_gen_desired_th; -#endif //HOST_64BIT +size_t gc_heap::free_space_buckets = 0; -uint64_t gc_heap::mem_one_percent = 0; +size_t gc_heap::free_space_items = 0; -uint32_t gc_heap::high_memory_load_th = 0; +int gc_heap::trimmed_free_space_index = 0; -uint32_t gc_heap::m_high_memory_load_th; +size_t gc_heap::total_ephemeral_plugs = 0; -uint32_t gc_heap::v_high_memory_load_th; +seg_free_spaces* gc_heap::bestfit_seg = 0; -uint32_t gc_heap::almost_high_memory_load_th; +size_t gc_heap::total_ephemeral_size = 0; +#endif //!USE_REGIONS -bool gc_heap::is_restricted_physical_mem; +#ifdef HEAP_ANALYZE -uint64_t gc_heap::total_physical_mem = 0; +size_t gc_heap::internal_root_array_length = initial_internal_roots; -uint64_t gc_heap::entry_available_physical_mem = 0; +uint8_t** gc_heap::internal_root_array = 0; -size_t gc_heap::heap_hard_limit = 0; +size_t gc_heap::internal_root_array_index = 0; -size_t gc_heap::heap_hard_limit_oh[total_oh_count]; +BOOL gc_heap::heap_analyze_success = TRUE; -#ifdef USE_REGIONS +uint8_t* gc_heap::current_obj = 0; +size_t gc_heap::current_obj_size = 0; -size_t gc_heap::regions_range = 0; +#endif //HEAP_ANALYZE -#endif //USE_REGIONS +#ifdef GC_CONFIG_DRIVEN +size_t gc_heap::interesting_data_per_gc[max_idp_count]; +//size_t gc_heap::interesting_data_per_heap[max_idp_count]; +//size_t gc_heap::interesting_mechanisms_per_heap[max_im_count]; +#endif //GC_CONFIG_DRIVEN +#endif //MULTIPLE_HEAPS -bool affinity_config_specified_p = false; +no_gc_region_info gc_heap::current_no_gc_region_info; +FinalizerWorkItem* gc_heap::finalizer_work; +BOOL gc_heap::proceed_with_gc_p = FALSE; +GCSpinLock gc_heap::gc_lock; -#ifdef USE_REGIONS -region_allocator global_region_allocator; -uint8_t*(*initial_regions)[total_generation_count][2] = nullptr; -size_t gc_heap::region_count = 0; +#ifdef FEATURE_JAVAMARSHAL +uint8_t** gc_heap::global_bridge_list; +size_t 
gc_heap::num_global_bridge_objs; +#endif //FEATURE_JAVAMARSHAL -gc_heap::region_info* gc_heap::map_region_to_generation = nullptr; -gc_heap::region_info* gc_heap::map_region_to_generation_skewed = nullptr; +#ifdef BACKGROUND_GC +uint64_t gc_heap::total_uoh_a_last_bgc = 0; +#endif //BACKGROUND_GC +#ifdef USE_REGIONS +region_free_list gc_heap::global_regions_to_decommit[count_free_region_kinds]; +region_free_list gc_heap::global_free_huge_regions; +#else //USE_REGIONS +size_t gc_heap::eph_gen_starts_size = 0; +heap_segment* gc_heap::segment_standby_list; #endif //USE_REGIONS +bool gc_heap::use_large_pages_p = 0; +#ifdef HEAP_BALANCE_INSTRUMENTATION +size_t gc_heap::last_gc_end_time_us = 0; +#endif //HEAP_BALANCE_INSTRUMENTATION +#ifdef USE_REGIONS +bool gc_heap::enable_special_regions_p = false; +#else //USE_REGIONS +size_t gc_heap::min_segment_size = 0; +size_t gc_heap::min_uoh_segment_size = 0; +#endif //!USE_REGIONS +size_t gc_heap::min_segment_size_shr = 0; +size_t gc_heap::soh_segment_size = 0; +size_t gc_heap::segment_info_size = 0; -#ifdef BACKGROUND_GC -GCEvent gc_heap::bgc_start_event; - -gc_mechanisms gc_heap::saved_bgc_settings; +#ifdef GC_CONFIG_DRIVEN +size_t gc_heap::compact_or_sweep_gcs[2]; +#endif //GC_CONFIG_DRIVEN -gc_history_global gc_heap::bgc_data_global; +#ifdef FEATURE_LOH_COMPACTION +BOOL gc_heap::loh_compaction_always_p = FALSE; +gc_loh_compaction_mode gc_heap::loh_compaction_mode = loh_compaction_default; +#endif //FEATURE_LOH_COMPACTION -GCEvent gc_heap::background_gc_done_event; +GCEvent gc_heap::full_gc_approach_event; -GCEvent gc_heap::ee_proceed_event; +GCEvent gc_heap::full_gc_end_event; -bool gc_heap::gc_can_use_concurrent = false; +uint32_t gc_heap::fgn_loh_percent = 0; -bool gc_heap::temp_disable_concurrent_p = false; +#ifdef BACKGROUND_GC +BOOL gc_heap::fgn_last_gc_was_concurrent = FALSE; +#endif //BACKGROUND_GC -uint32_t gc_heap::cm_in_progress = FALSE; +VOLATILE(bool) gc_heap::full_gc_approach_event_set; -BOOL 
gc_heap::dont_restart_ee_p = FALSE; +size_t gc_heap::full_gc_counts[gc_type_max]; -BOOL gc_heap::keep_bgc_threads_p = FALSE; +bool gc_heap::maxgen_size_inc_p = false; -GCEvent gc_heap::bgc_threads_sync_event; +#ifndef USE_REGIONS +BOOL gc_heap::should_expand_in_full_gc = FALSE; +#endif //!USE_REGIONS -BOOL gc_heap::do_ephemeral_gc_p = FALSE; +#ifdef DYNAMIC_HEAP_COUNT +int gc_heap::dynamic_adaptation_mode = dynamic_adaptation_default; +gc_heap::dynamic_heap_count_data_t SVR::gc_heap::dynamic_heap_count_data; +size_t gc_heap::current_total_soh_stable_size = 0; +uint64_t gc_heap::last_suspended_end_time = 0; +uint64_t gc_heap::change_heap_count_time = 0; +uint64_t gc_heap::total_change_heap_count = 0; +uint64_t gc_heap::total_change_heap_count_time = 0; +size_t gc_heap::gc_index_full_gc_end = 0; +uint64_t gc_heap::before_distribute_free_regions_time = 0; +bool gc_heap::trigger_initial_gen2_p = false; -BOOL gc_heap::do_concurrent_p = FALSE; - -size_t gc_heap::ephemeral_fgc_counts[max_generation]; +#ifdef BACKGROUND_GC +bool gc_heap::trigger_bgc_for_rethreading_p = false; +int gc_heap::total_bgc_threads = 0; +int gc_heap::last_bgc_n_heaps = 0; +int gc_heap::last_total_bgc_threads = 0; +#endif //BACKGROUND_GC -VOLATILE(c_gc_state) gc_heap::current_c_gc_state = c_gc_state_free; +#ifdef STRESS_DYNAMIC_HEAP_COUNT +int gc_heap::heaps_in_this_gc = 0; +int gc_heap::bgc_to_ngc2_ratio = 0; +#endif //STRESS_DYNAMIC_HEAP_COUNT +#endif // DYNAMIC_HEAP_COUNT -VOLATILE(BOOL) gc_heap::gc_background_running = FALSE; -#endif //BACKGROUND_GC +// Provisional mode related stuff. 
+bool gc_heap::provisional_mode_triggered = false; +bool gc_heap::pm_trigger_full_gc = false; +size_t gc_heap::provisional_triggered_gc_count = 0; +size_t gc_heap::provisional_off_gc_count = 0; +size_t gc_heap::num_provisional_triggered = 0; +bool gc_heap::pm_stress_on = false; -#ifdef USE_REGIONS -#ifdef MULTIPLE_HEAPS -uint8_t* gc_heap::gc_low; -uint8_t* gc_heap::gc_high; -#endif //MULTIPLE_HEAPS -VOLATILE(uint8_t*) gc_heap::ephemeral_low; -VOLATILE(uint8_t*) gc_heap::ephemeral_high; -#endif //USE_REGIONS +#ifdef HEAP_ANALYZE +BOOL gc_heap::heap_analyze_enabled = FALSE; +#endif //HEAP_ANALYZE #ifndef MULTIPLE_HEAPS -#ifdef SPINLOCK_HISTORY -int gc_heap::spinlock_info_index = 0; -spinlock_info gc_heap::last_spinlock_info[max_saved_spinlock_info]; -allocation_state gc_heap::current_uoh_alloc_state = (allocation_state)-1; -#endif //SPINLOCK_HISTORY -uint32_t gc_heap::fgn_maxgen_percent = 0; -size_t gc_heap::fgn_last_alloc = 0; +alloc_list gc_heap::gen2_alloc_list[NUM_GEN2_ALIST - 1]; +alloc_list gc_heap::loh_alloc_list [NUM_LOH_ALIST - 1]; +alloc_list gc_heap::poh_alloc_list [NUM_POH_ALIST - 1]; -int gc_heap::generation_skip_ratio = 100; -#ifdef FEATURE_CARD_MARKING_STEALING -VOLATILE(size_t) gc_heap::n_eph_soh = 0; -VOLATILE(size_t) gc_heap::n_gen_soh = 0; -VOLATILE(size_t) gc_heap::n_eph_loh = 0; -VOLATILE(size_t) gc_heap::n_gen_loh = 0; -#endif //FEATURE_CARD_MARKING_STEALING +#ifdef DOUBLY_LINKED_FL +// size we removed with no undo; only for recording purpose +size_t gc_heap::gen2_removed_no_undo = 0; +size_t gc_heap::saved_pinned_plug_index = INVALID_SAVED_PINNED_PLUG_INDEX; +#endif //DOUBLY_LINKED_FL -uint64_t gc_heap::loh_alloc_since_cg = 0; +#ifdef FEATURE_EVENT_TRACE +etw_bucket_info gc_heap::bucket_info[NUM_GEN2_ALIST]; +#endif //FEATURE_EVENT_TRACE -BOOL gc_heap::elevation_requested = FALSE; +dynamic_data gc_heap::dynamic_data_table [total_generation_count]; +gc_history_per_heap gc_heap::gc_data_per_heap; +size_t gc_heap::total_promoted_bytes = 0; +size_t 
gc_heap::finalization_promoted_bytes = 0; +size_t gc_heap::maxgen_pinned_compact_before_advance = 0; -BOOL gc_heap::last_gc_before_oom = FALSE; +uint8_t* gc_heap::alloc_allocated = 0; -BOOL gc_heap::sufficient_gen0_space_p = FALSE; +size_t gc_heap::allocation_quantum = CLR_SIZE; -BOOL gc_heap::decide_promote_gen1_pins_p = TRUE; +GCSpinLock gc_heap::more_space_lock_soh; +GCSpinLock gc_heap::more_space_lock_uoh; #ifdef BACKGROUND_GC -uint8_t* gc_heap::background_saved_lowest_address = 0; -uint8_t* gc_heap::background_saved_highest_address = 0; -uint8_t* gc_heap::next_sweep_obj = 0; -uint8_t* gc_heap::current_sweep_pos = 0; -#ifdef DOUBLY_LINKED_FL -heap_segment* gc_heap::current_sweep_seg = 0; -#endif //DOUBLY_LINKED_FL -exclusive_sync* gc_heap::bgc_alloc_lock; +VOLATILE(int32_t) gc_heap::uoh_alloc_thread_count = 0; #endif //BACKGROUND_GC -oom_history gc_heap::oom_info; - -int gc_heap::oomhist_index_per_heap = 0; +#ifdef SYNCHRONIZATION_STATS +unsigned int gc_heap::good_suspension = 0; +unsigned int gc_heap::bad_suspension = 0; +uint64_t gc_heap::total_msl_acquire = 0; +unsigned int gc_heap::num_msl_acquired = 0; +unsigned int gc_heap::num_high_msl_acquire = 0; +unsigned int gc_heap::num_low_msl_acquire = 0; +#endif //SYNCHRONIZATION_STATS -oom_history gc_heap::oomhist_per_heap[max_oom_history_count]; +size_t gc_heap::alloc_contexts_used = 0; +size_t gc_heap::soh_allocation_no_gc = 0; +size_t gc_heap::loh_allocation_no_gc = 0; +bool gc_heap::no_gc_oom_p = false; +heap_segment* gc_heap::saved_loh_segment_no_gc = 0; -fgm_history gc_heap::fgm_result; +#endif //MULTIPLE_HEAPS -size_t gc_heap::allocated_since_last_gc[total_oh_count]; +#ifndef MULTIPLE_HEAPS -#ifndef USE_REGIONS -BOOL gc_heap::ro_segments_in_range = FALSE; -uint8_t* gc_heap::ephemeral_low; -uint8_t* gc_heap::ephemeral_high; -BOOL gc_heap::ephemeral_promotion; -uint8_t* gc_heap::saved_ephemeral_plan_start[ephemeral_generation_count]; -size_t 
gc_heap::saved_ephemeral_plan_start_size[ephemeral_generation_count]; -#endif //!USE_REGIONS +BOOL gc_heap::gen0_bricks_cleared = FALSE; -uint8_t* gc_heap::lowest_address; +int gc_heap::gen0_must_clear_bricks = 0; -uint8_t* gc_heap::highest_address; +#ifdef FEATURE_PREMORTEM_FINALIZATION +CFinalize* gc_heap::finalize_queue = 0; +#endif // FEATURE_PREMORTEM_FINALIZATION -short* gc_heap::brick_table; +#ifdef FEATURE_CARD_MARKING_STEALING +VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_soh; +VOLATILE(bool) gc_heap::card_mark_done_soh; +VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_loh; +VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_poh; +VOLATILE(bool) gc_heap::card_mark_done_uoh; +#endif // FEATURE_CARD_MARKING_STEALING -uint32_t* gc_heap::card_table; +generation gc_heap::generation_table [total_generation_count]; -#ifdef CARD_BUNDLE -uint32_t* gc_heap::card_bundle_table; -#endif //CARD_BUNDLE +size_t gc_heap::interesting_data_per_heap[max_idp_count]; -uint8_t* gc_heap::gc_low = 0; +size_t gc_heap::compact_reasons_per_heap[max_compact_reasons_count]; -uint8_t* gc_heap::gc_high = 0; +size_t gc_heap::expand_mechanisms_per_heap[max_expand_mechanisms_count]; -#ifndef USE_REGIONS -uint8_t* gc_heap::demotion_low; +size_t gc_heap::interesting_mechanism_bits_per_heap[max_gc_mechanism_bits_count]; -uint8_t* gc_heap::demotion_high; +mark_queue_t gc_heap::mark_queue; -uint8_t* gc_heap::last_gen1_pin_end; -#endif //!USE_REGIONS +#ifdef USE_REGIONS +bool gc_heap::special_sweep_p = false; +#endif //USE_REGIONS -gen_to_condemn_tuning gc_heap::gen_to_condemn_reasons; +int gc_heap::loh_pinned_queue_decay = LOH_PIN_DECAY; -size_t gc_heap::etw_allocation_running_amount[total_oh_count]; +#endif // MULTIPLE_HEAPS -uint64_t gc_heap::total_alloc_bytes_soh = 0; +/* end of per heap static initialization */ -uint64_t gc_heap::total_alloc_bytes_uoh = 0; +#ifdef USE_REGIONS +const size_t uninitialized_end_gen0_region_space = (size_t)(-1); +#endif //USE_REGIONS -int gc_heap::gc_policy 
= 0; +// budget smoothing +size_t gc_heap::smoothed_desired_total[total_generation_count]; +/* end of static initialization */ -uint64_t gc_heap::allocation_running_time; +void gen_to_condemn_tuning::print (int heap_num) +{ +#ifdef DT_LOG + dprintf (DT_LOG_0, ("condemned reasons (%d %d)", condemn_reasons_gen, condemn_reasons_condition)); + dprintf (DT_LOG_0, ("%s", record_condemn_reasons_gen_header)); + gc_condemn_reason_gen r_gen; + for (int i = 0; i < gcrg_max; i++) + { + r_gen = (gc_condemn_reason_gen)(i); + str_reasons_gen[i * 2] = get_gen_char (get_gen (r_gen)); + } + dprintf (DT_LOG_0, ("[%2d]%s", heap_num, str_reasons_gen)); -size_t gc_heap::allocation_running_amount; + dprintf (DT_LOG_0, ("%s", record_condemn_reasons_condition_header)); + gc_condemn_reason_condition r_condition; + for (int i = 0; i < gcrc_max; i++) + { + r_condition = (gc_condemn_reason_condition)(i); + str_reasons_condition[i * 2] = get_condition_char (get_condition (r_condition)); + } -heap_segment* gc_heap::ephemeral_heap_segment = 0; + dprintf (DT_LOG_0, ("[%2d]%s", heap_num, str_reasons_condition)); +#else + UNREFERENCED_PARAMETER(heap_num); +#endif //DT_LOG +} -#ifdef USE_REGIONS -#ifdef STRESS_REGIONS -OBJECTHANDLE* gc_heap::pinning_handles_for_alloc = 0; -int gc_heap::ph_index_per_heap = 0; -int gc_heap::pinning_seg_interval = 2; -size_t gc_heap::num_gen0_regions = 0; -int gc_heap::sip_seg_interval = 0; -int gc_heap::sip_seg_maxgen_interval = 0; -size_t gc_heap::num_condemned_regions = 0; -#endif //STRESS_REGIONS +void gc_generation_data::print (int heap_num, int gen_num) +{ +#if defined(SIMPLE_DPRINTF) && defined(DT_LOG) + dprintf (DT_LOG_0, ("[%2d]gen%d beg %zd fl %zd fo %zd end %zd fl %zd fo %zd in %zd p %zd np %zd alloc %zd", + heap_num, gen_num, + size_before, + free_list_space_before, free_obj_space_before, + size_after, + free_list_space_after, free_obj_space_after, + in, pinned_surv, npinned_surv, + new_allocation)); +#else + UNREFERENCED_PARAMETER(heap_num); + 
UNREFERENCED_PARAMETER(gen_num); +#endif //SIMPLE_DPRINTF && DT_LOG +} -region_free_list gc_heap::free_regions[count_free_region_kinds]; +void gc_history_per_heap::set_mechanism (gc_mechanism_per_heap mechanism_per_heap, uint32_t value) +{ + uint32_t* mechanism = &mechanisms[mechanism_per_heap]; + *mechanism = 0; + *mechanism |= mechanism_mask; + *mechanism |= (1 << value); -int gc_heap::num_regions_freed_in_sweep = 0; +#ifdef DT_LOG + gc_mechanism_descr* descr = &gc_mechanisms_descr[mechanism_per_heap]; + dprintf (DT_LOG_0, ("setting %s: %s", + descr->name, + (descr->descr)[value])); +#endif //DT_LOG +} -int gc_heap::regions_per_gen[max_generation + 1]; +void gc_history_per_heap::print() +{ +#if defined(SIMPLE_DPRINTF) && defined(DT_LOG) + for (int i = 0; i < (sizeof (gen_data)/sizeof (gc_generation_data)); i++) + { + gen_data[i].print (heap_index, i); + } -int gc_heap::planned_regions_per_gen[max_generation + 1]; + dprintf (DT_LOG_0, ("fla %zd flr %zd esa %zd ca %zd pa %zd paa %zd, rfle %d, ec %zd", + maxgen_size_info.free_list_allocated, + maxgen_size_info.free_list_rejected, + maxgen_size_info.end_seg_allocated, + maxgen_size_info.condemned_allocated, + maxgen_size_info.pinned_allocated, + maxgen_size_info.pinned_allocated_advance, + maxgen_size_info.running_free_list_efficiency, + extra_gen0_committed)); -int gc_heap::sip_maxgen_regions_per_gen[max_generation + 1]; + int mechanism = 0; + gc_mechanism_descr* descr = 0; -heap_segment* gc_heap::reserved_free_regions_sip[max_generation]; + for (int i = 0; i < max_mechanism_per_heap; i++) + { + mechanism = get_mechanism ((gc_mechanism_per_heap)i); -int gc_heap::new_gen0_regions_in_plns = 0; -int gc_heap::new_regions_in_prr = 0; -int gc_heap::new_regions_in_threading = 0; - -size_t gc_heap::end_gen0_region_space = 0; - -size_t gc_heap::end_gen0_region_committed_space = 0; - -size_t gc_heap::gen0_pinned_free_space = 0; - -bool gc_heap::gen0_large_chunk_found = false; - -size_t* gc_heap::survived_per_region = nullptr; 
- -size_t* gc_heap::old_card_survived_per_region = nullptr; -#endif //USE_REGIONS - -BOOL gc_heap::blocking_collection = FALSE; - -heap_segment* gc_heap::freeable_uoh_segment = 0; - -uint64_t gc_heap::time_bgc_last = 0; - -size_t gc_heap::mark_stack_tos = 0; - -size_t gc_heap::mark_stack_bos = 0; - -size_t gc_heap::mark_stack_array_length = 0; - -mark* gc_heap::mark_stack_array = 0; - -#if defined (_DEBUG) && defined (VERIFY_HEAP) -BOOL gc_heap::verify_pinned_queue_p = FALSE; -#endif //_DEBUG && VERIFY_HEAP - -uint8_t* gc_heap::oldest_pinned_plug = 0; - -size_t gc_heap::num_pinned_objects = 0; - -#ifdef FEATURE_LOH_COMPACTION -size_t gc_heap::loh_pinned_queue_tos = 0; - -size_t gc_heap::loh_pinned_queue_bos = 0; - -size_t gc_heap::loh_pinned_queue_length = 0; - -mark* gc_heap::loh_pinned_queue = 0; - -BOOL gc_heap::loh_compacted_p = FALSE; -#endif //FEATURE_LOH_COMPACTION - -#ifdef BACKGROUND_GC - -EEThreadId gc_heap::bgc_thread_id; - -uint8_t* gc_heap::background_written_addresses [array_size+2]; - -heap_segment* gc_heap::freeable_soh_segment = 0; - -size_t gc_heap::bgc_overflow_count = 0; - -size_t gc_heap::bgc_begin_uoh_size[uoh_generation_count] = {}; -size_t gc_heap::bgc_uoh_current_size[uoh_generation_count] = {}; -size_t gc_heap::end_uoh_size[uoh_generation_count] = {}; - -size_t gc_heap::uoh_a_no_bgc[uoh_generation_count] = {}; -size_t gc_heap::uoh_a_bgc_marking[uoh_generation_count] = {}; -size_t gc_heap::uoh_a_bgc_planning[uoh_generation_count] = {}; -#ifdef BGC_SERVO_TUNING -size_t gc_heap::bgc_maxgen_end_fl_size = 0; -#endif //BGC_SERVO_TUNING - -size_t gc_heap::background_soh_size_end_mark = 0; - -size_t gc_heap::background_soh_alloc_count = 0; - -uint8_t** gc_heap::background_mark_stack_tos = 0; - -uint8_t** gc_heap::background_mark_stack_array = 0; - -size_t gc_heap::background_mark_stack_array_length = 0; - -BOOL gc_heap::processed_eph_overflow_p = FALSE; - -#ifdef USE_REGIONS -BOOL gc_heap::background_overflow_p = FALSE; -#else //USE_REGIONS 
-uint8_t* gc_heap::background_min_overflow_address =0; - -uint8_t* gc_heap::background_max_overflow_address =0; - -uint8_t* gc_heap::background_min_soh_overflow_address =0; - -uint8_t* gc_heap::background_max_soh_overflow_address =0; - -heap_segment* gc_heap::saved_overflow_ephemeral_seg = 0; - -heap_segment* gc_heap::saved_sweep_ephemeral_seg = 0; - -uint8_t* gc_heap::saved_sweep_ephemeral_start = 0; -#endif //USE_REGIONS - -Thread* gc_heap::bgc_thread = 0; - -uint8_t** gc_heap::c_mark_list = 0; - -size_t gc_heap::c_mark_list_length = 0; - -size_t gc_heap::c_mark_list_index = 0; - -gc_history_per_heap gc_heap::bgc_data_per_heap; - -BOOL gc_heap::bgc_thread_running; - -CLRCriticalSection gc_heap::bgc_threads_timeout_cs; - -#endif //BACKGROUND_GC - -uint8_t** gc_heap::mark_list; -uint8_t** gc_heap::mark_list_index; -uint8_t** gc_heap::mark_list_end; - -#ifdef SNOOP_STATS -snoop_stats_data gc_heap::snoop_stat; -#endif //SNOOP_STATS - -uint8_t* gc_heap::min_overflow_address = MAX_PTR; - -uint8_t* gc_heap::max_overflow_address = 0; - -uint8_t* gc_heap::shigh = 0; - -uint8_t* gc_heap::slow = MAX_PTR; - -#ifndef USE_REGIONS -size_t gc_heap::ordered_free_space_indices[MAX_NUM_BUCKETS]; - -size_t gc_heap::saved_ordered_free_space_indices[MAX_NUM_BUCKETS]; - -size_t gc_heap::ordered_plug_indices[MAX_NUM_BUCKETS]; - -size_t gc_heap::saved_ordered_plug_indices[MAX_NUM_BUCKETS]; - -BOOL gc_heap::ordered_plug_indices_init = FALSE; - -BOOL gc_heap::use_bestfit = FALSE; - -uint8_t* gc_heap::bestfit_first_pin = 0; - -BOOL gc_heap::commit_end_of_seg = FALSE; - -size_t gc_heap::max_free_space_items = 0; - -size_t gc_heap::free_space_buckets = 0; - -size_t gc_heap::free_space_items = 0; - -int gc_heap::trimmed_free_space_index = 0; - -size_t gc_heap::total_ephemeral_plugs = 0; - -seg_free_spaces* gc_heap::bestfit_seg = 0; - -size_t gc_heap::total_ephemeral_size = 0; -#endif //!USE_REGIONS - -#ifdef HEAP_ANALYZE - -size_t gc_heap::internal_root_array_length = initial_internal_roots; - 
-uint8_t** gc_heap::internal_root_array = 0; - -size_t gc_heap::internal_root_array_index = 0; - -BOOL gc_heap::heap_analyze_success = TRUE; - -uint8_t* gc_heap::current_obj = 0; -size_t gc_heap::current_obj_size = 0; - -#endif //HEAP_ANALYZE - -#ifdef GC_CONFIG_DRIVEN -size_t gc_heap::interesting_data_per_gc[max_idp_count]; -//size_t gc_heap::interesting_data_per_heap[max_idp_count]; -//size_t gc_heap::interesting_mechanisms_per_heap[max_im_count]; -#endif //GC_CONFIG_DRIVEN -#endif //MULTIPLE_HEAPS - -no_gc_region_info gc_heap::current_no_gc_region_info; -FinalizerWorkItem* gc_heap::finalizer_work; -BOOL gc_heap::proceed_with_gc_p = FALSE; -GCSpinLock gc_heap::gc_lock; - -#ifdef FEATURE_JAVAMARSHAL -uint8_t** gc_heap::global_bridge_list; -size_t gc_heap::num_global_bridge_objs; -#endif //FEATURE_JAVAMARSHAL - -#ifdef BACKGROUND_GC -uint64_t gc_heap::total_uoh_a_last_bgc = 0; -#endif //BACKGROUND_GC - -#ifdef USE_REGIONS -region_free_list gc_heap::global_regions_to_decommit[count_free_region_kinds]; -region_free_list gc_heap::global_free_huge_regions; -#else //USE_REGIONS -size_t gc_heap::eph_gen_starts_size = 0; -heap_segment* gc_heap::segment_standby_list; -#endif //USE_REGIONS -bool gc_heap::use_large_pages_p = 0; -#ifdef HEAP_BALANCE_INSTRUMENTATION -size_t gc_heap::last_gc_end_time_us = 0; -#endif //HEAP_BALANCE_INSTRUMENTATION -#ifdef USE_REGIONS -bool gc_heap::enable_special_regions_p = false; -#else //USE_REGIONS -size_t gc_heap::min_segment_size = 0; -size_t gc_heap::min_uoh_segment_size = 0; -#endif //!USE_REGIONS -size_t gc_heap::min_segment_size_shr = 0; -size_t gc_heap::soh_segment_size = 0; -size_t gc_heap::segment_info_size = 0; - -#ifdef GC_CONFIG_DRIVEN -size_t gc_heap::compact_or_sweep_gcs[2]; -#endif //GC_CONFIG_DRIVEN - -#ifdef FEATURE_LOH_COMPACTION -BOOL gc_heap::loh_compaction_always_p = FALSE; -gc_loh_compaction_mode gc_heap::loh_compaction_mode = loh_compaction_default; -#endif //FEATURE_LOH_COMPACTION - -GCEvent 
gc_heap::full_gc_approach_event; - -GCEvent gc_heap::full_gc_end_event; - -uint32_t gc_heap::fgn_loh_percent = 0; - -#ifdef BACKGROUND_GC -BOOL gc_heap::fgn_last_gc_was_concurrent = FALSE; -#endif //BACKGROUND_GC - -VOLATILE(bool) gc_heap::full_gc_approach_event_set; - -size_t gc_heap::full_gc_counts[gc_type_max]; - -bool gc_heap::maxgen_size_inc_p = false; - -#ifndef USE_REGIONS -BOOL gc_heap::should_expand_in_full_gc = FALSE; -#endif //!USE_REGIONS - -#ifdef DYNAMIC_HEAP_COUNT -int gc_heap::dynamic_adaptation_mode = dynamic_adaptation_default; -gc_heap::dynamic_heap_count_data_t SVR::gc_heap::dynamic_heap_count_data; -size_t gc_heap::current_total_soh_stable_size = 0; -uint64_t gc_heap::last_suspended_end_time = 0; -uint64_t gc_heap::change_heap_count_time = 0; -uint64_t gc_heap::total_change_heap_count = 0; -uint64_t gc_heap::total_change_heap_count_time = 0; -size_t gc_heap::gc_index_full_gc_end = 0; -uint64_t gc_heap::before_distribute_free_regions_time = 0; -bool gc_heap::trigger_initial_gen2_p = false; - -#ifdef BACKGROUND_GC -bool gc_heap::trigger_bgc_for_rethreading_p = false; -int gc_heap::total_bgc_threads = 0; -int gc_heap::last_bgc_n_heaps = 0; -int gc_heap::last_total_bgc_threads = 0; -#endif //BACKGROUND_GC - -#ifdef STRESS_DYNAMIC_HEAP_COUNT -int gc_heap::heaps_in_this_gc = 0; -int gc_heap::bgc_to_ngc2_ratio = 0; -#endif //STRESS_DYNAMIC_HEAP_COUNT -#endif // DYNAMIC_HEAP_COUNT - -// Provisional mode related stuff. 
-bool gc_heap::provisional_mode_triggered = false; -bool gc_heap::pm_trigger_full_gc = false; -size_t gc_heap::provisional_triggered_gc_count = 0; -size_t gc_heap::provisional_off_gc_count = 0; -size_t gc_heap::num_provisional_triggered = 0; -bool gc_heap::pm_stress_on = false; - -#ifdef HEAP_ANALYZE -BOOL gc_heap::heap_analyze_enabled = FALSE; -#endif //HEAP_ANALYZE - -#ifndef MULTIPLE_HEAPS - -alloc_list gc_heap::gen2_alloc_list[NUM_GEN2_ALIST - 1]; -alloc_list gc_heap::loh_alloc_list [NUM_LOH_ALIST - 1]; -alloc_list gc_heap::poh_alloc_list [NUM_POH_ALIST - 1]; - -#ifdef DOUBLY_LINKED_FL -// size we removed with no undo; only for recording purpose -size_t gc_heap::gen2_removed_no_undo = 0; -size_t gc_heap::saved_pinned_plug_index = INVALID_SAVED_PINNED_PLUG_INDEX; -#endif //DOUBLY_LINKED_FL - -#ifdef FEATURE_EVENT_TRACE -etw_bucket_info gc_heap::bucket_info[NUM_GEN2_ALIST]; -#endif //FEATURE_EVENT_TRACE - -dynamic_data gc_heap::dynamic_data_table [total_generation_count]; -gc_history_per_heap gc_heap::gc_data_per_heap; -size_t gc_heap::total_promoted_bytes = 0; -size_t gc_heap::finalization_promoted_bytes = 0; -size_t gc_heap::maxgen_pinned_compact_before_advance = 0; - -uint8_t* gc_heap::alloc_allocated = 0; - -size_t gc_heap::allocation_quantum = CLR_SIZE; - -GCSpinLock gc_heap::more_space_lock_soh; -GCSpinLock gc_heap::more_space_lock_uoh; - -#ifdef BACKGROUND_GC -VOLATILE(int32_t) gc_heap::uoh_alloc_thread_count = 0; -#endif //BACKGROUND_GC - -#ifdef SYNCHRONIZATION_STATS -unsigned int gc_heap::good_suspension = 0; -unsigned int gc_heap::bad_suspension = 0; -uint64_t gc_heap::total_msl_acquire = 0; -unsigned int gc_heap::num_msl_acquired = 0; -unsigned int gc_heap::num_high_msl_acquire = 0; -unsigned int gc_heap::num_low_msl_acquire = 0; -#endif //SYNCHRONIZATION_STATS - -size_t gc_heap::alloc_contexts_used = 0; -size_t gc_heap::soh_allocation_no_gc = 0; -size_t gc_heap::loh_allocation_no_gc = 0; -bool gc_heap::no_gc_oom_p = false; -heap_segment* 
gc_heap::saved_loh_segment_no_gc = 0; - -#endif //MULTIPLE_HEAPS - -#ifndef MULTIPLE_HEAPS - -BOOL gc_heap::gen0_bricks_cleared = FALSE; - -int gc_heap::gen0_must_clear_bricks = 0; - -#ifdef FEATURE_PREMORTEM_FINALIZATION -CFinalize* gc_heap::finalize_queue = 0; -#endif // FEATURE_PREMORTEM_FINALIZATION - -#ifdef FEATURE_CARD_MARKING_STEALING -VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_soh; -VOLATILE(bool) gc_heap::card_mark_done_soh; -VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_loh; -VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_poh; -VOLATILE(bool) gc_heap::card_mark_done_uoh; -#endif // FEATURE_CARD_MARKING_STEALING - -generation gc_heap::generation_table [total_generation_count]; - -size_t gc_heap::interesting_data_per_heap[max_idp_count]; - -size_t gc_heap::compact_reasons_per_heap[max_compact_reasons_count]; - -size_t gc_heap::expand_mechanisms_per_heap[max_expand_mechanisms_count]; - -size_t gc_heap::interesting_mechanism_bits_per_heap[max_gc_mechanism_bits_count]; - -mark_queue_t gc_heap::mark_queue; - -#ifdef USE_REGIONS -bool gc_heap::special_sweep_p = false; -#endif //USE_REGIONS - -int gc_heap::loh_pinned_queue_decay = LOH_PIN_DECAY; - -#endif // MULTIPLE_HEAPS - -/* end of per heap static initialization */ - -#ifdef USE_REGIONS -const size_t uninitialized_end_gen0_region_space = (size_t)(-1); -#endif //USE_REGIONS - -// budget smoothing -size_t gc_heap::smoothed_desired_total[total_generation_count]; -/* end of static initialization */ - -// This is for methods that need to iterate through all SOH heap segments/regions. 
-inline -int get_start_generation_index() -{ -#ifdef USE_REGIONS - return 0; -#else - return max_generation; -#endif //USE_REGIONS -} - -inline -int get_stop_generation_index (int condemned_gen_number) -{ -#ifdef USE_REGIONS - return 0; -#else - return condemned_gen_number; -#endif //USE_REGIONS -} - -void gen_to_condemn_tuning::print (int heap_num) -{ -#ifdef DT_LOG - dprintf (DT_LOG_0, ("condemned reasons (%d %d)", condemn_reasons_gen, condemn_reasons_condition)); - dprintf (DT_LOG_0, ("%s", record_condemn_reasons_gen_header)); - gc_condemn_reason_gen r_gen; - for (int i = 0; i < gcrg_max; i++) - { - r_gen = (gc_condemn_reason_gen)(i); - str_reasons_gen[i * 2] = get_gen_char (get_gen (r_gen)); - } - dprintf (DT_LOG_0, ("[%2d]%s", heap_num, str_reasons_gen)); - - dprintf (DT_LOG_0, ("%s", record_condemn_reasons_condition_header)); - gc_condemn_reason_condition r_condition; - for (int i = 0; i < gcrc_max; i++) - { - r_condition = (gc_condemn_reason_condition)(i); - str_reasons_condition[i * 2] = get_condition_char (get_condition (r_condition)); - } - - dprintf (DT_LOG_0, ("[%2d]%s", heap_num, str_reasons_condition)); -#else - UNREFERENCED_PARAMETER(heap_num); -#endif //DT_LOG -} - -void gc_generation_data::print (int heap_num, int gen_num) -{ -#if defined(SIMPLE_DPRINTF) && defined(DT_LOG) - dprintf (DT_LOG_0, ("[%2d]gen%d beg %zd fl %zd fo %zd end %zd fl %zd fo %zd in %zd p %zd np %zd alloc %zd", - heap_num, gen_num, - size_before, - free_list_space_before, free_obj_space_before, - size_after, - free_list_space_after, free_obj_space_after, - in, pinned_surv, npinned_surv, - new_allocation)); -#else - UNREFERENCED_PARAMETER(heap_num); - UNREFERENCED_PARAMETER(gen_num); -#endif //SIMPLE_DPRINTF && DT_LOG -} - -void gc_history_per_heap::set_mechanism (gc_mechanism_per_heap mechanism_per_heap, uint32_t value) -{ - uint32_t* mechanism = &mechanisms[mechanism_per_heap]; - *mechanism = 0; - *mechanism |= mechanism_mask; - *mechanism |= (1 << value); - -#ifdef DT_LOG - 
gc_mechanism_descr* descr = &gc_mechanisms_descr[mechanism_per_heap]; - dprintf (DT_LOG_0, ("setting %s: %s", - descr->name, - (descr->descr)[value])); -#endif //DT_LOG -} - -void gc_history_per_heap::print() -{ -#if defined(SIMPLE_DPRINTF) && defined(DT_LOG) - for (int i = 0; i < (sizeof (gen_data)/sizeof (gc_generation_data)); i++) - { - gen_data[i].print (heap_index, i); - } - - dprintf (DT_LOG_0, ("fla %zd flr %zd esa %zd ca %zd pa %zd paa %zd, rfle %d, ec %zd", - maxgen_size_info.free_list_allocated, - maxgen_size_info.free_list_rejected, - maxgen_size_info.end_seg_allocated, - maxgen_size_info.condemned_allocated, - maxgen_size_info.pinned_allocated, - maxgen_size_info.pinned_allocated_advance, - maxgen_size_info.running_free_list_efficiency, - extra_gen0_committed)); - - int mechanism = 0; - gc_mechanism_descr* descr = 0; - - for (int i = 0; i < max_mechanism_per_heap; i++) - { - mechanism = get_mechanism ((gc_mechanism_per_heap)i); - - if (mechanism >= 0) - { - descr = &gc_mechanisms_descr[(gc_mechanism_per_heap)i]; - dprintf (DT_LOG_0, ("[%2d]%s%s", - heap_index, - descr->name, - (descr->descr)[mechanism])); - } - } -#endif //SIMPLE_DPRINTF && DT_LOG -} - -void gc_history_global::print() -{ -#ifdef DT_LOG - char str_settings[64]; - memset (str_settings, '|', sizeof (char) * 64); - str_settings[max_global_mechanisms_count*2] = 0; - - for (int i = 0; i < max_global_mechanisms_count; i++) - { - str_settings[i * 2] = (get_mechanism_p ((gc_global_mechanism_p)i) ? 
'Y' : 'N'); - } - - dprintf (DT_LOG_0, ("[hp]|c|p|o|d|b|e|")); - - dprintf (DT_LOG_0, ("%4d|%s", num_heaps, str_settings)); - dprintf (DT_LOG_0, ("Condemned gen%d(reason: %s; mode: %s), youngest budget %zd(%d), memload %d", - condemned_generation, - str_gc_reasons[reason], - str_gc_pause_modes[pause_mode], - final_youngest_desired, - gen0_reduction_count, - mem_pressure)); -#endif //DT_LOG -} - -uint32_t limit_time_to_uint32 (uint64_t time) -{ - time = min (time, (uint64_t)UINT32_MAX); - return (uint32_t)time; -} - -inline BOOL -in_range_for_segment(uint8_t* add, heap_segment* seg) -{ - return ((add >= heap_segment_mem (seg)) && (add < heap_segment_reserved (seg))); -} - -#ifdef FEATURE_BASICFREEZE -// The array we allocate is organized as follows: -// 0th element is the address of the last array we allocated. -// starting from the 1st element are the segment addresses, that's -// what buckets() returns. -struct bk -{ - uint8_t* add; - size_t val; -}; - -class sorted_table -{ -private: - ptrdiff_t size; - ptrdiff_t count; - bk* slots; - bk* buckets() { return (slots + 1); } - uint8_t*& last_slot (bk* arr) { return arr[0].add; } - bk* old_slots; -public: - static sorted_table* make_sorted_table (); - BOOL insert (uint8_t* add, size_t val);; - size_t lookup (uint8_t*& add); - void remove (uint8_t* add); - void clear (); - void delete_sorted_table(); - void delete_old_slots(); - void enqueue_old_slot(bk* sl); - BOOL ensure_space_for_insert(); -}; - -sorted_table* -sorted_table::make_sorted_table () -{ - size_t size = 400; - - // allocate one more bk to store the older slot address. 
- sorted_table* res = (sorted_table*)new (nothrow) char [sizeof (sorted_table) + (size + 1) * sizeof (bk)]; - if (!res) - return 0; - res->size = size; - res->slots = (bk*)(res + 1); - res->old_slots = 0; - res->clear(); - return res; -} - -void -sorted_table::delete_sorted_table() -{ - if (slots != (bk*)(this+1)) - { - delete[] slots; - } - delete_old_slots(); -} -void -sorted_table::delete_old_slots() -{ - uint8_t* sl = (uint8_t*)old_slots; - while (sl) - { - uint8_t* dsl = sl; - sl = last_slot ((bk*)sl); - delete[] dsl; - } - old_slots = 0; -} -void -sorted_table::enqueue_old_slot(bk* sl) -{ - last_slot (sl) = (uint8_t*)old_slots; - old_slots = sl; -} - -inline -size_t -sorted_table::lookup (uint8_t*& add) -{ - ptrdiff_t high = (count-1); - ptrdiff_t low = 0; - ptrdiff_t ti; - ptrdiff_t mid; - bk* buck = buckets(); - while (low <= high) - { - mid = ((low + high)/2); - ti = mid; - if (buck[ti].add > add) - { - if ((ti > 0) && (buck[ti-1].add <= add)) - { - add = buck[ti-1].add; - return buck[ti - 1].val; - } - high = mid - 1; - } - else - { - if (buck[ti+1].add > add) - { - add = buck[ti].add; - return buck[ti].val; - } - low = mid + 1; - } - } - add = 0; - return 0; -} - -BOOL -sorted_table::ensure_space_for_insert() -{ - if (count == size) - { - size = (size * 3)/2; - assert((size * sizeof (bk)) > 0); - bk* res = (bk*)new (nothrow) char [(size + 1) * sizeof (bk)]; - assert (res); - if (!res) - return FALSE; - - last_slot (res) = 0; - memcpy (((bk*)res + 1), buckets(), count * sizeof (bk)); - bk* last_old_slots = slots; - slots = res; - if (last_old_slots != (bk*)(this + 1)) - enqueue_old_slot (last_old_slots); - } - return TRUE; -} - -BOOL -sorted_table::insert (uint8_t* add, size_t val) -{ - //grow if no more room - assert (count < size); - - //insert sorted - ptrdiff_t high = (count-1); - ptrdiff_t low = 0; - ptrdiff_t ti; - ptrdiff_t mid; - bk* buck = buckets(); - while (low <= high) - { - mid = ((low + high)/2); - ti = mid; - if (buck[ti].add > add) - { - 
if ((ti == 0) || (buck[ti-1].add <= add)) - { - // found insertion point - for (ptrdiff_t k = count; k > ti;k--) - { - buck [k] = buck [k-1]; - } - buck[ti].add = add; - buck[ti].val = val; - count++; - return TRUE; - } - high = mid - 1; - } - else - { - if (buck[ti+1].add > add) - { - //found the insertion point - for (ptrdiff_t k = count; k > ti+1;k--) - { - buck [k] = buck [k-1]; - } - buck[ti+1].add = add; - buck[ti+1].val = val; - count++; - return TRUE; - } - low = mid + 1; - } - } - assert (0); - return TRUE; -} - -void -sorted_table::remove (uint8_t* add) -{ - ptrdiff_t high = (count-1); - ptrdiff_t low = 0; - ptrdiff_t ti; - ptrdiff_t mid; - bk* buck = buckets(); - while (low <= high) - { - mid = ((low + high)/2); - ti = mid; - if (buck[ti].add > add) - { - if (buck[ti-1].add <= add) - { - for (ptrdiff_t k = ti; k < count; k++) - buck[k-1] = buck[k]; - count--; - return; - } - high = mid - 1; - } - else - { - if (buck[ti+1].add > add) - { - for (ptrdiff_t k = ti+1; k < count; k++) - buck[k-1] = buck[k]; - count--; - return; - } - low = mid + 1; - } - } - assert (0); -} - -void -sorted_table::clear() -{ - count = 1; - buckets()[0].add = MAX_PTR; -} -#endif //FEATURE_BASICFREEZE - -#ifdef USE_REGIONS -inline -size_t get_skewed_basic_region_index_for_address (uint8_t* address) -{ - assert ((g_gc_lowest_address <= address) && (address <= g_gc_highest_address)); - size_t skewed_basic_region_index = (size_t)address >> gc_heap::min_segment_size_shr; - return skewed_basic_region_index; -} - -inline -size_t get_basic_region_index_for_address (uint8_t* address) -{ - size_t skewed_basic_region_index = get_skewed_basic_region_index_for_address (address); - return (skewed_basic_region_index - get_skewed_basic_region_index_for_address (g_gc_lowest_address)); -} - -// Go from a random address to its region info. The random address could be -// in one of the basic regions of a larger region so we need to check for that. 
-inline -heap_segment* get_region_info_for_address (uint8_t* address) -{ - size_t basic_region_index = (size_t)address >> gc_heap::min_segment_size_shr; - heap_segment* basic_region_info_entry = (heap_segment*)&seg_mapping_table[basic_region_index]; - ptrdiff_t first_field = (ptrdiff_t)heap_segment_allocated (basic_region_info_entry); - if (first_field < 0) - { - basic_region_index += first_field; - } - - return ((heap_segment*)(&seg_mapping_table[basic_region_index])); -} - -// Go from the physical start of a region to its region info. -inline -heap_segment* get_region_info (uint8_t* region_start) -{ - size_t region_index = (size_t)region_start >> gc_heap::min_segment_size_shr; - heap_segment* region_info_entry = (heap_segment*)&seg_mapping_table[region_index]; - dprintf (REGIONS_LOG, ("region info for region %p is at %zd, %zx (alloc: %p)", - region_start, region_index, (size_t)region_info_entry, heap_segment_allocated (region_info_entry))); - return (heap_segment*)&seg_mapping_table[region_index]; -} - -// Go from the actual region info to its region start. 
-inline -uint8_t* get_region_start (heap_segment* region_info) -{ - uint8_t* obj_start = heap_segment_mem (region_info); - return (obj_start - sizeof (aligned_plug_and_gap)); -} - -inline -size_t get_region_size (heap_segment* region_info) -{ - return (size_t)(heap_segment_reserved (region_info) - get_region_start (region_info)); -} - -inline -size_t get_region_committed_size (heap_segment* region) -{ - uint8_t* start = get_region_start (region); - uint8_t* committed = heap_segment_committed (region); - return committed - start; -} - -inline bool is_free_region (heap_segment* region) -{ - return (heap_segment_allocated (region) == nullptr); -} - - -#endif //USE_REGIONS - -inline -uint8_t* align_on_segment (uint8_t* add) -{ - return (uint8_t*)((size_t)(add + (((size_t)1 << gc_heap::min_segment_size_shr) - 1)) & ~(((size_t)1 << gc_heap::min_segment_size_shr) - 1)); -} - -inline -uint8_t* align_lower_segment (uint8_t* add) -{ - return (uint8_t*)((size_t)(add) & ~(((size_t)1 << gc_heap::min_segment_size_shr) - 1)); -} - - -#ifdef FEATURE_BASICFREEZE -inline -size_t ro_seg_begin_index (heap_segment* seg) -{ -#ifdef USE_REGIONS - size_t begin_index = (size_t)heap_segment_mem (seg) >> gc_heap::min_segment_size_shr; -#else - size_t begin_index = (size_t)seg >> gc_heap::min_segment_size_shr; -#endif //USE_REGIONS - begin_index = max (begin_index, (size_t)g_gc_lowest_address >> gc_heap::min_segment_size_shr); - return begin_index; -} - -inline -size_t ro_seg_end_index (heap_segment* seg) -{ - size_t end_index = (size_t)(heap_segment_reserved (seg) - 1) >> gc_heap::min_segment_size_shr; - end_index = min (end_index, (size_t)g_gc_highest_address >> gc_heap::min_segment_size_shr); - return end_index; -} - - -heap_segment* ro_segment_lookup (uint8_t* o) -{ - uint8_t* ro_seg_start = o; - heap_segment* seg = (heap_segment*)gc_heap::seg_table->lookup (ro_seg_start); - - if (ro_seg_start && in_range_for_segment (o, seg)) - return seg; - else - return 0; -} - -#endif 
//FEATURE_BASICFREEZE - -#ifdef MULTIPLE_HEAPS -inline -gc_heap* seg_mapping_table_heap_of_worker (uint8_t* o) -{ - size_t index = (size_t)o >> gc_heap::min_segment_size_shr; - seg_mapping* entry = &seg_mapping_table[index]; - -#ifdef USE_REGIONS - gc_heap* hp = heap_segment_heap ((heap_segment*)entry); -#else - gc_heap* hp = ((o > entry->boundary) ? entry->h1 : entry->h0); - - dprintf (2, ("checking obj %p, index is %zd, entry: boundary: %p, h0: %p, seg0: %p, h1: %p, seg1: %p", - o, index, (entry->boundary + 1), - (uint8_t*)(entry->h0), (uint8_t*)(entry->seg0), - (uint8_t*)(entry->h1), (uint8_t*)(entry->seg1))); - -#ifdef _DEBUG - heap_segment* seg = ((o > entry->boundary) ? entry->seg1 : entry->seg0); -#ifdef FEATURE_BASICFREEZE - if ((size_t)seg & ro_in_entry) - seg = (heap_segment*)((size_t)seg & ~ro_in_entry); -#endif //FEATURE_BASICFREEZE - -#ifdef TRACE_GC - if (seg) - { - if (in_range_for_segment (o, seg)) - { - dprintf (2, ("obj %p belongs to segment %p(-%p)", o, seg, (uint8_t*)heap_segment_allocated (seg))); - } - else - { - dprintf (2, ("found seg %p(-%p) for obj %p, but it's not on the seg", - seg, (uint8_t*)heap_segment_allocated (seg), o)); - } - } - else - { - dprintf (2, ("could not find obj %p in any existing segments", o)); - } -#endif //TRACE_GC -#endif //_DEBUG -#endif //USE_REGIONS - return hp; -} - - -#endif //MULTIPLE_HEAPS - -// Only returns a valid seg if we can actually find o on the seg. -heap_segment* seg_mapping_table_segment_of (uint8_t* o) -{ -#ifdef FEATURE_BASICFREEZE - if ((o < g_gc_lowest_address) || (o >= g_gc_highest_address)) - return ro_segment_lookup (o); -#endif //FEATURE_BASICFREEZE - - size_t index = (size_t)o >> gc_heap::min_segment_size_shr; - seg_mapping* entry = &seg_mapping_table[index]; - -#ifdef USE_REGIONS - // REGIONS TODO: I think we could simplify this to having the same info for each - // basic entry in a large region so we can get it right away instead of having to go - // back some entries. 
- ptrdiff_t first_field = (ptrdiff_t)heap_segment_allocated ((heap_segment*)entry); - if (first_field == 0) - { - dprintf (REGIONS_LOG, ("asked for seg for %p, in a freed region mem: %p, committed %p", - o, heap_segment_mem ((heap_segment*)entry), - heap_segment_committed ((heap_segment*)entry))); - return 0; - } - // Regions are never going to intersect an ro seg, so this can never be ro_in_entry. - assert (first_field != 0); - assert (first_field != ro_in_entry); - if (first_field < 0) - { - index += first_field; - } - heap_segment* seg = (heap_segment*)&seg_mapping_table[index]; -#else //USE_REGIONS - dprintf (2, ("checking obj %p, index is %zd, entry: boundary: %p, seg0: %p, seg1: %p", - o, index, (entry->boundary + 1), - (uint8_t*)(entry->seg0), (uint8_t*)(entry->seg1))); - - heap_segment* seg = ((o > entry->boundary) ? entry->seg1 : entry->seg0); -#ifdef FEATURE_BASICFREEZE - if ((size_t)seg & ro_in_entry) - seg = (heap_segment*)((size_t)seg & ~ro_in_entry); -#endif //FEATURE_BASICFREEZE -#endif //USE_REGIONS - - if (seg) - { - if (in_range_for_segment (o, seg)) - { - dprintf (2, ("obj %p belongs to segment %p(-%p)", o, (uint8_t*)heap_segment_mem(seg), (uint8_t*)heap_segment_reserved(seg))); - } - else - { - dprintf (2, ("found seg %p(-%p) for obj %p, but it's not on the seg, setting it to 0", - (uint8_t*)heap_segment_mem(seg), (uint8_t*)heap_segment_reserved(seg), o)); - seg = 0; - } - } - else - { - dprintf (2, ("could not find obj %p in any existing segments", o)); - } - -#ifdef FEATURE_BASICFREEZE - // TODO: This was originally written assuming that the seg_mapping_table would always contain entries for ro - // segments whenever the ro segment falls into the [g_gc_lowest_address,g_gc_highest_address) range. I.e., it had an - // extra "&& (size_t)(entry->seg1) & ro_in_entry" expression. 
However, at the moment, grow_brick_card_table does - // not correctly go through the ro segments and add them back to the seg_mapping_table when the [lowest,highest) - // range changes. We should probably go ahead and modify grow_brick_card_table and put back the - // "&& (size_t)(entry->seg1) & ro_in_entry" here. - if (!seg) - { - seg = ro_segment_lookup (o); - if (seg && !in_range_for_segment (o, seg)) - seg = 0; - } -#endif //FEATURE_BASICFREEZE - - return seg; -} - -size_t gcard_of ( uint8_t*); - -#define GC_MARKED (size_t)0x1 -#ifdef DOUBLY_LINKED_FL -// This bit indicates that we'll need to set the bgc mark bit for this object during an FGC. -// We only do this when we decide to compact. -#define BGC_MARKED_BY_FGC (size_t)0x2 -#define MAKE_FREE_OBJ_IN_COMPACT (size_t)0x4 -#define ALLOWED_SPECIAL_HEADER_BITS (GC_MARKED|BGC_MARKED_BY_FGC|MAKE_FREE_OBJ_IN_COMPACT) -#else //DOUBLY_LINKED_FL -#define ALLOWED_SPECIAL_HEADER_BITS (GC_MARKED) -#endif //!DOUBLY_LINKED_FL - -#ifdef HOST_64BIT -#define SPECIAL_HEADER_BITS (0x7) -#else -#define SPECIAL_HEADER_BITS (0x3) -#endif - -#define slot(i, j) ((uint8_t**)(i))[(j)+1] - -#define free_object_base_size (plug_skew + sizeof(ArrayBase)) - -#define free_list_slot(x) ((uint8_t**)(x))[2] -#define free_list_undo(x) ((uint8_t**)(x))[-1] -#define UNDO_EMPTY ((uint8_t*)1) - -#ifdef DOUBLY_LINKED_FL -#define free_list_prev(x) ((uint8_t**)(x))[3] -#define PREV_EMPTY ((uint8_t*)1) - -void check_and_clear_in_free_list (uint8_t* o, size_t size) -{ - if (size >= min_free_list) - { - free_list_prev (o) = PREV_EMPTY; - } -} - -#endif //DOUBLY_LINKED_FL - -class CObjectHeader : public Object -{ -public: - -#if defined(FEATURE_NATIVEAOT) || defined(BUILD_AS_STANDALONE) - // The GC expects the following methods that are provided by the Object class in the CLR but not provided - // by NativeAOT's version of Object. 
- uint32_t GetNumComponents() - { - return ((ArrayBase *)this)->GetNumComponents(); - } - - void Validate(BOOL bDeep=TRUE, BOOL bVerifyNextHeader = FALSE, BOOL bVerifySyncBlock = FALSE) - { - // declaration of extra parameters just so the call site would need no #ifdefs - UNREFERENCED_PARAMETER(bVerifyNextHeader); - UNREFERENCED_PARAMETER(bVerifySyncBlock); - - MethodTable * pMT = GetMethodTable(); - - _ASSERTE(pMT->SanityCheck()); - - bool noRangeChecks = - (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_NO_RANGE_CHECKS) == GCConfig::HEAPVERIFY_NO_RANGE_CHECKS; - - BOOL fSmallObjectHeapPtr = FALSE, fLargeObjectHeapPtr = FALSE; - if (!noRangeChecks) - { - fSmallObjectHeapPtr = g_theGCHeap->IsHeapPointer(this, TRUE); - if (!fSmallObjectHeapPtr) - fLargeObjectHeapPtr = g_theGCHeap->IsHeapPointer(this); - - _ASSERTE(fSmallObjectHeapPtr || fLargeObjectHeapPtr); - } - -#ifdef FEATURE_STRUCTALIGN - _ASSERTE(IsStructAligned((uint8_t *)this, GetMethodTable()->GetBaseAlignment())); -#endif // FEATURE_STRUCTALIGN - -#if defined(FEATURE_64BIT_ALIGNMENT) && !defined(FEATURE_NATIVEAOT) - if (pMT->RequiresAlign8()) - { - _ASSERTE((((size_t)this) & 0x7) == (pMT->IsValueType() ? 
4U : 0U)); - } -#endif // FEATURE_64BIT_ALIGNMENT - -#ifdef VERIFY_HEAP - if (bDeep && (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC)) - g_theGCHeap->ValidateObjectMember(this); -#endif - if (fSmallObjectHeapPtr) - { -#ifdef FEATURE_BASICFREEZE - _ASSERTE(!g_theGCHeap->IsLargeObject(this) || g_theGCHeap->IsInFrozenSegment(this)); -#else - _ASSERTE(!g_theGCHeap->IsLargeObject(this)); -#endif - } - } - - void ValidateHeap(BOOL bDeep) - { - Validate(bDeep); - } - -#endif //FEATURE_NATIVEAOT || BUILD_AS_STANDALONE - - ///// - // - // Header Status Information - // - - MethodTable *GetMethodTable() const - { - return( (MethodTable *) (((size_t) RawGetMethodTable()) & (~SPECIAL_HEADER_BITS))); - } - - void SetMarked() - { - _ASSERTE(RawGetMethodTable()); - RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | GC_MARKED)); - } - - BOOL IsMarked() const - { - return !!(((size_t)RawGetMethodTable()) & GC_MARKED); - } - - void SetPinned() - { - assert (!(gc_heap::settings.concurrent)); - GetHeader()->SetGCBit(); - } - - BOOL IsPinned() const - { - return !!((((CObjectHeader*)this)->GetHeader()->GetBits()) & BIT_SBLK_GC_RESERVE); - } - - // Now we set more bits should actually only clear the mark bit - void ClearMarked() - { -#ifdef DOUBLY_LINKED_FL - RawSetMethodTable ((MethodTable *)(((size_t) RawGetMethodTable()) & (~GC_MARKED))); -#else - RawSetMethodTable (GetMethodTable()); -#endif //DOUBLY_LINKED_FL - } - -#ifdef DOUBLY_LINKED_FL - void SetBGCMarkBit() - { - RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | BGC_MARKED_BY_FGC)); - } - BOOL IsBGCMarkBitSet() const - { - return !!(((size_t)RawGetMethodTable()) & BGC_MARKED_BY_FGC); - } - void ClearBGCMarkBit() - { - RawSetMethodTable((MethodTable *)(((size_t) RawGetMethodTable()) & (~BGC_MARKED_BY_FGC))); - } - - void SetFreeObjInCompactBit() - { - RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | MAKE_FREE_OBJ_IN_COMPACT)); - } - BOOL 
IsFreeObjInCompactBitSet() const - { - return !!(((size_t)RawGetMethodTable()) & MAKE_FREE_OBJ_IN_COMPACT); - } - void ClearFreeObjInCompactBit() - { -#ifdef _DEBUG - // check this looks like an object, but do NOT validate pointers to other objects - // as these may not be valid yet - we are calling this during compact_phase - Validate(FALSE); -#endif //_DEBUG - RawSetMethodTable((MethodTable *)(((size_t) RawGetMethodTable()) & (~MAKE_FREE_OBJ_IN_COMPACT))); - } -#endif //DOUBLY_LINKED_FL - - size_t ClearSpecialBits() - { - size_t special_bits = ((size_t)RawGetMethodTable()) & SPECIAL_HEADER_BITS; - if (special_bits != 0) - { - assert ((special_bits & (~ALLOWED_SPECIAL_HEADER_BITS)) == 0); - RawSetMethodTable ((MethodTable*)(((size_t)RawGetMethodTable()) & ~(SPECIAL_HEADER_BITS))); - } - return special_bits; - } - - void SetSpecialBits (size_t special_bits) - { - assert ((special_bits & (~ALLOWED_SPECIAL_HEADER_BITS)) == 0); - if (special_bits != 0) - { - RawSetMethodTable ((MethodTable*)(((size_t)RawGetMethodTable()) | special_bits)); - } - } - - CGCDesc *GetSlotMap () - { - assert (GetMethodTable()->ContainsGCPointers()); - return CGCDesc::GetCGCDescFromMT(GetMethodTable()); - } - - void SetFree(size_t size) - { - assert (size >= free_object_base_size); - - assert (g_gc_pFreeObjectMethodTable->GetBaseSize() == free_object_base_size); - assert (g_gc_pFreeObjectMethodTable->RawGetComponentSize() == 1); - - RawSetMethodTable( g_gc_pFreeObjectMethodTable ); - - size_t* numComponentsPtr = (size_t*) &((uint8_t*) this)[ArrayBase::GetOffsetOfNumComponents()]; - *numComponentsPtr = size - free_object_base_size; -#ifdef VERIFY_HEAP - //This introduces a bug in the free list management. 
- //((void**) this)[-1] = 0; // clear the sync block, - assert (*numComponentsPtr >= 0); - if (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC) - { - memset (((uint8_t*)this)+sizeof(ArrayBase), 0xcc, *numComponentsPtr); -#ifdef DOUBLY_LINKED_FL - // However, in this case we can't leave the Next field uncleared because no one will clear it - // so it remains 0xcc and that's not good for verification - if (*numComponentsPtr > 0) - { - free_list_slot (this) = 0; - } -#endif //DOUBLY_LINKED_FL - } -#endif //VERIFY_HEAP - -#ifdef DOUBLY_LINKED_FL - // For background GC, we need to distinguish between a free object that's not on the free list - // and one that is. So we always set its prev to PREV_EMPTY to indicate that it's a free - // object that's not on the free list. If it should be on the free list, it will be set to the - // appropriate non zero value. - check_and_clear_in_free_list ((uint8_t*)this, size); -#endif //DOUBLY_LINKED_FL - } - - void UnsetFree() - { - size_t size = free_object_base_size - plug_skew; - - // since we only need to clear 2 ptr size, we do it manually - PTR_PTR m = (PTR_PTR) this; - for (size_t i = 0; i < size / sizeof(PTR_PTR); i++) - *(m++) = 0; - } - - BOOL IsFree () const - { - return (GetMethodTable() == g_gc_pFreeObjectMethodTable); - } - -#ifdef FEATURE_STRUCTALIGN - int GetRequiredAlignment () const - { - return GetMethodTable()->GetRequiredAlignment(); - } -#endif // FEATURE_STRUCTALIGN - - BOOL ContainsGCPointers() const - { - return GetMethodTable()->ContainsGCPointers(); - } - -#ifdef COLLECTIBLE_CLASS - BOOL Collectible() const - { - return GetMethodTable()->Collectible(); - } - - FORCEINLINE BOOL ContainsGCPointersOrCollectible() const - { - MethodTable *pMethodTable = GetMethodTable(); - return (pMethodTable->ContainsGCPointers() || pMethodTable->Collectible()); - } -#endif //COLLECTIBLE_CLASS - - Object* GetObjectBase() const - { - return (Object*) this; - } -}; - -#define header(i) ((CObjectHeader*)(i)) -#define 
method_table(o) ((CObjectHeader*)(o))->GetMethodTable() - -#ifdef DOUBLY_LINKED_FL -inline -BOOL is_on_free_list (uint8_t* o, size_t size) -{ - if (size >= min_free_list) - { - if (header(o)->GetMethodTable() == g_gc_pFreeObjectMethodTable) - { - return (free_list_prev (o) != PREV_EMPTY); - } - } - - return FALSE; -} - -inline -void set_plug_bgc_mark_bit (uint8_t* node) -{ - header(node)->SetBGCMarkBit(); -} - -inline -BOOL is_plug_bgc_mark_bit_set (uint8_t* node) -{ - return header(node)->IsBGCMarkBitSet(); -} - -inline -void clear_plug_bgc_mark_bit (uint8_t* node) -{ - header(node)->ClearBGCMarkBit(); -} - -inline -void set_free_obj_in_compact_bit (uint8_t* node) -{ - header(node)->SetFreeObjInCompactBit(); -} - -inline -BOOL is_free_obj_in_compact_bit_set (uint8_t* node) -{ - return header(node)->IsFreeObjInCompactBitSet(); -} - -inline -void clear_free_obj_in_compact_bit (uint8_t* node) -{ - header(node)->ClearFreeObjInCompactBit(); -} -#endif //DOUBLY_LINKED_FL - -#ifdef SHORT_PLUGS -inline -void set_plug_padded (uint8_t* node) -{ - header(node)->SetMarked(); -} -inline -void clear_plug_padded (uint8_t* node) -{ - header(node)->ClearMarked(); -} -inline -BOOL is_plug_padded (uint8_t* node) -{ - return header(node)->IsMarked(); -} -#else //SHORT_PLUGS -inline void set_plug_padded (uint8_t* node){} -inline void clear_plug_padded (uint8_t* node){} -inline -BOOL is_plug_padded (uint8_t* node){return FALSE;} -#endif //SHORT_PLUGS - - -inline size_t unused_array_size(uint8_t * p) -{ - assert(((CObjectHeader*)p)->IsFree()); - - size_t* numComponentsPtr = (size_t*)(p + ArrayBase::GetOffsetOfNumComponents()); - return free_object_base_size + *numComponentsPtr; -} - -inline -heap_segment* heap_segment_non_sip (heap_segment* ns) -{ -#ifdef USE_REGIONS - if ((ns == 0) || !heap_segment_swept_in_plan (ns)) - { - return ns; - } - else - { - do - { - if (heap_segment_swept_in_plan (ns)) - { - dprintf (REGIONS_LOG, ("region %p->%p SIP", - heap_segment_mem (ns), 
heap_segment_allocated (ns))); - } - - ns = heap_segment_next (ns); - } while ((ns != 0) && heap_segment_swept_in_plan (ns)); - return ns; - } -#else //USE_REGIONS - return ns; -#endif //USE_REGIONS -} - -inline -heap_segment* heap_segment_next_non_sip (heap_segment* seg) -{ - heap_segment* ns = heap_segment_next (seg); -#ifdef USE_REGIONS - return heap_segment_non_sip (ns); -#else - return ns; -#endif //USE_REGIONS -} - -heap_segment* heap_segment_rw (heap_segment* ns) -{ - if ((ns == 0) || !heap_segment_read_only_p (ns)) - { - return ns; - } - else - { - do - { - ns = heap_segment_next (ns); - } while ((ns != 0) && heap_segment_read_only_p (ns)); - return ns; - } -} - -//returns the next non ro segment. -heap_segment* heap_segment_next_rw (heap_segment* seg) -{ - heap_segment* ns = heap_segment_next (seg); - return heap_segment_rw (ns); -} - -// returns the segment before seg. -heap_segment* heap_segment_prev_rw (heap_segment* begin, heap_segment* seg) -{ - assert (begin != 0); - heap_segment* prev = begin; - heap_segment* current = heap_segment_next_rw (begin); - - while (current && current != seg) - { - prev = current; - current = heap_segment_next_rw (current); - } - - if (current == seg) - { - return prev; - } - else - { - return 0; - } -} - -// returns the segment before seg. 
- - -heap_segment* heap_segment_in_range (heap_segment* ns) -{ - if ((ns == 0) || heap_segment_in_range_p (ns)) - { - return ns; - } - else - { - do - { - ns = heap_segment_next (ns); - } while ((ns != 0) && !heap_segment_in_range_p (ns)); - return ns; - } -} - -heap_segment* heap_segment_next_in_range (heap_segment* seg) -{ - heap_segment* ns = heap_segment_next (seg); - return heap_segment_in_range (ns); -} - -struct imemory_data -{ - uint8_t* memory_base; -}; - -struct numa_reserved_block -{ - uint8_t* memory_base; - size_t block_size; - - numa_reserved_block() : memory_base(nullptr), block_size(0) { } -}; - -struct initial_memory_details -{ - imemory_data *initial_memory; - imemory_data *initial_normal_heap; // points into initial_memory_array - imemory_data *initial_large_heap; // points into initial_memory_array - imemory_data *initial_pinned_heap; // points into initial_memory_array - - size_t block_size_normal; - size_t block_size_large; - size_t block_size_pinned; - - int block_count; // # of blocks in each - int current_block_normal; - int current_block_large; - int current_block_pinned; - - enum - { - ALLATONCE = 1, - EACH_GENERATION, - EACH_BLOCK, - ALLATONCE_SEPARATED_POH, - EACH_NUMA_NODE - }; - - size_t allocation_pattern; - - size_t block_size(int i) - { - switch (i / block_count) - { - case 0: return block_size_normal; - case 1: return block_size_large; - case 2: return block_size_pinned; - default: UNREACHABLE(); - } - }; - - void* get_initial_memory (int gen, int h_number) - { - switch (gen) - { - case soh_gen0: - case soh_gen1: - case soh_gen2: return initial_normal_heap[h_number].memory_base; - case loh_generation: return initial_large_heap[h_number].memory_base; - case poh_generation: return initial_pinned_heap[h_number].memory_base; - default: UNREACHABLE(); - } - }; - - size_t get_initial_size (int gen) - { - switch (gen) - { - case soh_gen0: - case soh_gen1: - case soh_gen2: return block_size_normal; - case loh_generation: return 
block_size_large; - case poh_generation: return block_size_pinned; - default: UNREACHABLE(); - } - }; - - int numa_reserved_block_count; - numa_reserved_block* numa_reserved_block_table; -}; - -initial_memory_details memory_details; - -heap_segment* make_initial_segment (int gen, int h_number, gc_heap* hp) -{ - void* mem = memory_details.get_initial_memory (gen, h_number); - size_t size = memory_details.get_initial_size (gen); - heap_segment* res = gc_heap::make_heap_segment ((uint8_t*)mem, size, hp, gen); - - return res; -} - -void* virtual_alloc (size_t size) -{ - return virtual_alloc(size, false); -} - -void* virtual_alloc (size_t size, bool use_large_pages_p, uint16_t numa_node) -{ - size_t requested_size = size; - - if ((gc_heap::reserved_memory_limit - gc_heap::reserved_memory) < requested_size) - { - gc_heap::reserved_memory_limit = gc_heap::reserved_memory_limit + requested_size; - if ((gc_heap::reserved_memory_limit - gc_heap::reserved_memory) < requested_size) - { - return 0; - } - } - - uint32_t flags = VirtualReserveFlags::None; -#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - if (virtual_alloc_hardware_write_watch) - { - flags = VirtualReserveFlags::WriteWatch; - } -#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - - void* prgmem = use_large_pages_p ? - GCToOSInterface::VirtualReserveAndCommitLargePages(requested_size, numa_node) : - GCToOSInterface::VirtualReserve(requested_size, card_size * card_word_width, flags, numa_node); - void *aligned_mem = prgmem; - - // We don't want (prgmem + size) to be right at the end of the address space - // because we'd have to worry about that everytime we do (address + size). - // We also want to make sure that we leave loh_size_threshold at the end - // so we allocate a small object we don't need to worry about overflow there - // when we do alloc_ptr+size. 
- if (prgmem) - { - uint8_t* end_mem = (uint8_t*)prgmem + requested_size; - - if ((end_mem == 0) || ((size_t)(MAX_PTR - end_mem) <= END_SPACE_AFTER_GC)) - { - GCToOSInterface::VirtualRelease (prgmem, requested_size); - dprintf (2, ("Virtual Alloc size %zd returned memory right against 4GB [%zx, %zx[ - discarding", - requested_size, (size_t)prgmem, (size_t)((uint8_t*)prgmem+requested_size))); - prgmem = 0; - aligned_mem = 0; - } - } - - if (prgmem) - { - gc_heap::reserved_memory += requested_size; - } - - dprintf (2, ("Virtual Alloc size %zd: [%zx, %zx[", - requested_size, (size_t)prgmem, (size_t)((uint8_t*)prgmem+requested_size))); - - return aligned_mem; -} - -static size_t get_valid_segment_size (BOOL large_seg=FALSE) -{ - size_t seg_size, initial_seg_size; - - if (!large_seg) - { - initial_seg_size = INITIAL_ALLOC; - seg_size = static_cast(GCConfig::GetSegmentSize()); - } - else - { - initial_seg_size = LHEAP_ALLOC; - seg_size = static_cast(GCConfig::GetSegmentSize()) / 2; - } - -#ifdef MULTIPLE_HEAPS -#ifdef HOST_64BIT - if (!large_seg) -#endif // HOST_64BIT - { - if (g_num_processors > 4) - initial_seg_size /= 2; - if (g_num_processors > 8) - initial_seg_size /= 2; - } -#endif //MULTIPLE_HEAPS - - // if seg_size is small but not 0 (0 is default if config not set) - // then set the segment to the minimum size - if (!g_theGCHeap->IsValidSegmentSize(seg_size)) - { - // if requested size is between 1 byte and 4MB, use min - if ((seg_size >> 1) && !(seg_size >> 22)) - seg_size = 1024*1024*4; - else - seg_size = initial_seg_size; - } - -#ifdef HOST_64BIT - seg_size = round_up_power2 (seg_size); -#else - seg_size = round_down_power2 (seg_size); -#endif // HOST_64BIT - - return (seg_size); -} - -#ifndef USE_REGIONS -void -gc_heap::compute_new_ephemeral_size() -{ - int eph_gen_max = max_generation - 1 - (settings.promotion ? 
1 : 0); - size_t padding_size = 0; - - for (int i = 0; i <= eph_gen_max; i++) - { - dynamic_data* dd = dynamic_data_of (i); - total_ephemeral_size += (dd_survived_size (dd) - dd_pinned_survived_size (dd)); -#ifdef RESPECT_LARGE_ALIGNMENT - total_ephemeral_size += dd_num_npinned_plugs (dd) * switch_alignment_size (FALSE); -#endif //RESPECT_LARGE_ALIGNMENT -#ifdef FEATURE_STRUCTALIGN - total_ephemeral_size += dd_num_npinned_plugs (dd) * MAX_STRUCTALIGN; -#endif //FEATURE_STRUCTALIGN - -#ifdef SHORT_PLUGS - padding_size += dd_padding_size (dd); -#endif //SHORT_PLUGS - } - - total_ephemeral_size += eph_gen_starts_size; - -#ifdef RESPECT_LARGE_ALIGNMENT - size_t planned_ephemeral_size = heap_segment_plan_allocated (ephemeral_heap_segment) - - generation_plan_allocation_start (generation_of (max_generation-1)); - total_ephemeral_size = min (total_ephemeral_size, planned_ephemeral_size); -#endif //RESPECT_LARGE_ALIGNMENT - -#ifdef SHORT_PLUGS - total_ephemeral_size = Align ((size_t)((double)total_ephemeral_size * short_plugs_pad_ratio) + 1); - total_ephemeral_size += Align (DESIRED_PLUG_LENGTH); -#endif //SHORT_PLUGS - - dprintf (3, ("total ephemeral size is %zx, padding %zx(%zx)", - total_ephemeral_size, - padding_size, (total_ephemeral_size - padding_size))); -} - -heap_segment* -gc_heap::soh_get_segment_to_expand() -{ - size_t size = soh_segment_size; - - ordered_plug_indices_init = FALSE; - use_bestfit = FALSE; - - //compute the size of the new ephemeral heap segment. - compute_new_ephemeral_size(); - - if ((settings.pause_mode != pause_low_latency) && - (settings.pause_mode != pause_no_gc) -#ifdef BACKGROUND_GC - && (!gc_heap::background_running_p()) -#endif //BACKGROUND_GC - ) - { - assert (settings.condemned_generation <= max_generation); - allocator* gen_alloc = ((settings.condemned_generation == max_generation) ? 
nullptr : - generation_allocator (generation_of (max_generation))); - dprintf (2, ("(gen%d)soh_get_segment_to_expand", settings.condemned_generation)); - - // try to find one in the gen 2 segment list, search backwards because the first segments - // tend to be more compact than the later ones. - heap_segment* fseg = heap_segment_rw (generation_start_segment (generation_of (max_generation))); - - _ASSERTE(fseg != NULL); - -#ifdef SEG_REUSE_STATS - int try_reuse = 0; -#endif //SEG_REUSE_STATS - - heap_segment* seg = ephemeral_heap_segment; - while ((seg = heap_segment_prev_rw (fseg, seg)) && (seg != fseg)) - { -#ifdef SEG_REUSE_STATS - try_reuse++; -#endif //SEG_REUSE_STATS - - if (can_expand_into_p (seg, size/3, total_ephemeral_size, gen_alloc)) - { - get_gc_data_per_heap()->set_mechanism (gc_heap_expand, - (use_bestfit ? expand_reuse_bestfit : expand_reuse_normal)); - if (settings.condemned_generation == max_generation) - { - if (use_bestfit) - { - build_ordered_free_spaces (seg); - dprintf (GTC_LOG, ("can use best fit")); - } - -#ifdef SEG_REUSE_STATS - dprintf (SEG_REUSE_LOG_0, ("(gen%d)soh_get_segment_to_expand: found seg #%d to reuse", - settings.condemned_generation, try_reuse)); -#endif //SEG_REUSE_STATS - dprintf (GTC_LOG, ("max_gen: Found existing segment to expand into %zx", (size_t)seg)); - return seg; - } - else - { -#ifdef SEG_REUSE_STATS - dprintf (SEG_REUSE_LOG_0, ("(gen%d)soh_get_segment_to_expand: found seg #%d to reuse - returning", - settings.condemned_generation, try_reuse)); -#endif //SEG_REUSE_STATS - dprintf (GTC_LOG, ("max_gen-1: Found existing segment to expand into %zx", (size_t)seg)); - - // If we return 0 here, the allocator will think since we are short on end - // of seg we need to trigger a full compacting GC. So if sustained low latency - // is set we should acquire a new seg instead, that way we wouldn't be short. - // The real solution, of course, is to actually implement seg reuse in gen1. 
- if (settings.pause_mode != pause_sustained_low_latency) - { - dprintf (GTC_LOG, ("max_gen-1: SustainedLowLatency is set, acquire a new seg")); - get_gc_data_per_heap()->set_mechanism (gc_heap_expand, expand_next_full_gc); - return 0; - } - } - } - } - } - - heap_segment* result = get_segment (size, gc_oh_num::soh); - - if(result) - { -#ifdef BACKGROUND_GC - if (current_c_gc_state == c_gc_state_planning) - { - // When we expand heap during bgc sweep, we set the seg to be swept so - // we'll always look at cards for objects on the new segment. - result->flags |= heap_segment_flags_swept; - } -#endif //BACKGROUND_GC - - FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(result), - (size_t)(heap_segment_reserved (result) - heap_segment_mem(result)), - gc_etw_segment_small_object_heap); - } - - get_gc_data_per_heap()->set_mechanism (gc_heap_expand, (result ? expand_new_seg : expand_no_memory)); - - if (result == 0) - { - dprintf (2, ("h%d: failed to allocate a new segment!", heap_number)); - } - else - { -#ifdef MULTIPLE_HEAPS - heap_segment_heap (result) = this; -#endif //MULTIPLE_HEAPS - } - - dprintf (GTC_LOG, ("(gen%d)creating new segment %p", settings.condemned_generation, result)); - return result; -} - -//returns 0 in case of allocation failure -heap_segment* -gc_heap::get_segment (size_t size, gc_oh_num oh) -{ - assert(oh != gc_oh_num::unknown); - BOOL uoh_p = (oh == gc_oh_num::loh) || (oh == gc_oh_num::poh); - if (heap_hard_limit) - return NULL; - - heap_segment* result = 0; - - if (segment_standby_list != 0) - { - result = segment_standby_list; - heap_segment* last = 0; - while (result) - { - size_t hs = (size_t)(heap_segment_reserved (result) - (uint8_t*)result); - if ((hs >= size) && ((hs / 2) < size)) - { - dprintf (2, ("Hoarded segment %zx found", (size_t) result)); - if (last) - { - heap_segment_next (last) = heap_segment_next (result); - } - else - { - segment_standby_list = heap_segment_next (result); - } - break; - } - else - { - last = result; - result 
= heap_segment_next (result); - } - } - } - - if (result) - { - init_heap_segment (result, __this); -#ifdef BACKGROUND_GC - if (is_bgc_in_progress()) - { - dprintf (GC_TABLE_LOG, ("hoarded seg %p, mark_array is %p", result, mark_array)); - if (!commit_mark_array_new_seg (__this, result)) - { - dprintf (GC_TABLE_LOG, ("failed to commit mark array for hoarded seg")); - // If we can't use it we need to thread it back. - if (segment_standby_list != 0) - { - heap_segment_next (result) = segment_standby_list; - segment_standby_list = result; - } - else - { - segment_standby_list = result; - } - - result = 0; - } - } -#endif //BACKGROUND_GC - - if (result) - seg_mapping_table_add_segment (result, __this); - } - - if (!result) - { - void* mem = virtual_alloc (size); - if (!mem) - { - fgm_result.set_fgm (fgm_reserve_segment, size, uoh_p); - return 0; - } - - result = make_heap_segment ((uint8_t*)mem, size, __this, (oh + max_generation)); - - if (result) - { - uint8_t* start; - uint8_t* end; - if (mem < g_gc_lowest_address) - { - start = (uint8_t*)mem; - } - else - { - start = (uint8_t*)g_gc_lowest_address; - } - - if (((uint8_t*)mem + size) > g_gc_highest_address) - { - end = (uint8_t*)mem + size; - } - else - { - end = (uint8_t*)g_gc_highest_address; - } - - if (gc_heap::grow_brick_card_tables (start, end, size, result, __this, uoh_p) != 0) - { - // release_segment needs the flags to decrement the proper bucket - size_t flags = 0; - if (oh == poh) - { - flags = heap_segment_flags_poh; - } - else if (oh == loh) - { - flags = heap_segment_flags_loh; - } - result->flags |= flags; - release_segment (result); - return 0; - } - } - else - { - fgm_result.set_fgm (fgm_commit_segment_beg, SEGMENT_INITIAL_COMMIT, uoh_p); - virtual_free (mem, size); - } - - if (result) - { - seg_mapping_table_add_segment (result, __this); - } - } - -#ifdef BACKGROUND_GC - if (result) - { - ::record_changed_seg ((uint8_t*)result, heap_segment_reserved (result), - settings.gc_index, current_bgc_state, 
- seg_added); - bgc_verify_mark_array_cleared (result); - } -#endif //BACKGROUND_GC - - dprintf (GC_TABLE_LOG, ("h%d: new seg: %p-%p (%zd)", heap_number, result, ((uint8_t*)result + size), size)); - return result; -} - -#endif //!USE_REGIONS - -#ifdef MULTIPLE_HEAPS -#ifdef HOST_X86 -#ifdef _MSC_VER -#pragma warning(disable:4035) - static ptrdiff_t get_cycle_count() - { - __asm rdtsc - } -#pragma warning(default:4035) -#elif defined(__GNUC__) - static ptrdiff_t get_cycle_count() - { - ptrdiff_t cycles; - ptrdiff_t cyclesHi; - __asm__ __volatile__ - ("rdtsc":"=a" (cycles), "=d" (cyclesHi)); - return cycles; - } -#else //_MSC_VER -#error Unknown compiler -#endif //_MSC_VER -#elif defined(TARGET_AMD64) -#ifdef _MSC_VER -extern "C" uint64_t __rdtsc(); -#pragma intrinsic(__rdtsc) - static ptrdiff_t get_cycle_count() - { - return (ptrdiff_t)__rdtsc(); - } -#elif defined(__GNUC__) - static ptrdiff_t get_cycle_count() - { - ptrdiff_t cycles; - ptrdiff_t cyclesHi; - __asm__ __volatile__ - ("rdtsc":"=a" (cycles), "=d" (cyclesHi)); - return (cyclesHi << 32) | cycles; - } -#else // _MSC_VER - extern "C" ptrdiff_t get_cycle_count(void); -#endif // _MSC_VER -#elif defined(TARGET_LOONGARCH64) - static ptrdiff_t get_cycle_count() - { - ////FIXME: TODO for LOONGARCH64: - //ptrdiff_t cycle; - __asm__ volatile ("break 0 \n"); - return 0; - } -#else - static ptrdiff_t get_cycle_count() - { - // @ARMTODO, @ARM64TODO, @WASMTODO: cycle counter is not exposed to user mode. For now (until we can show this - // makes a difference on the configurations on which we'll run) just return 0. This will result in - // all buffer access times being reported as equal in access_time(). - return 0; - } -#endif //TARGET_X86 - -// We may not be on contiguous numa nodes so need to store -// the node index as well. 
-struct node_heap_count -{ - int node_no; - int heap_count; -}; - -class heap_select -{ - heap_select() {} -public: - static uint8_t* sniff_buffer; - static unsigned n_sniff_buffers; - static unsigned cur_sniff_index; - - static uint16_t proc_no_to_heap_no[MAX_SUPPORTED_CPUS]; - static uint16_t heap_no_to_proc_no[MAX_SUPPORTED_CPUS]; - static uint16_t heap_no_to_numa_node[MAX_SUPPORTED_CPUS]; - static uint16_t numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4]; - -#ifdef HEAP_BALANCE_INSTRUMENTATION - // Note this is the total numa nodes GC heaps are on. There might be - // more on the machine if GC threads aren't using all of them. - static uint16_t total_numa_nodes; - static node_heap_count heaps_on_node[MAX_SUPPORTED_NODES]; -#endif - - static int access_time(uint8_t *sniff_buffer, int heap_number, unsigned sniff_index, unsigned n_sniff_buffers) - { - ptrdiff_t start_cycles = get_cycle_count(); - uint8_t sniff = sniff_buffer[(1 + heap_number*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE]; - assert (sniff == 0); - ptrdiff_t elapsed_cycles = get_cycle_count() - start_cycles; - // add sniff here just to defeat the optimizer - elapsed_cycles += sniff; - return (int) elapsed_cycles; - } - -public: - static BOOL init(int n_heaps) - { - assert (sniff_buffer == NULL && n_sniff_buffers == 0); - if (!GCToOSInterface::CanGetCurrentProcessorNumber()) - { - n_sniff_buffers = n_heaps*2+1; - size_t n_cache_lines = 1 + n_heaps * n_sniff_buffers + 1; - size_t sniff_buf_size = n_cache_lines * HS_CACHE_LINE_SIZE; - if (sniff_buf_size / HS_CACHE_LINE_SIZE != n_cache_lines) // check for overlow - { - return FALSE; - } - - sniff_buffer = new (nothrow) uint8_t[sniff_buf_size]; - if (sniff_buffer == 0) - return FALSE; - memset(sniff_buffer, 0, sniff_buf_size*sizeof(uint8_t)); - } - - bool do_numa = GCToOSInterface::CanEnableGCNumaAware(); - - // we want to assign heap indices such that there is a contiguous - // range of heap numbers for each numa node - - // we do this in two passes: - 
// 1. gather processor numbers and numa node numbers for all heaps - // 2. assign heap numbers for each numa node - - // Pass 1: gather processor numbers and numa node numbers - uint16_t proc_no[MAX_SUPPORTED_CPUS]; - uint16_t node_no[MAX_SUPPORTED_CPUS]; - uint16_t max_node_no = 0; - uint16_t heap_num; - for (heap_num = 0; heap_num < n_heaps; heap_num++) - { - if (!GCToOSInterface::GetProcessorForHeap (heap_num, &proc_no[heap_num], &node_no[heap_num])) - break; - assert(proc_no[heap_num] < MAX_SUPPORTED_CPUS); - if (!do_numa || node_no[heap_num] == NUMA_NODE_UNDEFINED) - node_no[heap_num] = 0; - max_node_no = max(max_node_no, node_no[heap_num]); - } - - // Pass 2: assign heap numbers by numa node - int cur_heap_no = 0; - for (uint16_t cur_node_no = 0; cur_node_no <= max_node_no; cur_node_no++) - { - for (int i = 0; i < heap_num; i++) - { - if (node_no[i] != cur_node_no) - continue; - - // we found a heap on cur_node_no - heap_no_to_proc_no[cur_heap_no] = proc_no[i]; - heap_no_to_numa_node[cur_heap_no] = cur_node_no; - - cur_heap_no++; - } - } - - return TRUE; - } - - static void init_cpu_mapping(int heap_number) - { - if (GCToOSInterface::CanGetCurrentProcessorNumber()) - { - uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber(); - // For a 32-bit process running on a machine with > 64 procs, - // even though the process can only use up to 32 procs, the processor - // index can be >= 64; or in the cpu group case, if the process is not running in cpu group #0, - // the GetCurrentProcessorNumber will return a number that's >= 64. 
- proc_no_to_heap_no[proc_no % MAX_SUPPORTED_CPUS] = (uint16_t)heap_number; - } - } - - static void mark_heap(int heap_number) - { - if (GCToOSInterface::CanGetCurrentProcessorNumber()) - return; - - for (unsigned sniff_index = 0; sniff_index < n_sniff_buffers; sniff_index++) - sniff_buffer[(1 + heap_number*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE] &= 1; - } - - static int select_heap(alloc_context* acontext) - { -#ifndef TRACE_GC - UNREFERENCED_PARAMETER(acontext); // only referenced by dprintf -#endif //TRACE_GC - - if (GCToOSInterface::CanGetCurrentProcessorNumber()) - { - uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber(); - // For a 32-bit process running on a machine with > 64 procs, - // even though the process can only use up to 32 procs, the processor - // index can be >= 64; or in the cpu group case, if the process is not running in cpu group #0, - // the GetCurrentProcessorNumber will return a number that's >= 64. - int adjusted_heap = proc_no_to_heap_no[proc_no % MAX_SUPPORTED_CPUS]; - // with dynamic heap count, need to make sure the value is in range. 
- if (adjusted_heap >= gc_heap::n_heaps) - { - adjusted_heap %= gc_heap::n_heaps; - } - return adjusted_heap; - } - - unsigned sniff_index = Interlocked::Increment(&cur_sniff_index); - sniff_index %= n_sniff_buffers; - - int best_heap = 0; - int best_access_time = 1000*1000*1000; - int second_best_access_time = best_access_time; - - uint8_t *l_sniff_buffer = sniff_buffer; - unsigned l_n_sniff_buffers = n_sniff_buffers; - for (int heap_number = 0; heap_number < gc_heap::n_heaps; heap_number++) - { - int this_access_time = access_time(l_sniff_buffer, heap_number, sniff_index, l_n_sniff_buffers); - if (this_access_time < best_access_time) - { - second_best_access_time = best_access_time; - best_access_time = this_access_time; - best_heap = heap_number; - } - else if (this_access_time < second_best_access_time) - { - second_best_access_time = this_access_time; - } - } - - if (best_access_time*2 < second_best_access_time) - { - sniff_buffer[(1 + best_heap*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE] &= 1; - - dprintf (3, ("select_heap yields crisp %d for context %p\n", best_heap, (void *)acontext)); - } - else - { - dprintf (3, ("select_heap yields vague %d for context %p\n", best_heap, (void *)acontext )); - } - - return best_heap; - } - - static bool can_find_heap_fast() - { - return GCToOSInterface::CanGetCurrentProcessorNumber(); - } - - static uint16_t find_proc_no_from_heap_no(int heap_number) - { - return heap_no_to_proc_no[heap_number]; - } - - static uint16_t find_numa_node_from_heap_no(int heap_number) - { - return heap_no_to_numa_node[heap_number]; - } - - static void init_numa_node_to_heap_map(int nheaps) - { - // Called right after GCHeap::Init() for each heap - // For each NUMA node used by the heaps, the - // numa_node_to_heap_map[numa_node] is set to the first heap number on that node and - // numa_node_to_heap_map[numa_node + 1] is set to the first heap number not on that node - // Set the start of the heap number range for the first NUMA node - 
numa_node_to_heap_map[heap_no_to_numa_node[0]] = 0; -#ifdef HEAP_BALANCE_INSTRUMENTATION - total_numa_nodes = 0; - memset (heaps_on_node, 0, sizeof (heaps_on_node)); - heaps_on_node[0].node_no = heap_no_to_numa_node[0]; - heaps_on_node[0].heap_count = 1; -#endif //HEAP_BALANCE_INSTRUMENTATION - - for (int i=1; i < nheaps; i++) - { - if (heap_no_to_numa_node[i] != heap_no_to_numa_node[i-1]) - { -#ifdef HEAP_BALANCE_INSTRUMENTATION - total_numa_nodes++; - heaps_on_node[total_numa_nodes].node_no = heap_no_to_numa_node[i]; -#endif - - // Set the end of the heap number range for the previous NUMA node - numa_node_to_heap_map[heap_no_to_numa_node[i-1] + 1] = - // Set the start of the heap number range for the current NUMA node - numa_node_to_heap_map[heap_no_to_numa_node[i]] = (uint16_t)i; - } -#ifdef HEAP_BALANCE_INSTRUMENTATION - (heaps_on_node[total_numa_nodes].heap_count)++; -#endif - } - - // Set the end of the heap range for the last NUMA node - numa_node_to_heap_map[heap_no_to_numa_node[nheaps-1] + 1] = (uint16_t)nheaps; //mark the end with nheaps - -#ifdef HEAP_BALANCE_INSTRUMENTATION - total_numa_nodes++; -#endif - } - - static bool get_info_proc (int index, uint16_t* proc_no, uint16_t* node_no, int* start_heap, int* end_heap) - { - if (!GCToOSInterface::GetProcessorForHeap ((uint16_t)index, proc_no, node_no)) - return false; - - if (*node_no == NUMA_NODE_UNDEFINED) - *node_no = 0; - - *start_heap = (int)numa_node_to_heap_map[*node_no]; - *end_heap = (int)(numa_node_to_heap_map[*node_no + 1]); - - return true; - } - - static void distribute_other_procs (bool distribute_all_p) - { - if (affinity_config_specified_p) - return; - - if (distribute_all_p) - { - uint16_t current_heap_no_on_node[MAX_SUPPORTED_CPUS]; - memset (current_heap_no_on_node, 0, sizeof (current_heap_no_on_node)); - uint16_t current_heap_no = 0; - - uint16_t proc_no = 0; - uint16_t node_no = 0; - - for (int i = gc_heap::n_heaps; i < (int)g_num_active_processors; i++) - { - int start_heap, 
end_heap; - if (!get_info_proc (i, &proc_no, &node_no, &start_heap, &end_heap)) - break; - - // This indicates there are heaps on this node - if ((end_heap - start_heap) > 0) - { - proc_no_to_heap_no[proc_no] = (current_heap_no_on_node[node_no] % (uint16_t)(end_heap - start_heap)) + (uint16_t)start_heap; - (current_heap_no_on_node[node_no])++; - } - else - { - proc_no_to_heap_no[proc_no] = current_heap_no % gc_heap::n_heaps; - (current_heap_no)++; - } - } - } - else - { - // This is for scenarios where GCHeapCount is specified as something like - // (g_num_active_processors - 2) to allow less randomization to the Server GC threads. - // In this case we want to assign the right heaps to those procs, ie if they share - // the same numa node we want to assign local heaps to those procs. Otherwise we - // let the heap balancing mechanism take over for now. - uint16_t proc_no = 0; - uint16_t node_no = 0; - int current_node_no = -1; - int current_heap_on_node = -1; - - for (int i = gc_heap::n_heaps; i < (int)g_num_active_processors; i++) - { - int start_heap, end_heap; - if (!get_info_proc (i, &proc_no, &node_no, &start_heap, &end_heap)) - break; - - if ((end_heap - start_heap) > 0) - { - if (node_no == current_node_no) - { - // We already iterated through all heaps on this node, don't add more procs to these - // heaps. 
- if (current_heap_on_node >= end_heap) - { - continue; - } - } - else - { - current_node_no = node_no; - current_heap_on_node = start_heap; - } - - proc_no_to_heap_no[proc_no] = (uint16_t)current_heap_on_node; - - current_heap_on_node++; - } - } + if (mechanism >= 0) + { + descr = &gc_mechanisms_descr[(gc_mechanism_per_heap)i]; + dprintf (DT_LOG_0, ("[%2d]%s%s", + heap_index, + descr->name, + (descr->descr)[mechanism])); } } +#endif //SIMPLE_DPRINTF && DT_LOG +} + +void gc_history_global::print() +{ +#ifdef DT_LOG + char str_settings[64]; + memset (str_settings, '|', sizeof (char) * 64); + str_settings[max_global_mechanisms_count*2] = 0; - static void get_heap_range_for_heap(int hn, int* start, int* end) + for (int i = 0; i < max_global_mechanisms_count; i++) { - uint16_t numa_node = heap_no_to_numa_node[hn]; - *start = (int)numa_node_to_heap_map[numa_node]; - *end = (int)(numa_node_to_heap_map[numa_node+1]); -#ifdef HEAP_BALANCE_INSTRUMENTATION - dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMPget_heap_range: %d is in numa node %d, start = %d, end = %d", hn, numa_node, *start, *end)); -#endif //HEAP_BALANCE_INSTRUMENTATION + str_settings[i * 2] = (get_mechanism_p ((gc_global_mechanism_p)i) ? 'Y' : 'N'); } -}; -uint8_t* heap_select::sniff_buffer; -unsigned heap_select::n_sniff_buffers; -unsigned heap_select::cur_sniff_index; -uint16_t heap_select::proc_no_to_heap_no[MAX_SUPPORTED_CPUS]; -uint16_t heap_select::heap_no_to_proc_no[MAX_SUPPORTED_CPUS]; -uint16_t heap_select::heap_no_to_numa_node[MAX_SUPPORTED_CPUS]; -uint16_t heap_select::numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4]; -#ifdef HEAP_BALANCE_INSTRUMENTATION -uint16_t heap_select::total_numa_nodes; -node_heap_count heap_select::heaps_on_node[MAX_SUPPORTED_NODES]; -#endif - -#ifdef HEAP_BALANCE_INSTRUMENTATION -// This records info we use to look at effect of different strategies -// for heap balancing. 
-struct heap_balance_info -{ - uint64_t timestamp; - // This also encodes when we detect the thread runs on - // different proc during a balance attempt. Sometimes - // I observe this happens multiple times during one attempt! - // If this happens, I just record the last proc we observe - // and set MSB. - int tid; - // This records the final alloc_heap for the thread. - // - // This also encodes the reason why we needed to set_home_heap - // in balance_heaps. - // If we set it because the home heap is not the same as the proc, - // we set MSB. - // - // If we set ideal proc, we set the 2nd MSB. - int alloc_heap; - int ideal_proc_no; -}; -// This means inbetween each GC we can log at most this many entries per proc. -// This is usually enough. Most of the time we only need to log something every 128k -// of allocations in balance_heaps and gen0 budget is <= 200mb. -#define default_max_hb_heap_balance_info 4096 + dprintf (DT_LOG_0, ("[hp]|c|p|o|d|b|e|")); -struct heap_balance_info_proc -{ - int count; - int index; - heap_balance_info hb_info[default_max_hb_heap_balance_info]; -}; + dprintf (DT_LOG_0, ("%4d|%s", num_heaps, str_settings)); + dprintf (DT_LOG_0, ("Condemned gen%d(reason: %s; mode: %s), youngest budget %zd(%d), memload %d", + condemned_generation, + str_gc_reasons[reason], + str_gc_pause_modes[pause_mode], + final_youngest_desired, + gen0_reduction_count, + mem_pressure)); +#endif //DT_LOG +} -struct heap_balance_info_numa +#ifdef FEATURE_BASICFREEZE +sorted_table* +sorted_table::make_sorted_table () { - heap_balance_info_proc* hb_info_procs; -}; + size_t size = 400; -uint64_t start_raw_ts = 0; -bool cpu_group_enabled_p = false; -uint32_t procs_per_numa_node = 0; -uint16_t total_numa_nodes_on_machine = 0; -uint32_t procs_per_cpu_group = 0; -uint16_t total_cpu_groups_on_machine = 0; -// Note this is still on one of the numa nodes, so we'll incur a remote access -// no matter what. 
-heap_balance_info_numa* hb_info_numa_nodes = NULL; + // allocate one more bk to store the older slot address. + sorted_table* res = (sorted_table*)new (nothrow) char [sizeof (sorted_table) + (size + 1) * sizeof (bk)]; + if (!res) + return 0; + res->size = size; + res->slots = (bk*)(res + 1); + res->old_slots = 0; + res->clear(); + return res; +} -// TODO: This doesn't work for multiple nodes per CPU group yet. -int get_proc_index_numa (int proc_no, int* numa_no) +void +sorted_table::delete_sorted_table() { - if (total_numa_nodes_on_machine == 1) - { - *numa_no = 0; - return proc_no; - } - else + if (slots != (bk*)(this+1)) { - if (cpu_group_enabled_p) - { - // see vm\gcenv.os.cpp GroupProcNo implementation. - *numa_no = proc_no >> 6; - return (proc_no % 64); - } - else - { - *numa_no = proc_no / procs_per_numa_node; - return (proc_no % procs_per_numa_node); - } + delete[] slots; } + delete_old_slots(); } - - - -const int hb_log_buffer_size = 4096; -static char hb_log_buffer[hb_log_buffer_size]; -int last_hb_recorded_gc_index = -1; -#endif //HEAP_BALANCE_INSTRUMENTATION - -void set_thread_affinity_for_heap (int heap_number, uint16_t proc_no) +void +sorted_table::delete_old_slots() { - if (!GCToOSInterface::SetThreadAffinity (proc_no)) + uint8_t* sl = (uint8_t*)old_slots; + while (sl) { - dprintf (1, ("Failed to set thread affinity for GC thread %d on proc #%d", heap_number, proc_no)); + uint8_t* dsl = sl; + sl = last_slot ((bk*)sl); + delete[] dsl; } + old_slots = 0; } - -#endif //MULTIPLE_HEAPS - -class mark +void +sorted_table::enqueue_old_slot(bk* sl) { -public: - uint8_t* first; - size_t len; - - // If we want to save space we can have a pool of plug_and_gap's instead of - // always having 2 allocated for each pinned plug. - gap_reloc_pair saved_pre_plug; - // If we decide to not compact, we need to restore the original values. 
- gap_reloc_pair saved_pre_plug_reloc; - - gap_reloc_pair saved_post_plug; - - // Supposedly Pinned objects cannot have references but we are seeing some from pinvoke - // frames. Also if it's an artificially pinned plug created by us, it can certainly - // have references. - // We know these cases will be rare so we can optimize this to be only allocated on demand. - gap_reloc_pair saved_post_plug_reloc; - - // We need to calculate this after we are done with plan phase and before compact - // phase because compact phase will change the bricks so relocate_address will no - // longer work. - uint8_t* saved_pre_plug_info_reloc_start; - - // We need to save this because we will have no way to calculate it, unlike the - // pre plug info start which is right before this plug. - uint8_t* saved_post_plug_info_start; - -#ifdef SHORT_PLUGS - uint8_t* allocation_context_start_region; -#endif //SHORT_PLUGS - - // How the bits in these bytes are organized: - // MSB --> LSB - // bit to indicate whether it's a short obj | 3 bits for refs in this short obj | 2 unused bits | bit to indicate if it's collectible | last bit - // last bit indicates if there's pre or post info associated with this plug. If it's not set all other bits will be 0. - BOOL saved_pre_p; - BOOL saved_post_p; - -#ifdef _DEBUG - // We are seeing this is getting corrupted for a PP with a NP after. - // Save it when we first set it and make sure it doesn't change. 
- gap_reloc_pair saved_post_plug_debug; -#endif //_DEBUG - - size_t get_max_short_bits() - { - return (sizeof (gap_reloc_pair) / sizeof (uint8_t*)); - } - - // pre bits - size_t get_pre_short_start_bit () - { - return (sizeof (saved_pre_p) * 8 - 1 - (sizeof (gap_reloc_pair) / sizeof (uint8_t*))); - } - - BOOL pre_short_p() - { - return (saved_pre_p & (1 << (sizeof (saved_pre_p) * 8 - 1))); - } - - void set_pre_short() - { - saved_pre_p |= (1 << (sizeof (saved_pre_p) * 8 - 1)); - } - - void set_pre_short_bit (size_t bit) - { - saved_pre_p |= 1 << (get_pre_short_start_bit() + bit); - } - - BOOL pre_short_bit_p (size_t bit) - { - return (saved_pre_p & (1 << (get_pre_short_start_bit() + bit))); - } - -#ifdef COLLECTIBLE_CLASS - void set_pre_short_collectible() - { - saved_pre_p |= 2; - } - - BOOL pre_short_collectible_p() - { - return (saved_pre_p & 2); - } -#endif //COLLECTIBLE_CLASS - - // post bits - size_t get_post_short_start_bit () - { - return (sizeof (saved_post_p) * 8 - 1 - (sizeof (gap_reloc_pair) / sizeof (uint8_t*))); - } - - BOOL post_short_p() - { - return (saved_post_p & (1 << (sizeof (saved_post_p) * 8 - 1))); - } - - void set_post_short() - { - saved_post_p |= (1 << (sizeof (saved_post_p) * 8 - 1)); - } - - void set_post_short_bit (size_t bit) - { - saved_post_p |= 1 << (get_post_short_start_bit() + bit); - } - - BOOL post_short_bit_p (size_t bit) - { - return (saved_post_p & (1 << (get_post_short_start_bit() + bit))); - } - -#ifdef COLLECTIBLE_CLASS - void set_post_short_collectible() - { - saved_post_p |= 2; - } - - BOOL post_short_collectible_p() - { - return (saved_post_p & 2); - } -#endif //COLLECTIBLE_CLASS - - uint8_t* get_plug_address() { return first; } - - BOOL has_pre_plug_info() { return saved_pre_p; } - BOOL has_post_plug_info() { return saved_post_p; } - - gap_reloc_pair* get_pre_plug_reloc_info() { return &saved_pre_plug_reloc; } - gap_reloc_pair* get_post_plug_reloc_info() { return &saved_post_plug_reloc; } - void 
set_pre_plug_info_reloc_start (uint8_t* reloc) { saved_pre_plug_info_reloc_start = reloc; } - uint8_t* get_post_plug_info_start() { return saved_post_plug_info_start; } - - // We need to temporarily recover the shortened plugs for compact phase so we can - // copy over the whole plug and their related info (mark bits/cards). But we will - // need to set the artificial gap back so compact phase can keep reading the plug info. - // We also need to recover the saved info because we'll need to recover it later. - // - // So we would call swap_p*_plug_and_saved once to recover the object info; then call - // it again to recover the artificial gap. - void swap_pre_plug_and_saved() - { - gap_reloc_pair temp; - memcpy (&temp, (first - sizeof (plug_and_gap)), sizeof (temp)); - memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug_reloc, sizeof (saved_pre_plug_reloc)); - saved_pre_plug_reloc = temp; - } + last_slot (sl) = (uint8_t*)old_slots; + old_slots = sl; +} - void swap_post_plug_and_saved() +inline +size_t +sorted_table::lookup (uint8_t*& add) +{ + ptrdiff_t high = (count-1); + ptrdiff_t low = 0; + ptrdiff_t ti; + ptrdiff_t mid; + bk* buck = buckets(); + while (low <= high) { - gap_reloc_pair temp; - memcpy (&temp, saved_post_plug_info_start, sizeof (temp)); - memcpy (saved_post_plug_info_start, &saved_post_plug_reloc, sizeof (saved_post_plug_reloc)); - saved_post_plug_reloc = temp; + mid = ((low + high)/2); + ti = mid; + if (buck[ti].add > add) + { + if ((ti > 0) && (buck[ti-1].add <= add)) + { + add = buck[ti-1].add; + return buck[ti - 1].val; + } + high = mid - 1; + } + else + { + if (buck[ti+1].add > add) + { + add = buck[ti].add; + return buck[ti].val; + } + low = mid + 1; + } } + add = 0; + return 0; +} - void swap_pre_plug_and_saved_for_profiler() +BOOL +sorted_table::ensure_space_for_insert() +{ + if (count == size) { - gap_reloc_pair temp; - memcpy (&temp, (first - sizeof (plug_and_gap)), sizeof (temp)); - memcpy ((first - sizeof (plug_and_gap)), 
&saved_pre_plug, sizeof (saved_pre_plug)); - saved_pre_plug = temp; - } + size = (size * 3)/2; + assert((size * sizeof (bk)) > 0); + bk* res = (bk*)new (nothrow) char [(size + 1) * sizeof (bk)]; + assert (res); + if (!res) + return FALSE; - void swap_post_plug_and_saved_for_profiler() - { - gap_reloc_pair temp; - memcpy (&temp, saved_post_plug_info_start, sizeof (temp)); - memcpy (saved_post_plug_info_start, &saved_post_plug, sizeof (saved_post_plug)); - saved_post_plug = temp; + last_slot (res) = 0; + memcpy (((bk*)res + 1), buckets(), count * sizeof (bk)); + bk* last_old_slots = slots; + slots = res; + if (last_old_slots != (bk*)(this + 1)) + enqueue_old_slot (last_old_slots); } + return TRUE; +} - // We should think about whether it's really necessary to have to copy back the pre plug - // info since it was already copied during compacting plugs. But if a plug doesn't move - // by >= 3 ptr size (the size of gap_reloc_pair), it means we'd have to recover pre plug info. - size_t recover_plug_info() - { - // We need to calculate the size for sweep case in order to correctly record the - // free_obj_space - sweep would've made these artificial gaps into free objects and - // we would need to deduct the size because now we are writing into those free objects. 
- size_t recovered_sweep_size = 0; +BOOL +sorted_table::insert (uint8_t* add, size_t val) +{ + //grow if no more room + assert (count < size); - if (saved_pre_p) + //insert sorted + ptrdiff_t high = (count-1); + ptrdiff_t low = 0; + ptrdiff_t ti; + ptrdiff_t mid; + bk* buck = buckets(); + while (low <= high) + { + mid = ((low + high)/2); + ti = mid; + if (buck[ti].add > add) { - if (gc_heap::settings.compaction) + if ((ti == 0) || (buck[ti-1].add <= add)) { - dprintf (3, ("%p: REC Pre: %p-%p", - first, - &saved_pre_plug_reloc, - saved_pre_plug_info_reloc_start)); - memcpy (saved_pre_plug_info_reloc_start, &saved_pre_plug_reloc, sizeof (saved_pre_plug_reloc)); + // found insertion point + for (ptrdiff_t k = count; k > ti;k--) + { + buck [k] = buck [k-1]; + } + buck[ti].add = add; + buck[ti].val = val; + count++; + return TRUE; } - else + high = mid - 1; + } + else + { + if (buck[ti+1].add > add) { - dprintf (3, ("%p: REC Pre: %p-%p", - first, - &saved_pre_plug, - (first - sizeof (plug_and_gap)))); - memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug, sizeof (saved_pre_plug)); - recovered_sweep_size += sizeof (saved_pre_plug); + //found the insertion point + for (ptrdiff_t k = count; k > ti+1;k--) + { + buck [k] = buck [k-1]; + } + buck[ti+1].add = add; + buck[ti+1].val = val; + count++; + return TRUE; } + low = mid + 1; } + } + assert (0); + return TRUE; +} - if (saved_post_p) +void +sorted_table::remove (uint8_t* add) +{ + ptrdiff_t high = (count-1); + ptrdiff_t low = 0; + ptrdiff_t ti; + ptrdiff_t mid; + bk* buck = buckets(); + while (low <= high) + { + mid = ((low + high)/2); + ti = mid; + if (buck[ti].add > add) { - if (gc_heap::settings.compaction) + if (buck[ti-1].add <= add) { - dprintf (3, ("%p: REC Post: %p-%p", - first, - &saved_post_plug_reloc, - saved_post_plug_info_start)); - memcpy (saved_post_plug_info_start, &saved_post_plug_reloc, sizeof (saved_post_plug_reloc)); + for (ptrdiff_t k = ti; k < count; k++) + buck[k-1] = buck[k]; + count--; + 
return; } - else + high = mid - 1; + } + else + { + if (buck[ti+1].add > add) { - dprintf (3, ("%p: REC Post: %p-%p", - first, - &saved_post_plug, - saved_post_plug_info_start)); - memcpy (saved_post_plug_info_start, &saved_post_plug, sizeof (saved_post_plug)); - recovered_sweep_size += sizeof (saved_post_plug); + for (ptrdiff_t k = ti+1; k < count; k++) + buck[k-1] = buck[k]; + count--; + return; } + low = mid + 1; } - - return recovered_sweep_size; } -}; - + assert (0); +} -void gc_mechanisms::init_mechanisms() +void +sorted_table::clear() { - condemned_generation = 0; - promotion = FALSE;//TRUE; - compaction = TRUE; -#ifdef FEATURE_LOH_COMPACTION - loh_compaction = gc_heap::loh_compaction_requested(); -#else - loh_compaction = FALSE; -#endif //FEATURE_LOH_COMPACTION - heap_expansion = FALSE; - concurrent = FALSE; - demotion = FALSE; - elevation_reduced = FALSE; - found_finalizers = FALSE; -#ifdef BACKGROUND_GC - background_p = gc_heap::background_running_p() != FALSE; -#endif //BACKGROUND_GC + count = 1; + buckets()[0].add = MAX_PTR; +} +#endif //FEATURE_BASICFREEZE - entry_memory_load = 0; - entry_available_physical_mem = 0; - exit_memory_load = 0; +#ifdef FEATURE_BASICFREEZE -#ifdef STRESS_HEAP - stress_induced = FALSE; -#endif // STRESS_HEAP +heap_segment* ro_segment_lookup (uint8_t* o) +{ + uint8_t* ro_seg_start = o; + heap_segment* seg = (heap_segment*)gc_heap::seg_table->lookup (ro_seg_start); + + if (ro_seg_start && in_range_for_segment (o, seg)) + return seg; + else + return 0; } -void gc_mechanisms::first_init() +#endif //FEATURE_BASICFREEZE + +#ifdef MULTIPLE_HEAPS +inline +gc_heap* seg_mapping_table_heap_of_worker (uint8_t* o) { - gc_index = 0; - gen0_reduction_count = 0; - should_lock_elevation = FALSE; - elevation_locked_count = 0; - reason = reason_empty; -#ifdef BACKGROUND_GC - pause_mode = gc_heap::gc_can_use_concurrent ? 
pause_interactive : pause_batch; + size_t index = (size_t)o >> gc_heap::min_segment_size_shr; + seg_mapping* entry = &seg_mapping_table[index]; + +#ifdef USE_REGIONS + gc_heap* hp = heap_segment_heap ((heap_segment*)entry); +#else + gc_heap* hp = ((o > entry->boundary) ? entry->h1 : entry->h0); + + dprintf (2, ("checking obj %p, index is %zd, entry: boundary: %p, h0: %p, seg0: %p, h1: %p, seg1: %p", + o, index, (entry->boundary + 1), + (uint8_t*)(entry->h0), (uint8_t*)(entry->seg0), + (uint8_t*)(entry->h1), (uint8_t*)(entry->seg1))); + #ifdef _DEBUG - int debug_pause_mode = static_cast(GCConfig::GetLatencyMode()); - if (debug_pause_mode >= 0) + heap_segment* seg = ((o > entry->boundary) ? entry->seg1 : entry->seg0); +#ifdef FEATURE_BASICFREEZE + if ((size_t)seg & ro_in_entry) + seg = (heap_segment*)((size_t)seg & ~ro_in_entry); +#endif //FEATURE_BASICFREEZE + +#ifdef TRACE_GC + if (seg) { - assert (debug_pause_mode <= pause_sustained_low_latency); - pause_mode = (gc_pause_mode)debug_pause_mode; + if (in_range_for_segment (o, seg)) + { + dprintf (2, ("obj %p belongs to segment %p(-%p)", o, seg, (uint8_t*)heap_segment_allocated (seg))); + } + else + { + dprintf (2, ("found seg %p(-%p) for obj %p, but it's not on the seg", + seg, (uint8_t*)heap_segment_allocated (seg), o)); + } + } + else + { + dprintf (2, ("could not find obj %p in any existing segments", o)); } +#endif //TRACE_GC #endif //_DEBUG -#else //BACKGROUND_GC - pause_mode = pause_batch; -#endif //BACKGROUND_GC - - init_mechanisms(); +#endif //USE_REGIONS + return hp; } -void gc_mechanisms::record (gc_history_global* history) -{ -#ifdef MULTIPLE_HEAPS - history->num_heaps = gc_heap::n_heaps; -#else - history->num_heaps = 1; + #endif //MULTIPLE_HEAPS - history->condemned_generation = condemned_generation; - history->gen0_reduction_count = gen0_reduction_count; - history->reason = reason; - history->pause_mode = (int)pause_mode; - history->mem_pressure = entry_memory_load; - history->global_mechanisms_p = 0; 
+// Only returns a valid seg if we can actually find o on the seg. +heap_segment* seg_mapping_table_segment_of (uint8_t* o) +{ +#ifdef FEATURE_BASICFREEZE + if ((o < g_gc_lowest_address) || (o >= g_gc_highest_address)) + return ro_segment_lookup (o); +#endif //FEATURE_BASICFREEZE - // start setting the boolean values. - if (concurrent) - history->set_mechanism_p (global_concurrent); + size_t index = (size_t)o >> gc_heap::min_segment_size_shr; + seg_mapping* entry = &seg_mapping_table[index]; - if (compaction) - history->set_mechanism_p (global_compaction); +#ifdef USE_REGIONS + // REGIONS TODO: I think we could simplify this to having the same info for each + // basic entry in a large region so we can get it right away instead of having to go + // back some entries. + ptrdiff_t first_field = (ptrdiff_t)heap_segment_allocated ((heap_segment*)entry); + if (first_field == 0) + { + dprintf (REGIONS_LOG, ("asked for seg for %p, in a freed region mem: %p, committed %p", + o, heap_segment_mem ((heap_segment*)entry), + heap_segment_committed ((heap_segment*)entry))); + return 0; + } + // Regions are never going to intersect an ro seg, so this can never be ro_in_entry. + assert (first_field != 0); + assert (first_field != ro_in_entry); + if (first_field < 0) + { + index += first_field; + } + heap_segment* seg = (heap_segment*)&seg_mapping_table[index]; +#else //USE_REGIONS + dprintf (2, ("checking obj %p, index is %zd, entry: boundary: %p, seg0: %p, seg1: %p", + o, index, (entry->boundary + 1), + (uint8_t*)(entry->seg0), (uint8_t*)(entry->seg1))); - if (promotion) - history->set_mechanism_p (global_promotion); + heap_segment* seg = ((o > entry->boundary) ? 
entry->seg1 : entry->seg0); +#ifdef FEATURE_BASICFREEZE + if ((size_t)seg & ro_in_entry) + seg = (heap_segment*)((size_t)seg & ~ro_in_entry); +#endif //FEATURE_BASICFREEZE +#endif //USE_REGIONS - if (demotion) - history->set_mechanism_p (global_demotion); + if (seg) + { + if (in_range_for_segment (o, seg)) + { + dprintf (2, ("obj %p belongs to segment %p(-%p)", o, (uint8_t*)heap_segment_mem(seg), (uint8_t*)heap_segment_reserved(seg))); + } + else + { + dprintf (2, ("found seg %p(-%p) for obj %p, but it's not on the seg, setting it to 0", + (uint8_t*)heap_segment_mem(seg), (uint8_t*)heap_segment_reserved(seg), o)); + seg = 0; + } + } + else + { + dprintf (2, ("could not find obj %p in any existing segments", o)); + } - if (card_bundles) - history->set_mechanism_p (global_card_bundles); +#ifdef FEATURE_BASICFREEZE + // TODO: This was originally written assuming that the seg_mapping_table would always contain entries for ro + // segments whenever the ro segment falls into the [g_gc_lowest_address,g_gc_highest_address) range. I.e., it had an + // extra "&& (size_t)(entry->seg1) & ro_in_entry" expression. However, at the moment, grow_brick_card_table does + // not correctly go through the ro segments and add them back to the seg_mapping_table when the [lowest,highest) + // range changes. We should probably go ahead and modify grow_brick_card_table and put back the + // "&& (size_t)(entry->seg1) & ro_in_entry" here. + if (!seg) + { + seg = ro_segment_lookup (o); + if (seg && !in_range_for_segment (o, seg)) + seg = 0; + } +#endif //FEATURE_BASICFREEZE - if (elevation_reduced) - history->set_mechanism_p (global_elevation); + return seg; } -/********************************** - called at the beginning of GC to fix the allocated size to - what is really allocated, or to turn the free area into an unused object - It needs to be called after all of the other allocation contexts have been - fixed since it relies on alloc_allocated. 
- ********************************/ - +size_t gcard_of ( uint8_t*); +#define slot(i, j) ((uint8_t**)(i))[(j)+1] -inline -BOOL grow_mark_stack (mark*& m, size_t& len, size_t init_len) +heap_segment* heap_segment_rw (heap_segment* ns) { - size_t new_size = max (init_len, 2*len); - mark* tmp = new (nothrow) mark [new_size]; - if (tmp) + if ((ns == 0) || !heap_segment_read_only_p (ns)) { - memcpy (tmp, m, len * sizeof (mark)); - delete[] m; - m = tmp; - len = new_size; - return TRUE; + return ns; } else { - dprintf (1, ("Failed to allocate %zd bytes for mark stack", (len * sizeof (mark)))); - return FALSE; + do + { + ns = heap_segment_next (ns); + } while ((ns != 0) && heap_segment_read_only_p (ns)); + return ns; } } -inline -uint8_t* pinned_plug (mark* m) -{ - return m->first; -} - -inline -size_t& pinned_len (mark* m) +//returns the next non ro segment. +heap_segment* heap_segment_next_rw (heap_segment* seg) { - return m->len; + heap_segment* ns = heap_segment_next (seg); + return heap_segment_rw (ns); } -inline -void set_new_pin_info (mark* m, uint8_t* pin_free_space_start) +// returns the segment before seg. 
+heap_segment* heap_segment_prev_rw (heap_segment* begin, heap_segment* seg) { - m->len = pinned_plug (m) - pin_free_space_start; -#ifdef SHORT_PLUGS - m->allocation_context_start_region = pin_free_space_start; -#endif //SHORT_PLUGS -} + assert (begin != 0); + heap_segment* prev = begin; + heap_segment* current = heap_segment_next_rw (begin); -#ifdef SHORT_PLUGS -inline -uint8_t*& pin_allocation_context_start_region (mark* m) -{ - return m->allocation_context_start_region; -} + while (current && current != seg) + { + prev = current; + current = heap_segment_next_rw (current); + } -uint8_t* get_plug_start_in_saved (uint8_t* old_loc, mark* pinned_plug_entry) -{ - uint8_t* saved_pre_plug_info = (uint8_t*)(pinned_plug_entry->get_pre_plug_reloc_info()); - uint8_t* plug_start_in_saved = saved_pre_plug_info + (old_loc - (pinned_plug (pinned_plug_entry) - sizeof (plug_and_gap))); - //dprintf (2, ("detected a very short plug: %zx before PP %zx, pad %zx", - // old_loc, pinned_plug (pinned_plug_entry), plug_start_in_saved)); - dprintf (2, ("EP: %p(%p), %p", old_loc, pinned_plug (pinned_plug_entry), plug_start_in_saved)); - return plug_start_in_saved; + if (current == seg) + { + return prev; + } + else + { + return 0; + } } +initial_memory_details memory_details; -#endif //SHORT_PLUGS - -#ifdef CARD_BUNDLE -// The card bundle keeps track of groups of card words. -static const size_t card_bundle_word_width = 32; - -// How do we express the fact that 32 bits (card_word_width) is one uint32_t? 
-static const size_t card_bundle_size = (size_t)(GC_PAGE_SIZE / (sizeof(uint32_t)*card_bundle_word_width)); - -inline -size_t card_bundle_word (size_t cardb) +heap_segment* make_initial_segment (int gen, int h_number, gc_heap* hp) { - return cardb / card_bundle_word_width; -} + void* mem = memory_details.get_initial_memory (gen, h_number); + size_t size = memory_details.get_initial_size (gen); + heap_segment* res = gc_heap::make_heap_segment ((uint8_t*)mem, size, hp, gen); -inline -uint32_t card_bundle_bit (size_t cardb) -{ - return (uint32_t)(cardb % card_bundle_word_width); + return res; } -size_t align_cardw_on_bundle (size_t cardw) +void* virtual_alloc (size_t size) { - return ((size_t)(cardw + card_bundle_size - 1) & ~(card_bundle_size - 1 )); + return virtual_alloc(size, false); } -// Get the card bundle representing a card word -size_t cardw_card_bundle (size_t cardw) +void* virtual_alloc (size_t size, bool use_large_pages_p, uint16_t numa_node) { - return cardw / card_bundle_size; -} + size_t requested_size = size; -// Get the first card word in a card bundle -size_t card_bundle_cardw (size_t cardb) -{ - return cardb * card_bundle_size; -} + if ((gc_heap::reserved_memory_limit - gc_heap::reserved_memory) < requested_size) + { + gc_heap::reserved_memory_limit = gc_heap::reserved_memory_limit + requested_size; + if ((gc_heap::reserved_memory_limit - gc_heap::reserved_memory) < requested_size) + { + return 0; + } + } + uint32_t flags = VirtualReserveFlags::None; +#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (virtual_alloc_hardware_write_watch) + { + flags = VirtualReserveFlags::WriteWatch; + } +#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -// Takes a pointer to a card bundle table and an address, and returns a pointer that represents -// where a theoretical card bundle table that represents every address (starting from 0) would -// start if the bundle word representing the address were to be located at the pointer passed in. 
-// The returned 'translated' pointer makes it convenient/fast to calculate where the card bundle -// for a given address is using a simple shift operation on the address. -uint32_t* translate_card_bundle_table (uint32_t* cb, uint8_t* lowest_address) -{ - // The number of bytes of heap memory represented by a card bundle word - const size_t heap_bytes_for_bundle_word = card_size * card_word_width * card_bundle_size * card_bundle_word_width; + void* prgmem = use_large_pages_p ? + GCToOSInterface::VirtualReserveAndCommitLargePages(requested_size, numa_node) : + GCToOSInterface::VirtualReserve(requested_size, card_size * card_word_width, flags, numa_node); + void *aligned_mem = prgmem; - // Each card bundle word is 32 bits - return (uint32_t*)((uint8_t*)cb - (((size_t)lowest_address / heap_bytes_for_bundle_word) * sizeof (uint32_t))); -} + // We don't want (prgmem + size) to be right at the end of the address space + // because we'd have to worry about that everytime we do (address + size). + // We also want to make sure that we leave loh_size_threshold at the end + // so we allocate a small object we don't need to worry about overflow there + // when we do alloc_ptr+size. 
+ if (prgmem) + { + uint8_t* end_mem = (uint8_t*)prgmem + requested_size; -#endif // CARD_BUNDLE + if ((end_mem == 0) || ((size_t)(MAX_PTR - end_mem) <= END_SPACE_AFTER_GC)) + { + GCToOSInterface::VirtualRelease (prgmem, requested_size); + dprintf (2, ("Virtual Alloc size %zd returned memory right against 4GB [%zx, %zx[ - discarding", + requested_size, (size_t)prgmem, (size_t)((uint8_t*)prgmem+requested_size))); + prgmem = 0; + aligned_mem = 0; + } + } -#if defined (HOST_64BIT) -#define brick_size ((size_t)4096) -#else -#define brick_size ((size_t)2048) -#endif //HOST_64BIT + if (prgmem) + { + gc_heap::reserved_memory += requested_size; + } -inline -uint8_t* align_on_brick (uint8_t* add) -{ - return (uint8_t*)((size_t)(add + brick_size - 1) & ~(brick_size - 1)); -} + dprintf (2, ("Virtual Alloc size %zd: [%zx, %zx[", + requested_size, (size_t)prgmem, (size_t)((uint8_t*)prgmem+requested_size))); -inline -uint8_t* align_lower_brick (uint8_t* add) -{ - return (uint8_t*)(((size_t)add) & ~(brick_size - 1)); + return aligned_mem; } -size_t size_brick_of (uint8_t* from, uint8_t* end) +size_t get_valid_segment_size (BOOL large_seg) { - assert (((size_t)from & (brick_size-1)) == 0); - assert (((size_t)end & (brick_size-1)) == 0); + size_t seg_size, initial_seg_size; - return ((end - from) / brick_size) * sizeof (short); -} + if (!large_seg) + { + initial_seg_size = INITIAL_ALLOC; + seg_size = static_cast(GCConfig::GetSegmentSize()); + } + else + { + initial_seg_size = LHEAP_ALLOC; + seg_size = static_cast(GCConfig::GetSegmentSize()) / 2; + } -inline -uint8_t* align_on_card (uint8_t* add) -{ - return (uint8_t*)((size_t)(add + card_size - 1) & ~(card_size - 1 )); -} -inline -uint8_t* align_on_card_word (uint8_t* add) -{ - return (uint8_t*) ((size_t)(add + (card_size*card_word_width)-1) & ~(card_size*card_word_width - 1)); -} +#ifdef MULTIPLE_HEAPS +#ifdef HOST_64BIT + if (!large_seg) +#endif // HOST_64BIT + { + if (g_num_processors > 4) + initial_seg_size /= 2; + if 
(g_num_processors > 8) + initial_seg_size /= 2; + } +#endif //MULTIPLE_HEAPS -inline -uint8_t* align_lower_card (uint8_t* add) -{ - return (uint8_t*)((size_t)add & ~(card_size-1)); -} + // if seg_size is small but not 0 (0 is default if config not set) + // then set the segment to the minimum size + if (!g_theGCHeap->IsValidSegmentSize(seg_size)) + { + // if requested size is between 1 byte and 4MB, use min + if ((seg_size >> 1) && !(seg_size >> 22)) + seg_size = 1024*1024*4; + else + seg_size = initial_seg_size; + } -// Returns the number of DWORDs in the card table that cover the -// range of addresses [from, end[. -size_t count_card_of (uint8_t* from, uint8_t* end) -{ - return card_word (gcard_of (end - 1)) - card_word (gcard_of (from)) + 1; +#ifdef HOST_64BIT + seg_size = round_up_power2 (seg_size); +#else + seg_size = round_down_power2 (seg_size); +#endif // HOST_64BIT + + return (seg_size); } -// Returns the number of bytes to allocate for a card table -// that covers the range of addresses [from, end[. -size_t size_card_of (uint8_t* from, uint8_t* end) +#ifndef USE_REGIONS +void +gc_heap::compute_new_ephemeral_size() { - return count_card_of (from, end) * sizeof(uint32_t); -} + int eph_gen_max = max_generation - 1 - (settings.promotion ? 1 : 0); + size_t padding_size = 0; -// We don't store seg_mapping_table in card_table_info because there's only always one view. 
-class card_table_info -{ -public: - unsigned recount; - size_t size; - uint32_t* next_card_table; + for (int i = 0; i <= eph_gen_max; i++) + { + dynamic_data* dd = dynamic_data_of (i); + total_ephemeral_size += (dd_survived_size (dd) - dd_pinned_survived_size (dd)); +#ifdef RESPECT_LARGE_ALIGNMENT + total_ephemeral_size += dd_num_npinned_plugs (dd) * switch_alignment_size (FALSE); +#endif //RESPECT_LARGE_ALIGNMENT +#ifdef FEATURE_STRUCTALIGN + total_ephemeral_size += dd_num_npinned_plugs (dd) * MAX_STRUCTALIGN; +#endif //FEATURE_STRUCTALIGN - uint8_t* lowest_address; - uint8_t* highest_address; - short* brick_table; +#ifdef SHORT_PLUGS + padding_size += dd_padding_size (dd); +#endif //SHORT_PLUGS + } -#ifdef CARD_BUNDLE - uint32_t* card_bundle_table; -#endif //CARD_BUNDLE + total_ephemeral_size += eph_gen_starts_size; - // mark_array is always at the end of the data structure because we - // want to be able to make one commit call for everything before it. -#ifdef BACKGROUND_GC - uint32_t* mark_array; -#endif //BACKGROUND_GC -}; +#ifdef RESPECT_LARGE_ALIGNMENT + size_t planned_ephemeral_size = heap_segment_plan_allocated (ephemeral_heap_segment) - + generation_plan_allocation_start (generation_of (max_generation-1)); + total_ephemeral_size = min (total_ephemeral_size, planned_ephemeral_size); +#endif //RESPECT_LARGE_ALIGNMENT -static_assert(offsetof(dac_card_table_info, size) == offsetof(card_table_info, size), "DAC card_table_info layout mismatch"); -static_assert(offsetof(dac_card_table_info, next_card_table) == offsetof(card_table_info, next_card_table), "DAC card_table_info layout mismatch"); +#ifdef SHORT_PLUGS + total_ephemeral_size = Align ((size_t)((double)total_ephemeral_size * short_plugs_pad_ratio) + 1); + total_ephemeral_size += Align (DESIRED_PLUG_LENGTH); +#endif //SHORT_PLUGS -//These are accessors on untranslated cardtable -inline -unsigned& card_table_refcount (uint32_t* c_table) -{ - return *(unsigned*)((char*)c_table - sizeof (card_table_info)); 
+ dprintf (3, ("total ephemeral size is %zx, padding %zx(%zx)", + total_ephemeral_size, + padding_size, (total_ephemeral_size - padding_size))); } -inline -uint8_t*& card_table_lowest_address (uint32_t* c_table) +heap_segment* +gc_heap::soh_get_segment_to_expand() { - return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->lowest_address; -} + size_t size = soh_segment_size; -uint32_t* translate_card_table (uint32_t* ct) -{ - return (uint32_t*)((uint8_t*)ct - card_word (gcard_of (card_table_lowest_address (ct))) * sizeof(uint32_t)); -} + ordered_plug_indices_init = FALSE; + use_bestfit = FALSE; -inline -uint8_t*& card_table_highest_address (uint32_t* c_table) -{ - return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->highest_address; -} + //compute the size of the new ephemeral heap segment. + compute_new_ephemeral_size(); -inline -short*& card_table_brick_table (uint32_t* c_table) -{ - return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->brick_table; -} + if ((settings.pause_mode != pause_low_latency) && + (settings.pause_mode != pause_no_gc) +#ifdef BACKGROUND_GC + && (!gc_heap::background_running_p()) +#endif //BACKGROUND_GC + ) + { + assert (settings.condemned_generation <= max_generation); + allocator* gen_alloc = ((settings.condemned_generation == max_generation) ? nullptr : + generation_allocator (generation_of (max_generation))); + dprintf (2, ("(gen%d)soh_get_segment_to_expand", settings.condemned_generation)); -#ifdef CARD_BUNDLE -inline -uint32_t*& card_table_card_bundle_table (uint32_t* c_table) -{ - return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->card_bundle_table; -} -#endif //CARD_BUNDLE + // try to find one in the gen 2 segment list, search backwards because the first segments + // tend to be more compact than the later ones. 
+ heap_segment* fseg = heap_segment_rw (generation_start_segment (generation_of (max_generation))); -#ifdef BACKGROUND_GC -inline -uint32_t*& card_table_mark_array (uint32_t* c_table) -{ - return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->mark_array; -} + _ASSERTE(fseg != NULL); -#ifdef HOST_64BIT -#define mark_bit_pitch ((size_t)16) -#else -#define mark_bit_pitch ((size_t)8) -#endif // HOST_64BIT -#define mark_word_width ((size_t)32) -#define mark_word_size (mark_word_width * mark_bit_pitch) +#ifdef SEG_REUSE_STATS + int try_reuse = 0; +#endif //SEG_REUSE_STATS -inline -uint8_t* align_on_mark_bit (uint8_t* add) -{ - return (uint8_t*)((size_t)(add + (mark_bit_pitch - 1)) & ~(mark_bit_pitch - 1)); -} + heap_segment* seg = ephemeral_heap_segment; + while ((seg = heap_segment_prev_rw (fseg, seg)) && (seg != fseg)) + { +#ifdef SEG_REUSE_STATS + try_reuse++; +#endif //SEG_REUSE_STATS -inline -uint8_t* align_lower_mark_bit (uint8_t* add) -{ - return (uint8_t*)((size_t)(add) & ~(mark_bit_pitch - 1)); -} + if (can_expand_into_p (seg, size/3, total_ephemeral_size, gen_alloc)) + { + get_gc_data_per_heap()->set_mechanism (gc_heap_expand, + (use_bestfit ? 
expand_reuse_bestfit : expand_reuse_normal)); + if (settings.condemned_generation == max_generation) + { + if (use_bestfit) + { + build_ordered_free_spaces (seg); + dprintf (GTC_LOG, ("can use best fit")); + } -inline -BOOL is_aligned_on_mark_word (uint8_t* add) -{ - return ((size_t)add == ((size_t)(add) & ~(mark_word_size - 1))); -} +#ifdef SEG_REUSE_STATS + dprintf (SEG_REUSE_LOG_0, ("(gen%d)soh_get_segment_to_expand: found seg #%d to reuse", + settings.condemned_generation, try_reuse)); +#endif //SEG_REUSE_STATS + dprintf (GTC_LOG, ("max_gen: Found existing segment to expand into %zx", (size_t)seg)); + return seg; + } + else + { +#ifdef SEG_REUSE_STATS + dprintf (SEG_REUSE_LOG_0, ("(gen%d)soh_get_segment_to_expand: found seg #%d to reuse - returning", + settings.condemned_generation, try_reuse)); +#endif //SEG_REUSE_STATS + dprintf (GTC_LOG, ("max_gen-1: Found existing segment to expand into %zx", (size_t)seg)); -inline -uint8_t* align_on_mark_word (uint8_t* add) -{ - return (uint8_t*)((size_t)(add + mark_word_size - 1) & ~(mark_word_size - 1)); -} + // If we return 0 here, the allocator will think since we are short on end + // of seg we need to trigger a full compacting GC. So if sustained low latency + // is set we should acquire a new seg instead, that way we wouldn't be short. + // The real solution, of course, is to actually implement seg reuse in gen1. 
+ if (settings.pause_mode != pause_sustained_low_latency) + { + dprintf (GTC_LOG, ("max_gen-1: SustainedLowLatency is set, acquire a new seg")); + get_gc_data_per_heap()->set_mechanism (gc_heap_expand, expand_next_full_gc); + return 0; + } + } + } + } + } -inline -uint8_t* align_lower_mark_word (uint8_t* add) -{ - return (uint8_t*)((size_t)(add) & ~(mark_word_size - 1)); -} + heap_segment* result = get_segment (size, gc_oh_num::soh); -inline -size_t mark_bit_of (uint8_t* add) -{ - return ((size_t)add / mark_bit_pitch); -} + if(result) + { +#ifdef BACKGROUND_GC + if (current_c_gc_state == c_gc_state_planning) + { + // When we expand heap during bgc sweep, we set the seg to be swept so + // we'll always look at cards for objects on the new segment. + result->flags |= heap_segment_flags_swept; + } +#endif //BACKGROUND_GC -inline -unsigned int mark_bit_bit (size_t mark_bit) -{ - return (unsigned int)(mark_bit % mark_word_width); -} + FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(result), + (size_t)(heap_segment_reserved (result) - heap_segment_mem(result)), + gc_etw_segment_small_object_heap); + } -inline -size_t mark_bit_word (size_t mark_bit) -{ - return (mark_bit / mark_word_width); -} + get_gc_data_per_heap()->set_mechanism (gc_heap_expand, (result ? 
expand_new_seg : expand_no_memory)); -inline -size_t mark_word_of (uint8_t* add) -{ - return ((size_t)add) / mark_word_size; -} + if (result == 0) + { + dprintf (2, ("h%d: failed to allocate a new segment!", heap_number)); + } + else + { +#ifdef MULTIPLE_HEAPS + heap_segment_heap (result) = this; +#endif //MULTIPLE_HEAPS + } -uint8_t* mark_word_address (size_t wd) -{ - return (uint8_t*)(wd*mark_word_size); + dprintf (GTC_LOG, ("(gen%d)creating new segment %p", settings.condemned_generation, result)); + return result; } -uint8_t* mark_bit_address (size_t mark_bit) +//returns 0 in case of allocation failure +heap_segment* +gc_heap::get_segment (size_t size, gc_oh_num oh) { - return (uint8_t*)(mark_bit*mark_bit_pitch); -} + assert(oh != gc_oh_num::unknown); + BOOL uoh_p = (oh == gc_oh_num::loh) || (oh == gc_oh_num::poh); + if (heap_hard_limit) + return NULL; -inline -size_t mark_bit_bit_of (uint8_t* add) -{ - return (((size_t)add / mark_bit_pitch) % mark_word_width); -} + heap_segment* result = 0; -size_t size_mark_array_of (uint8_t* from, uint8_t* end) -{ - assert (((size_t)from & ((mark_word_size)-1)) == 0); - assert (((size_t)end & ((mark_word_size)-1)) == 0); - return sizeof (uint32_t)*(((end - from) / mark_word_size)); -} + if (segment_standby_list != 0) + { + result = segment_standby_list; + heap_segment* last = 0; + while (result) + { + size_t hs = (size_t)(heap_segment_reserved (result) - (uint8_t*)result); + if ((hs >= size) && ((hs / 2) < size)) + { + dprintf (2, ("Hoarded segment %zx found", (size_t) result)); + if (last) + { + heap_segment_next (last) = heap_segment_next (result); + } + else + { + segment_standby_list = heap_segment_next (result); + } + break; + } + else + { + last = result; + result = heap_segment_next (result); + } + } + } -//In order to eliminate the lowest_address in the mark array -//computations (mark_word_of, etc) mark_array is offset -// according to the lowest_address. 
-uint32_t* translate_mark_array (uint32_t* ma) -{ - return (uint32_t*)((uint8_t*)ma - size_mark_array_of (0, g_gc_lowest_address)); -} + if (result) + { + init_heap_segment (result, __this); +#ifdef BACKGROUND_GC + if (is_bgc_in_progress()) + { + dprintf (GC_TABLE_LOG, ("hoarded seg %p, mark_array is %p", result, mark_array)); + if (!commit_mark_array_new_seg (__this, result)) + { + dprintf (GC_TABLE_LOG, ("failed to commit mark array for hoarded seg")); + // If we can't use it we need to thread it back. + if (segment_standby_list != 0) + { + heap_segment_next (result) = segment_standby_list; + segment_standby_list = result; + } + else + { + segment_standby_list = result; + } + result = 0; + } + } #endif //BACKGROUND_GC -//These work on untranslated card tables -inline -uint32_t*& card_table_next (uint32_t* c_table) -{ - // NOTE: The dac takes a dependency on card_table_info being right before c_table. - // It's 100% ok to change this implementation detail as long as a matching change - // is made to DacGCBookkeepingEnumerator::Init in daccess.cpp. 
- return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->next_card_table; -} - -inline -size_t& card_table_size (uint32_t* c_table) -{ - return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->size; -} - -void own_card_table (uint32_t* c_table) -{ - card_table_refcount (c_table) += 1; -} - -void destroy_card_table (uint32_t* c_table); + if (result) + seg_mapping_table_add_segment (result, __this); + } -void delete_next_card_table (uint32_t* c_table) -{ - uint32_t* n_table = card_table_next (c_table); - if (n_table) + if (!result) { - if (card_table_next (n_table)) - { - delete_next_card_table (n_table); - } - if (card_table_refcount (n_table) == 0) + void* mem = virtual_alloc (size); + if (!mem) { - destroy_card_table (n_table); - card_table_next (c_table) = 0; + fgm_result.set_fgm (fgm_reserve_segment, size, uoh_p); + return 0; } - } -} -void release_card_table (uint32_t* c_table) -{ - assert (card_table_refcount (c_table) >0); - card_table_refcount (c_table) -= 1; - if (card_table_refcount (c_table) == 0) - { - delete_next_card_table (c_table); - if (card_table_next (c_table) == 0) + result = make_heap_segment ((uint8_t*)mem, size, __this, (oh + max_generation)); + + if (result) { - destroy_card_table (c_table); - // sever the link from the parent - if (&g_gc_card_table[card_word (gcard_of(g_gc_lowest_address))] == c_table) + uint8_t* start; + uint8_t* end; + if (mem < g_gc_lowest_address) { - g_gc_card_table = 0; + start = (uint8_t*)mem; + } + else + { + start = (uint8_t*)g_gc_lowest_address; + } -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - g_gc_card_bundle_table = 0; -#endif -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - SoftwareWriteWatch::StaticClose(); -#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + if (((uint8_t*)mem + size) > g_gc_highest_address) + { + end = (uint8_t*)mem + size; } else { - uint32_t* p_table = &g_gc_card_table[card_word (gcard_of(g_gc_lowest_address))]; - if (p_table) + end = 
(uint8_t*)g_gc_highest_address; + } + + if (gc_heap::grow_brick_card_tables (start, end, size, result, __this, uoh_p) != 0) + { + // release_segment needs the flags to decrement the proper bucket + size_t flags = 0; + if (oh == poh) { - while (p_table && (card_table_next (p_table) != c_table)) - p_table = card_table_next (p_table); - card_table_next (p_table) = 0; + flags = heap_segment_flags_poh; + } + else if (oh == loh) + { + flags = heap_segment_flags_loh; } + result->flags |= flags; + release_segment (result); + return 0; } } - } -} + else + { + fgm_result.set_fgm (fgm_commit_segment_beg, SEGMENT_INITIAL_COMMIT, uoh_p); + virtual_free (mem, size); + } -void destroy_card_table (uint32_t* c_table) -{ -// delete (uint32_t*)&card_table_refcount(c_table); + if (result) + { + seg_mapping_table_add_segment (result, __this); + } + } - size_t size = card_table_size(c_table); - gc_heap::destroy_card_table_helper (c_table); - GCToOSInterface::VirtualRelease (&card_table_refcount(c_table), size); - dprintf (2, ("Table Virtual Free : %zx", (size_t)&card_table_refcount(c_table))); -} +#ifdef BACKGROUND_GC + if (result) + { + ::record_changed_seg ((uint8_t*)result, heap_segment_reserved (result), + settings.gc_index, current_bgc_state, + seg_added); + bgc_verify_mark_array_cleared (result); + } +#endif //BACKGROUND_GC -uint8_t** make_mark_list (size_t size) -{ - uint8_t** mark_list = new (nothrow) uint8_t* [size]; - return mark_list; + dprintf (GC_TABLE_LOG, ("h%d: new seg: %p-%p (%zd)", heap_number, result, ((uint8_t*)result + size), size)); + return result; } -#define swap(a,b){uint8_t* t; t = a; a = b; b = t;} - +#endif //!USE_REGIONS -#ifndef USE_INTROSORT -void qsort1( uint8_t* *low, uint8_t* *high, unsigned int depth) -{ - if (((low + 16) >= high) || (depth > 100)) +#ifdef MULTIPLE_HEAPS +#ifdef HOST_X86 +#ifdef _MSC_VER +#pragma warning(disable:4035) + ptrdiff_t get_cycle_count() { - //insertion sort - uint8_t **i, **j; - for (i = low+1; i <= high; i++) - { - uint8_t* 
val = *i; - for (j=i;j >low && val<*(j-1);j--) - { - *j=*(j-1); - } - *j=val; - } + __asm rdtsc } - else +#pragma warning(default:4035) +#elif defined(__GNUC__) + ptrdiff_t get_cycle_count() { - uint8_t *pivot, **left, **right; + ptrdiff_t cycles; + ptrdiff_t cyclesHi; + __asm__ __volatile__ + ("rdtsc":"=a" (cycles), "=d" (cyclesHi)); + return cycles; + } +#else //_MSC_VER +#error Unknown compiler +#endif //_MSC_VER +#elif defined(TARGET_AMD64) +#ifdef _MSC_VER +extern "C" uint64_t __rdtsc(); +#pragma intrinsic(__rdtsc) + ptrdiff_t get_cycle_count() + { + return (ptrdiff_t)__rdtsc(); + } +#elif defined(__GNUC__) + ptrdiff_t get_cycle_count() + { + ptrdiff_t cycles; + ptrdiff_t cyclesHi; + __asm__ __volatile__ + ("rdtsc":"=a" (cycles), "=d" (cyclesHi)); + return (cyclesHi << 32) | cycles; + } +#else // _MSC_VER + extern "C" ptrdiff_t get_cycle_count(void); +#endif // _MSC_VER +#elif defined(TARGET_LOONGARCH64) + ptrdiff_t get_cycle_count() + { + ////FIXME: TODO for LOONGARCH64: + //ptrdiff_t cycle; + __asm__ volatile ("break 0 \n"); + return 0; + } +#else + ptrdiff_t get_cycle_count() + { + // @ARMTODO, @ARM64TODO, @WASMTODO: cycle counter is not exposed to user mode. For now (until we can show this + // makes a difference on the configurations on which we'll run) just return 0. This will result in + // all buffer access times being reported as equal in access_time(). + return 0; + } +#endif //TARGET_X86 - //sort low middle and high - if (*(low+((high-low)/2)) < *low) - swap (*(low+((high-low)/2)), *low); - if (*high < *low) - swap (*low, *high); - if (*high < *(low+((high-low)/2))) - swap (*(low+((high-low)/2)), *high); +// We may not be on contiguous numa nodes so need to store +// the node index as well. 
+uint8_t* heap_select::sniff_buffer; +unsigned heap_select::n_sniff_buffers; +unsigned heap_select::cur_sniff_index; +uint16_t heap_select::proc_no_to_heap_no[MAX_SUPPORTED_CPUS]; +uint16_t heap_select::heap_no_to_proc_no[MAX_SUPPORTED_CPUS]; +uint16_t heap_select::heap_no_to_numa_node[MAX_SUPPORTED_CPUS]; +uint16_t heap_select::numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4]; +#ifdef HEAP_BALANCE_INSTRUMENTATION +uint16_t heap_select::total_numa_nodes; +node_heap_count heap_select::heaps_on_node[MAX_SUPPORTED_NODES]; +#endif - swap (*(low+((high-low)/2)), *(high-1)); - pivot = *(high-1); - left = low; right = high-1; - while (1) { - while (*(--right) > pivot); - while (*(++left) < pivot); - if (left < right) - { - swap(*left, *right); - } - else - break; - } - swap (*left, *(high-1)); - qsort1(low, left-1, depth+1); - qsort1(left+1, high, depth+1); - } -} -#endif //USE_INTROSORT +#ifdef HEAP_BALANCE_INSTRUMENTATION +// This records info we use to look at effect of different strategies +// for heap balancing. +struct heap_balance_info +{ + uint64_t timestamp; + // This also encodes when we detect the thread runs on + // different proc during a balance attempt. Sometimes + // I observe this happens multiple times during one attempt! + // If this happens, I just record the last proc we observe + // and set MSB. + int tid; + // This records the final alloc_heap for the thread. + // + // This also encodes the reason why we needed to set_home_heap + // in balance_heaps. + // If we set it because the home heap is not the same as the proc, + // we set MSB. + // + // If we set ideal proc, we set the 2nd MSB. 
+ int alloc_heap; + int ideal_proc_no; +}; -#ifdef USE_VXSORT -static void do_vxsort (uint8_t** item_array, ptrdiff_t item_count, uint8_t* range_low, uint8_t* range_high) -{ - // above this threshold, using AVX2 for sorting will likely pay off - // despite possible downclocking on some devices - const ptrdiff_t AVX2_THRESHOLD_SIZE = 8 * 1024; +// This means inbetween each GC we can log at most this many entries per proc. +// This is usually enough. Most of the time we only need to log something every 128k +// of allocations in balance_heaps and gen0 budget is <= 200mb. +#define default_max_hb_heap_balance_info 4096 - // above this threshold, using AVX512F for sorting will likely pay off - // despite possible downclocking on current devices - const ptrdiff_t AVX512F_THRESHOLD_SIZE = 128 * 1024; +struct heap_balance_info_proc +{ + int count; + int index; + heap_balance_info hb_info[default_max_hb_heap_balance_info]; +}; - // above this threshold, using NEON for sorting will likely pay off - const ptrdiff_t NEON_THRESHOLD_SIZE = 1024; +struct heap_balance_info_numa +{ + heap_balance_info_proc* hb_info_procs; +}; - if (item_count <= 1) - return; +uint64_t start_raw_ts = 0; +bool cpu_group_enabled_p = false; +uint32_t procs_per_numa_node = 0; +uint16_t total_numa_nodes_on_machine = 0; +uint32_t procs_per_cpu_group = 0; +uint16_t total_cpu_groups_on_machine = 0; +// Note this is still on one of the numa nodes, so we'll incur a remote access +// no matter what. +heap_balance_info_numa* hb_info_numa_nodes = NULL; -#if defined(TARGET_AMD64) - if (IsSupportedInstructionSet (InstructionSet::AVX2) && (item_count > AVX2_THRESHOLD_SIZE)) +// TODO: This doesn't work for multiple nodes per CPU group yet. 
+int get_proc_index_numa (int proc_no, int* numa_no) +{ + if (total_numa_nodes_on_machine == 1) { - dprintf(3, ("Sorting mark lists")); - - // use AVX512F only if the list is large enough to pay for downclocking impact - if (IsSupportedInstructionSet (InstructionSet::AVX512F) && (item_count > AVX512F_THRESHOLD_SIZE)) + *numa_no = 0; + return proc_no; + } + else + { + if (cpu_group_enabled_p) { - do_vxsort_avx512 (item_array, &item_array[item_count - 1], range_low, range_high); + // see vm\gcenv.os.cpp GroupProcNo implementation. + *numa_no = proc_no >> 6; + return (proc_no % 64); } else { - do_vxsort_avx2 (item_array, &item_array[item_count - 1], range_low, range_high); + *numa_no = proc_no / procs_per_numa_node; + return (proc_no % procs_per_numa_node); } } -#elif defined(TARGET_ARM64) - if (IsSupportedInstructionSet (InstructionSet::NEON) && (item_count > NEON_THRESHOLD_SIZE)) - { - dprintf(3, ("Sorting mark lists")); - do_vxsort_neon (item_array, &item_array[item_count - 1], range_low, range_high); - } -#endif - else - { - dprintf (3, ("Sorting mark lists")); - introsort::sort (item_array, &item_array[item_count - 1], 0); - } -#ifdef _DEBUG - // check the array is sorted - for (ptrdiff_t i = 0; i < item_count - 1; i++) +} + + + +const int hb_log_buffer_size = 4096; +static char hb_log_buffer[hb_log_buffer_size]; +int last_hb_recorded_gc_index = -1; +#endif //HEAP_BALANCE_INSTRUMENTATION + +void set_thread_affinity_for_heap (int heap_number, uint16_t proc_no) +{ + if (!GCToOSInterface::SetThreadAffinity (proc_no)) { - assert (item_array[i] <= item_array[i + 1]); + dprintf (1, ("Failed to set thread affinity for GC thread %d on proc #%d", heap_number, proc_no)); } - // check that the ends of the array are indeed in range - // together with the above this implies all elements are in range - assert ((range_low <= item_array[0]) && (item_array[item_count - 1] <= range_high)); -#endif } -#endif //USE_VXSORT -#ifdef MULTIPLE_HEAPS - -#ifdef _DEBUG +#endif 
//MULTIPLE_HEAPS -#if !defined(_MSC_VER) -#if !defined(__cdecl) -#if defined(__i386__) -#define __cdecl __attribute__((cdecl)) +void gc_mechanisms::init_mechanisms() +{ + condemned_generation = 0; + promotion = FALSE;//TRUE; + compaction = TRUE; +#ifdef FEATURE_LOH_COMPACTION + loh_compaction = gc_heap::loh_compaction_requested(); #else -#define __cdecl -#endif -#endif -#endif + loh_compaction = FALSE; +#endif //FEATURE_LOH_COMPACTION + heap_expansion = FALSE; + concurrent = FALSE; + demotion = FALSE; + elevation_reduced = FALSE; + found_finalizers = FALSE; +#ifdef BACKGROUND_GC + background_p = gc_heap::background_running_p() != FALSE; +#endif //BACKGROUND_GC -#endif // _DEBUG + entry_memory_load = 0; + entry_available_physical_mem = 0; + exit_memory_load = 0; -#else +#ifdef STRESS_HEAP + stress_induced = FALSE; +#endif // STRESS_HEAP +} -#ifdef USE_REGIONS +void gc_mechanisms::first_init() +{ + gc_index = 0; + gen0_reduction_count = 0; + should_lock_elevation = FALSE; + elevation_locked_count = 0; + reason = reason_empty; +#ifdef BACKGROUND_GC + pause_mode = gc_heap::gc_can_use_concurrent ? 
pause_interactive : pause_batch; +#ifdef _DEBUG + int debug_pause_mode = static_cast(GCConfig::GetLatencyMode()); + if (debug_pause_mode >= 0) + { + assert (debug_pause_mode <= pause_sustained_low_latency); + pause_mode = (gc_pause_mode)debug_pause_mode; + } +#endif //_DEBUG +#else //BACKGROUND_GC + pause_mode = pause_batch; +#endif //BACKGROUND_GC + init_mechanisms(); +} -#endif //USE_REGIONS +void gc_mechanisms::record (gc_history_global* history) +{ +#ifdef MULTIPLE_HEAPS + history->num_heaps = gc_heap::n_heaps; +#else + history->num_heaps = 1; #endif //MULTIPLE_HEAPS -#ifndef USE_REGIONS -class seg_free_spaces -{ - struct seg_free_space - { - BOOL is_plug; - void* start; - }; + history->condemned_generation = condemned_generation; + history->gen0_reduction_count = gen0_reduction_count; + history->reason = reason; + history->pause_mode = (int)pause_mode; + history->mem_pressure = entry_memory_load; + history->global_mechanisms_p = 0; - struct free_space_bucket - { - seg_free_space* free_space; - ptrdiff_t count_add; // Assigned when we first construct the array. - ptrdiff_t count_fit; // How many items left when we are fitting plugs. - }; + // start setting the boolean values. 
+ if (concurrent) + history->set_mechanism_p (global_concurrent); - void move_bucket (int old_power2, int new_power2) - { - // PREFAST warning 22015: old_power2 could be negative - assert (old_power2 >= 0); - assert (old_power2 >= new_power2); + if (compaction) + history->set_mechanism_p (global_compaction); - if (old_power2 == new_power2) - { - return; - } + if (promotion) + history->set_mechanism_p (global_promotion); - seg_free_space* src_index = free_space_buckets[old_power2].free_space; - for (int i = old_power2; i > new_power2; i--) - { - seg_free_space** dest = &(free_space_buckets[i].free_space); - (*dest)++; + if (demotion) + history->set_mechanism_p (global_demotion); - seg_free_space* dest_index = free_space_buckets[i - 1].free_space; - if (i > (new_power2 + 1)) - { - seg_free_space temp = *src_index; - *src_index = *dest_index; - *dest_index = temp; - } - src_index = dest_index; - } + if (card_bundles) + history->set_mechanism_p (global_card_bundles); - free_space_buckets[old_power2].count_fit--; - free_space_buckets[new_power2].count_fit++; - } + if (elevation_reduced) + history->set_mechanism_p (global_elevation); +} -#ifdef _DEBUG +/********************************** + called at the beginning of GC to fix the allocated size to + what is really allocated, or to turn the free area into an unused object + It needs to be called after all of the other allocation contexts have been + fixed since it relies on alloc_allocated. 
+ ********************************/ - void dump_free_space (seg_free_space* item) - { - uint8_t* addr = 0; - size_t len = 0; - if (item->is_plug) - { - mark* m = (mark*)(item->start); - len = pinned_len (m); - addr = pinned_plug (m) - len; - } - else - { - heap_segment* seg = (heap_segment*)(item->start); - addr = heap_segment_plan_allocated (seg); - len = heap_segment_committed (seg) - addr; - } - dprintf (SEG_REUSE_LOG_1, ("[%d]0x%p %zd", heap_num, addr, len)); - } +#ifdef CARD_BUNDLE +// The card bundle keeps track of groups of card words. +size_t align_cardw_on_bundle (size_t cardw) +{ + return ((size_t)(cardw + card_bundle_size - 1) & ~(card_bundle_size - 1 )); +} - void dump() - { - seg_free_space* item = NULL; - int i = 0; +// Get the card bundle representing a card word +size_t cardw_card_bundle (size_t cardw) +{ + return cardw / card_bundle_size; +} - dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------\nnow the free spaces look like:", heap_num)); - for (i = 0; i < (free_space_bucket_count - 1); i++) - { - dprintf (SEG_REUSE_LOG_1, ("[%d]Free spaces for 2^%d bucket:", heap_num, (base_power2 + i))); - dprintf (SEG_REUSE_LOG_1, ("[%d]%s %s", heap_num, "start", "len")); - item = free_space_buckets[i].free_space; - while (item < free_space_buckets[i + 1].free_space) - { - dump_free_space (item); - item++; - } - dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------", heap_num)); - } +// Get the first card word in a card bundle +size_t card_bundle_cardw (size_t cardb) +{ + return cardb * card_bundle_size; +} - dprintf (SEG_REUSE_LOG_1, ("[%d]Free spaces for 2^%d bucket:", heap_num, (base_power2 + i))); - dprintf (SEG_REUSE_LOG_1, ("[%d]%s %s", heap_num, "start", "len")); - item = free_space_buckets[i].free_space; - while (item <= &seg_free_space_array[free_space_item_count - 1]) - { - dump_free_space (item); - item++; - } - dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------", heap_num)); - } +// Takes a pointer to a 
card bundle table and an address, and returns a pointer that represents +// where a theoretical card bundle table that represents every address (starting from 0) would +// start if the bundle word representing the address were to be located at the pointer passed in. +// The returned 'translated' pointer makes it convenient/fast to calculate where the card bundle +// for a given address is using a simple shift operation on the address. +uint32_t* translate_card_bundle_table (uint32_t* cb, uint8_t* lowest_address) +{ + // The number of bytes of heap memory represented by a card bundle word + const size_t heap_bytes_for_bundle_word = card_size * card_word_width * card_bundle_size * card_bundle_word_width; -#endif //_DEBUG + // Each card bundle word is 32 bits + return (uint32_t*)((uint8_t*)cb - (((size_t)lowest_address / heap_bytes_for_bundle_word) * sizeof (uint32_t))); +} - free_space_bucket* free_space_buckets; - seg_free_space* seg_free_space_array; - ptrdiff_t free_space_bucket_count; - ptrdiff_t free_space_item_count; - int base_power2; - int heap_num; -#ifdef _DEBUG - BOOL has_end_of_seg; -#endif //_DEBUG +#endif // CARD_BUNDLE + +size_t size_brick_of (uint8_t* from, uint8_t* end) +{ + assert (((size_t)from & (brick_size-1)) == 0); + assert (((size_t)end & (brick_size-1)) == 0); -public: + return ((end - from) / brick_size) * sizeof (short); +} - seg_free_spaces (int h_number) - { - heap_num = h_number; - } +// Returns the number of DWORDs in the card table that cover the +// range of addresses [from, end[. +size_t count_card_of (uint8_t* from, uint8_t* end) +{ + return card_word (gcard_of (end - 1)) - card_word (gcard_of (from)) + 1; +} - BOOL alloc () - { - size_t total_prealloc_size = - MAX_NUM_BUCKETS * sizeof (free_space_bucket) + - MAX_NUM_FREE_SPACES * sizeof (seg_free_space); +// Returns the number of bytes to allocate for a card table +// that covers the range of addresses [from, end[. 
+size_t size_card_of (uint8_t* from, uint8_t* end) +{ + return count_card_of (from, end) * sizeof(uint32_t); +} - free_space_buckets = (free_space_bucket*) new (nothrow) uint8_t[total_prealloc_size]; +uint32_t* translate_card_table (uint32_t* ct) +{ + return (uint32_t*)((uint8_t*)ct - card_word (gcard_of (card_table_lowest_address (ct))) * sizeof(uint32_t)); +} - return (!!free_space_buckets); - } +#ifdef BACKGROUND_GC +inline +uint8_t* align_on_mark_bit (uint8_t* add) +{ + return (uint8_t*)((size_t)(add + (mark_bit_pitch - 1)) & ~(mark_bit_pitch - 1)); +} - // We take the ordered free space array we got from the 1st pass, - // and feed the portion that we decided to use to this method, ie, - // the largest item_count free spaces. - void add_buckets (int base, size_t* ordered_free_spaces, int bucket_count, size_t item_count) - { - assert (free_space_buckets); - assert (item_count <= (size_t)MAX_PTR); +inline +uint8_t* align_lower_mark_bit (uint8_t* add) +{ + return (uint8_t*)((size_t)(add) & ~(mark_bit_pitch - 1)); +} - free_space_bucket_count = bucket_count; - free_space_item_count = item_count; - base_power2 = base; -#ifdef _DEBUG - has_end_of_seg = FALSE; -#endif //_DEBUG +inline +BOOL is_aligned_on_mark_word (uint8_t* add) +{ + return ((size_t)add == ((size_t)(add) & ~(mark_word_size - 1))); +} - ptrdiff_t total_item_count = 0; - ptrdiff_t i = 0; +inline +uint8_t* align_lower_mark_word (uint8_t* add) +{ + return (uint8_t*)((size_t)(add) & ~(mark_word_size - 1)); +} - seg_free_space_array = (seg_free_space*)(free_space_buckets + free_space_bucket_count); +uint8_t* mark_bit_address (size_t mark_bit) +{ + return (uint8_t*)(mark_bit*mark_bit_pitch); +} - for (i = 0; i < (ptrdiff_t)item_count; i++) - { - seg_free_space_array[i].start = 0; - seg_free_space_array[i].is_plug = FALSE; - } +//In order to eliminate the lowest_address in the mark array +//computations (mark_word_of, etc) mark_array is offset +// according to the lowest_address. 
+uint32_t* translate_mark_array (uint32_t* ma) +{ + return (uint32_t*)((uint8_t*)ma - size_mark_array_of (0, g_gc_lowest_address)); +} - for (i = 0; i < bucket_count; i++) - { - free_space_buckets[i].count_add = ordered_free_spaces[i]; - free_space_buckets[i].count_fit = ordered_free_spaces[i]; - free_space_buckets[i].free_space = &seg_free_space_array[total_item_count]; - total_item_count += free_space_buckets[i].count_add; - } +#endif //BACKGROUND_GC - assert (total_item_count == (ptrdiff_t)item_count); - } +void own_card_table (uint32_t* c_table) +{ + card_table_refcount (c_table) += 1; +} - // If we are adding a free space before a plug we pass the - // mark stack position so we can update the length; we could - // also be adding the free space after the last plug in which - // case start is the segment which we'll need to update the - // heap_segment_plan_allocated. - void add (void* start, BOOL plug_p, BOOL first_p) +void delete_next_card_table (uint32_t* c_table) +{ + uint32_t* n_table = card_table_next (c_table); + if (n_table) { - size_t size = (plug_p ? 
- pinned_len ((mark*)start) : - (heap_segment_committed ((heap_segment*)start) - - heap_segment_plan_allocated ((heap_segment*)start))); - - if (plug_p) + if (card_table_next (n_table)) { - dprintf (SEG_REUSE_LOG_1, ("[%d]Adding a free space before plug: %zd", heap_num, size)); + delete_next_card_table (n_table); } - else + if (card_table_refcount (n_table) == 0) { - dprintf (SEG_REUSE_LOG_1, ("[%d]Adding a free space at end of seg: %zd", heap_num, size)); -#ifdef _DEBUG - has_end_of_seg = TRUE; -#endif //_DEBUG + destroy_card_table (n_table); + card_table_next (c_table) = 0; } + } +} - if (first_p) +void release_card_table (uint32_t* c_table) +{ + assert (card_table_refcount (c_table) >0); + card_table_refcount (c_table) -= 1; + if (card_table_refcount (c_table) == 0) + { + delete_next_card_table (c_table); + if (card_table_next (c_table) == 0) { - size_t eph_gen_starts = gc_heap::eph_gen_starts_size; - size -= eph_gen_starts; - if (plug_p) + destroy_card_table (c_table); + // sever the link from the parent + if (&g_gc_card_table[card_word (gcard_of(g_gc_lowest_address))] == c_table) { - mark* m = (mark*)(start); - pinned_len (m) -= eph_gen_starts; + g_gc_card_table = 0; + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + g_gc_card_bundle_table = 0; +#endif +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + SoftwareWriteWatch::StaticClose(); +#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP } else { - heap_segment* seg = (heap_segment*)start; - heap_segment_plan_allocated (seg) += eph_gen_starts; + uint32_t* p_table = &g_gc_card_table[card_word (gcard_of(g_gc_lowest_address))]; + if (p_table) + { + while (p_table && (card_table_next (p_table) != c_table)) + p_table = card_table_next (p_table); + card_table_next (p_table) = 0; + } } } + } +} - int bucket_power2 = index_of_highest_set_bit (size); - if (bucket_power2 < base_power2) - { - return; - } - - free_space_bucket* bucket = &free_space_buckets[bucket_power2 - base_power2]; - - seg_free_space* 
bucket_free_space = bucket->free_space; - assert (plug_p || (!plug_p && bucket->count_add)); - - if (bucket->count_add == 0) - { - dprintf (SEG_REUSE_LOG_1, ("[%d]Already have enough of 2^%d", heap_num, bucket_power2)); - return; - } - - ptrdiff_t index = bucket->count_add - 1; +void destroy_card_table (uint32_t* c_table) +{ +// delete (uint32_t*)&card_table_refcount(c_table); - dprintf (SEG_REUSE_LOG_1, ("[%d]Building free spaces: adding %p; len: %zd (2^%d)", - heap_num, - (plug_p ? - (pinned_plug ((mark*)start) - pinned_len ((mark*)start)) : - heap_segment_plan_allocated ((heap_segment*)start)), - size, - bucket_power2)); + size_t size = card_table_size(c_table); + gc_heap::destroy_card_table_helper (c_table); + GCToOSInterface::VirtualRelease (&card_table_refcount(c_table), size); + dprintf (2, ("Table Virtual Free : %zx", (size_t)&card_table_refcount(c_table))); +} - if (plug_p) - { - bucket_free_space[index].is_plug = TRUE; - } +uint8_t** make_mark_list (size_t size) +{ + uint8_t** mark_list = new (nothrow) uint8_t* [size]; + return mark_list; +} - bucket_free_space[index].start = start; - bucket->count_add--; - } +#define swap(a,b){uint8_t* t; t = a; a = b; b = t;} -#ifdef _DEBUG - // Do a consistency check after all free spaces are added. 
- void check() +#ifndef USE_INTROSORT +void qsort1( uint8_t* *low, uint8_t* *high, unsigned int depth) +{ + if (((low + 16) >= high) || (depth > 100)) { - ptrdiff_t i = 0; - int end_of_seg_count = 0; - - for (i = 0; i < free_space_item_count; i++) + //insertion sort + uint8_t **i, **j; + for (i = low+1; i <= high; i++) { - assert (seg_free_space_array[i].start); - if (!(seg_free_space_array[i].is_plug)) + uint8_t* val = *i; + for (j=i;j >low && val<*(j-1);j--) { - end_of_seg_count++; + *j=*(j-1); } - } - - if (has_end_of_seg) - { - assert (end_of_seg_count == 1); - } - else - { - assert (end_of_seg_count == 0); - } - - for (i = 0; i < free_space_bucket_count; i++) - { - assert (free_space_buckets[i].count_add == 0); + *j=val; } } - -#endif //_DEBUG - - uint8_t* fit (uint8_t* old_loc, - size_t plug_size - REQD_ALIGN_AND_OFFSET_DCL) + else { - if (old_loc) - { -#ifdef SHORT_PLUGS - assert (!is_plug_padded (old_loc)); -#endif //SHORT_PLUGS - assert (!node_realigned (old_loc)); - } - - size_t saved_plug_size = plug_size; - -#ifdef FEATURE_STRUCTALIGN - // BARTOKTODO (4841): this code path is disabled (see can_fit_all_blocks_p) until we take alignment requirements into account - _ASSERTE(requiredAlignment == DATA_ALIGNMENT && false); -#endif // FEATURE_STRUCTALIGN - - size_t plug_size_to_fit = plug_size; - - // best fit is only done for gen1 to gen2 and we do not pad in gen2. - // however we must account for requirements of large alignment. - // which may result in realignment padding. 
-#ifdef RESPECT_LARGE_ALIGNMENT - plug_size_to_fit += switch_alignment_size(FALSE); -#endif //RESPECT_LARGE_ALIGNMENT - - int plug_power2 = index_of_highest_set_bit (round_up_power2 (plug_size_to_fit + Align(min_obj_size))); - ptrdiff_t i; - uint8_t* new_address = 0; - - if (plug_power2 < base_power2) - { - plug_power2 = base_power2; - } - - int chosen_power2 = plug_power2 - base_power2; -retry: - for (i = chosen_power2; i < free_space_bucket_count; i++) - { - if (free_space_buckets[i].count_fit != 0) - { - break; - } - chosen_power2++; - } - - dprintf (SEG_REUSE_LOG_1, ("[%d]Fitting plug len %zd (2^%d) using 2^%d free space", - heap_num, - plug_size, - plug_power2, - (chosen_power2 + base_power2))); - - assert (i < free_space_bucket_count); - - seg_free_space* bucket_free_space = free_space_buckets[chosen_power2].free_space; - ptrdiff_t free_space_count = free_space_buckets[chosen_power2].count_fit; - size_t new_free_space_size = 0; - BOOL can_fit = FALSE; - size_t pad = 0; + uint8_t *pivot, **left, **right; - for (i = 0; i < free_space_count; i++) - { - size_t free_space_size = 0; - pad = 0; + //sort low middle and high + if (*(low+((high-low)/2)) < *low) + swap (*(low+((high-low)/2)), *low); + if (*high < *low) + swap (*low, *high); + if (*high < *(low+((high-low)/2))) + swap (*(low+((high-low)/2)), *high); - if (bucket_free_space[i].is_plug) + swap (*(low+((high-low)/2)), *(high-1)); + pivot = *(high-1); + left = low; right = high-1; + while (1) { + while (*(--right) > pivot); + while (*(++left) < pivot); + if (left < right) { - mark* m = (mark*)(bucket_free_space[i].start); - uint8_t* plug_free_space_start = pinned_plug (m) - pinned_len (m); - - if (!((old_loc == 0) || same_large_alignment_p (old_loc, plug_free_space_start))) - { - pad = switch_alignment_size (FALSE); - } - - plug_size = saved_plug_size + pad; - - free_space_size = pinned_len (m); - new_address = pinned_plug (m) - pinned_len (m); - - if (free_space_size >= (plug_size + Align (min_obj_size)) || 
- free_space_size == plug_size) - { - new_free_space_size = free_space_size - plug_size; - pinned_len (m) = new_free_space_size; -#ifdef SIMPLE_DPRINTF - dprintf (SEG_REUSE_LOG_0, ("[%d]FP: 0x%p->0x%p(%zx)(%zx), [0x%p (2^%d) -> [0x%p (2^%d)", - heap_num, - old_loc, - new_address, - (plug_size - pad), - pad, - pinned_plug (m), - index_of_highest_set_bit (free_space_size), - (pinned_plug (m) - pinned_len (m)), - index_of_highest_set_bit (new_free_space_size))); -#endif //SIMPLE_DPRINTF - - if (pad != 0) - { - set_node_realigned (old_loc); - } - - can_fit = TRUE; - } + swap(*left, *right); } else - { - heap_segment* seg = (heap_segment*)(bucket_free_space[i].start); - free_space_size = heap_segment_committed (seg) - heap_segment_plan_allocated (seg); - - if (!((old_loc == 0) || same_large_alignment_p (old_loc, heap_segment_plan_allocated (seg)))) - { - pad = switch_alignment_size (FALSE); - } - - plug_size = saved_plug_size + pad; - - if (free_space_size >= (plug_size + Align (min_obj_size)) || - free_space_size == plug_size) - { - new_address = heap_segment_plan_allocated (seg); - new_free_space_size = free_space_size - plug_size; - heap_segment_plan_allocated (seg) = new_address + plug_size; -#ifdef SIMPLE_DPRINTF - dprintf (SEG_REUSE_LOG_0, ("[%d]FS: 0x%p-> 0x%p(%zd) (2^%d) -> 0x%p (2^%d)", - heap_num, - old_loc, - new_address, - (plug_size - pad), - index_of_highest_set_bit (free_space_size), - heap_segment_plan_allocated (seg), - index_of_highest_set_bit (new_free_space_size))); -#endif //SIMPLE_DPRINTF - - if (pad != 0) - set_node_realigned (old_loc); - - can_fit = TRUE; - } - } - - if (can_fit) - { break; - } - } - - if (!can_fit) - { - assert (chosen_power2 == 0); - chosen_power2 = 1; - goto retry; } + swap (*left, *(high-1)); + qsort1(low, left-1, depth+1); + qsort1(left+1, high, depth+1); + } +} +#endif //USE_INTROSORT - new_address += pad; - assert ((chosen_power2 && (i == 0)) || - ((!chosen_power2) && (i < free_space_count))); - - int new_bucket_power2 = 
index_of_highest_set_bit (new_free_space_size); - - if (new_bucket_power2 < base_power2) - { - new_bucket_power2 = base_power2; - } +#ifdef USE_VXSORT +void do_vxsort (uint8_t** item_array, ptrdiff_t item_count, uint8_t* range_low, uint8_t* range_high) +{ + // above this threshold, using AVX2 for sorting will likely pay off + // despite possible downclocking on some devices + const ptrdiff_t AVX2_THRESHOLD_SIZE = 8 * 1024; - move_bucket (chosen_power2, new_bucket_power2 - base_power2); + // above this threshold, using AVX512F for sorting will likely pay off + // despite possible downclocking on current devices + const ptrdiff_t AVX512F_THRESHOLD_SIZE = 128 * 1024; - //dump(); + // above this threshold, using NEON for sorting will likely pay off + const ptrdiff_t NEON_THRESHOLD_SIZE = 1024; - return new_address; - } + if (item_count <= 1) + return; - void cleanup () +#if defined(TARGET_AMD64) + if (IsSupportedInstructionSet (InstructionSet::AVX2) && (item_count > AVX2_THRESHOLD_SIZE)) { - if (free_space_buckets) + dprintf(3, ("Sorting mark lists")); + + // use AVX512F only if the list is large enough to pay for downclocking impact + if (IsSupportedInstructionSet (InstructionSet::AVX512F) && (item_count > AVX512F_THRESHOLD_SIZE)) { - delete [] free_space_buckets; + do_vxsort_avx512 (item_array, &item_array[item_count - 1], range_low, range_high); } - if (seg_free_space_array) + else { - delete [] seg_free_space_array; + do_vxsort_avx2 (item_array, &item_array[item_count - 1], range_low, range_high); } } -}; -#endif //!USE_REGIONS - -#define marked(i) header(i)->IsMarked() -#define set_marked(i) header(i)->SetMarked() -#define clear_marked(i) header(i)->ClearMarked() -#define pinned(i) header(i)->IsPinned() -#define set_pinned(i) header(i)->SetPinned() -#define clear_pinned(i) header(i)->GetHeader()->ClrGCBit(); - -inline size_t my_get_size (Object* ob) -{ - MethodTable* mT = header(ob)->GetMethodTable(); - - return (mT->GetBaseSize() + - (mT->HasComponentSize() ? 
- ((size_t)((CObjectHeader*)ob)->GetNumComponents() * mT->RawGetComponentSize()) : 0)); +#elif defined(TARGET_ARM64) + if (IsSupportedInstructionSet (InstructionSet::NEON) && (item_count > NEON_THRESHOLD_SIZE)) + { + dprintf(3, ("Sorting mark lists")); + do_vxsort_neon (item_array, &item_array[item_count - 1], range_low, range_high); + } +#endif + else + { + dprintf (3, ("Sorting mark lists")); + introsort::sort (item_array, &item_array[item_count - 1], 0); + } +#ifdef _DEBUG + // check the array is sorted + for (ptrdiff_t i = 0; i < item_count - 1; i++) + { + assert (item_array[i] <= item_array[i + 1]); + } + // check that the ends of the array are indeed in range + // together with the above this implies all elements are in range + assert ((range_low <= item_array[0]) && (item_array[item_count - 1] <= range_high)); +#endif } - -#define size(i) my_get_size (header(i)) - -#define contain_pointers(i) header(i)->ContainsGCPointers() -#ifdef COLLECTIBLE_CLASS -#define contain_pointers_or_collectible(i) header(i)->ContainsGCPointersOrCollectible() - -#define get_class_object(i) GCToEEInterface::GetLoaderAllocatorObjectForGC((Object *)i) -#define is_collectible(i) method_table(i)->Collectible() -#else //COLLECTIBLE_CLASS -#define contain_pointers_or_collectible(i) header(i)->ContainsGCPointers() -#endif //COLLECTIBLE_CLASS - -#ifdef USE_REGIONS - - -static GCSpinLock write_barrier_spin_lock; - -#endif //USE_REGIONS +#endif //USE_VXSORT #ifdef WRITE_WATCH uint8_t* g_addresses [array_size+2]; // to get around the bug in GetWriteWatch #ifdef BACKGROUND_GC -const size_t ww_reset_quantum = 128*1024*1024; +extern const size_t ww_reset_quantum = 128*1024*1024; #endif //BACKGROUND_GC #endif //WRITE_WATCH @@ -7235,32 +3821,11 @@ gc_heap::destroy_gc_heap(gc_heap* heap) delete heap; } -enum { -CORINFO_EXCEPTION_GC = 0xE0004743 // 'GC' -}; - - -#define mark_stack_empty_p() (mark_stack_base == mark_stack_tos) - #ifdef USE_REGIONS #ifdef DYNAMIC_HEAP_COUNT // check that the fields of 
a decommissioned heap have their expected values, // i.e. were not inadvertently modified -#define DECOMMISSIONED_VALUE 0xdec0dec0dec0dec0 -static const size_t DECOMMISSIONED_SIZE_T = DECOMMISSIONED_VALUE; -static const ptrdiff_t DECOMMISSIONED_PTRDIFF_T = (ptrdiff_t)DECOMMISSIONED_VALUE; -static const ptrdiff_t DECOMMISSIONED_UINT64_T = (uint64_t)DECOMMISSIONED_VALUE; -static uint8_t* const DECOMMISSIONED_UINT8_T_P = (uint8_t*)DECOMMISSIONED_VALUE; -static uint8_t** const DECOMMISSIONED_UINT8_T_PP = (uint8_t**)DECOMMISSIONED_VALUE; -static PTR_heap_segment const DECOMMISSIONED_REGION_P = (PTR_heap_segment)DECOMMISSIONED_VALUE; -static mark* const DECOMMISSIONED_MARK_P = (mark*)DECOMMISSIONED_VALUE; -static const BOOL DECOMMISSIONED_BOOL = 0xdec0dec0; -static const BOOL DECOMMISSIONED_INT = (int)0xdec0dec0; -static const float DECOMMISSIONED_FLOAT = (float)DECOMMISSIONED_VALUE; - -static const ptrdiff_t UNINITIALIZED_VALUE = 0xbaadbaadbaadbaad; - float log_with_base (float x, float base) { @@ -7331,212 +3896,12 @@ float gc_heap::dynamic_heap_count_data_t::slope (float* y, int n, float* avg) #ifdef MULTIPLE_HEAPS -#ifdef GC_CONFIG_DRIVEN -#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;} else {mark_list_index++;}} -#else //GC_CONFIG_DRIVEN -#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;}} -#endif //GC_CONFIG_DRIVEN - -#define m_boundary_fullgc(o) {} - -#else //MULTIPLE_HEAPS - -#ifdef GC_CONFIG_DRIVEN -#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;} else {mark_list_index++;} if (slow > o) slow = o; if (shigh < o) shigh = o;} -#else -#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;}if (slow > o) slow = o; if (shigh < o) shigh = o;} -#endif //GC_CONFIG_DRIVEN - -#define m_boundary_fullgc(o) {if (slow > o) slow = o; if (shigh < o) shigh = o;} - -#endif 
//MULTIPLE_HEAPS - -#ifdef USE_REGIONS -inline bool is_in_heap_range (uint8_t* o) -{ -#ifdef FEATURE_BASICFREEZE - // we may have frozen objects in read only segments - // outside of the reserved address range of the gc heap - assert (((g_gc_lowest_address <= o) && (o < g_gc_highest_address)) || - (o == nullptr) || (ro_segment_lookup (o) != nullptr)); - return ((g_gc_lowest_address <= o) && (o < g_gc_highest_address)); -#else //FEATURE_BASICFREEZE - // without frozen objects, every non-null pointer must be - // within the heap - assert ((o == nullptr) || (g_gc_lowest_address <= o) && (o < g_gc_highest_address)); - return (o != nullptr); -#endif //FEATURE_BASICFREEZE -} - -#endif //USE_REGIONS - -#define new_start() {if (ppstop <= start) {break;} else {parm = start}} -#define ignore_start 0 -#define use_start 1 - -#define go_through_object(mt,o,size,parm,start,start_useful,limit,exp) \ -{ \ - CGCDesc* map = CGCDesc::GetCGCDescFromMT((MethodTable*)(mt)); \ - CGCDescSeries* cur = map->GetHighestSeries(); \ - ptrdiff_t cnt = (ptrdiff_t) map->GetNumSeries(); \ - \ - if (cnt >= 0) \ - { \ - CGCDescSeries* last = map->GetLowestSeries(); \ - uint8_t** parm = 0; \ - do \ - { \ - assert (parm <= (uint8_t**)((o) + cur->GetSeriesOffset())); \ - parm = (uint8_t**)((o) + cur->GetSeriesOffset()); \ - uint8_t** ppstop = \ - (uint8_t**)((uint8_t*)parm + cur->GetSeriesSize() + (size));\ - if (!start_useful || (uint8_t*)ppstop > (start)) \ - { \ - if (start_useful && (uint8_t*)parm < (start)) parm = (uint8_t**)(start);\ - while (parm < ppstop) \ - { \ - {exp} \ - parm++; \ - } \ - } \ - cur--; \ - \ - } while (cur >= last); \ - } \ - else \ - { \ - /* Handle the repeating case - array of valuetypes */ \ - uint8_t** parm = (uint8_t**)((o) + cur->startoffset); \ - if (start_useful && start > (uint8_t*)parm) \ - { \ - ptrdiff_t cs = mt->RawGetComponentSize(); \ - parm = (uint8_t**)((uint8_t*)parm + (((start) - (uint8_t*)parm)/cs)*cs); \ - } \ - while ((uint8_t*)parm < 
((o)+(size)-plug_skew)) \ - { \ - for (ptrdiff_t __i = 0; __i > cnt; __i--) \ - { \ - HALF_SIZE_T skip = (cur->val_serie + __i)->skip; \ - HALF_SIZE_T nptrs = (cur->val_serie + __i)->nptrs; \ - uint8_t** ppstop = parm + nptrs; \ - if (!start_useful || (uint8_t*)ppstop > (start)) \ - { \ - if (start_useful && (uint8_t*)parm < (start)) parm = (uint8_t**)(start); \ - do \ - { \ - {exp} \ - parm++; \ - } while (parm < ppstop); \ - } \ - parm = (uint8_t**)((uint8_t*)ppstop + skip); \ - } \ - } \ - } \ -} - -#define go_through_object_nostart(mt,o,size,parm,exp) {go_through_object(mt,o,size,parm,o,ignore_start,(o + size),exp); } - -// 1 thing to note about this macro: -// 1) you can use *parm safely but in general you don't want to use parm -// because for the collectible types it's not an address on the managed heap. -#ifndef COLLECTIBLE_CLASS -#define go_through_object_cl(mt,o,size,parm,exp) \ -{ \ - if (header(o)->ContainsGCPointers()) \ - { \ - go_through_object_nostart(mt,o,size,parm,exp); \ - } \ -} -#else //COLLECTIBLE_CLASS -#define go_through_object_cl(mt,o,size,parm,exp) \ -{ \ - if (header(o)->Collectible()) \ - { \ - uint8_t* class_obj = get_class_object (o); \ - uint8_t** parm = &class_obj; \ - do {exp} while (false); \ - } \ - if (header(o)->ContainsGCPointers()) \ - { \ - go_through_object_nostart(mt,o,size,parm,exp); \ - } \ -} -#endif //COLLECTIBLE_CLASS - -// enable on processors known to have a useful prefetch instruction -#if defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64) || defined(TARGET_RISCV64) -#define PREFETCH -#endif - -#ifdef PREFETCH -inline void Prefetch(void* addr) -{ -#ifdef TARGET_WINDOWS - -#if defined(TARGET_AMD64) || defined(TARGET_X86) - -#ifndef _MM_HINT_T0 -#define _MM_HINT_T0 1 -#endif - _mm_prefetch((const char*)addr, _MM_HINT_T0); -#elif defined(TARGET_ARM64) - __prefetch((const char*)addr); -#endif //defined(TARGET_AMD64) || defined(TARGET_X86) - -#elif defined(TARGET_UNIX) - __builtin_prefetch(addr); 
-#else //!(TARGET_WINDOWS || TARGET_UNIX) - UNREFERENCED_PARAMETER(addr); -#endif //TARGET_WINDOWS -} -#else //PREFETCH -inline void Prefetch (void* addr) -{ - UNREFERENCED_PARAMETER(addr); -} -#endif //PREFETCH - -#define stolen 2 -#define partial 1 -#define partial_object 3 - -inline -BOOL stolen_p (uint8_t* r) -{ - return (((size_t)r&2) && !((size_t)r&1)); -} -inline -BOOL ready_p (uint8_t* r) -{ - return ((size_t)r != 1); -} -inline -BOOL partial_p (uint8_t* r) -{ - return (((size_t)r&1) && !((size_t)r&2)); -} -inline -BOOL straight_ref_p (uint8_t* r) -{ - return (!stolen_p (r) && !partial_p (r)); -} -inline -BOOL partial_object_p (uint8_t* r) -{ - return (((size_t)r & partial_object) == partial_object); -} - - - -#ifdef MULTIPLE_HEAPS - -static VOLATILE(BOOL) s_fUnpromotedHandles = FALSE; -static VOLATILE(BOOL) s_fUnscannedPromotions = FALSE; -static VOLATILE(BOOL) s_fScanRequired; +VOLATILE(BOOL) s_fUnpromotedHandles = FALSE; +VOLATILE(BOOL) s_fUnscannedPromotions = FALSE; +VOLATILE(BOOL) s_fScanRequired; #else //MULTIPLE_HEAPS #endif //MULTIPLE_HEAPS -#ifdef FEATURE_STRUCTALIGN // // The word with left child, right child, and align info is laid out as follows: // @@ -7555,70 +3920,6 @@ static VOLATILE(BOOL) s_fScanRequired; // by adding the alignment iff the misalignment is non-zero and less than min_obj_size. // -// The number of bits in a brick. -#if defined (TARGET_AMD64) -#define brick_bits (12) -#else -#define brick_bits (11) -#endif //TARGET_AMD64 -static_assert(brick_size == (1 << brick_bits)); - -// The number of bits needed to represent the offset to a child node. -// "brick_bits + 1" allows us to represent a signed offset within a brick. -#define child_bits (brick_bits + 1 - LOG2_PTRSIZE) - -// The number of bits in each of the pad hi, pad lo fields. 
-#define pad_bits (sizeof(short) * 8 - child_bits) - -#define child_from_short(w) (((signed short)(w) / (1 << (pad_bits - LOG2_PTRSIZE))) & ~((1 << LOG2_PTRSIZE) - 1)) -#define pad_mask ((1 << pad_bits) - 1) -#define pad_from_short(w) ((size_t)(w) & pad_mask) -#else // FEATURE_STRUCTALIGN -#define child_from_short(w) (w) -#endif // FEATURE_STRUCTALIGN - -inline -short node_left_child(uint8_t* node) -{ - return child_from_short(((plug_and_pair*)node)[-1].m_pair.left); -} - -inline -void set_node_left_child(uint8_t* node, ptrdiff_t val) -{ - assert (val > -(ptrdiff_t)brick_size); - assert (val < (ptrdiff_t)brick_size); - assert (Aligned (val)); -#ifdef FEATURE_STRUCTALIGN - size_t pad = pad_from_short(((plug_and_pair*)node)[-1].m_pair.left); - ((plug_and_pair*)node)[-1].m_pair.left = ((short)val << (pad_bits - LOG2_PTRSIZE)) | (short)pad; -#else // FEATURE_STRUCTALIGN - ((plug_and_pair*)node)[-1].m_pair.left = (short)val; -#endif // FEATURE_STRUCTALIGN - assert (node_left_child (node) == val); -} - -inline -short node_right_child(uint8_t* node) -{ - return child_from_short(((plug_and_pair*)node)[-1].m_pair.right); -} - -inline -void set_node_right_child(uint8_t* node, ptrdiff_t val) -{ - assert (val > -(ptrdiff_t)brick_size); - assert (val < (ptrdiff_t)brick_size); - assert (Aligned (val)); -#ifdef FEATURE_STRUCTALIGN - size_t pad = pad_from_short(((plug_and_pair*)node)[-1].m_pair.right); - ((plug_and_pair*)node)[-1].m_pair.right = ((short)val << (pad_bits - LOG2_PTRSIZE)) | (short)pad; -#else // FEATURE_STRUCTALIGN - ((plug_and_pair*)node)[-1].m_pair.right = (short)val; -#endif // FEATURE_STRUCTALIGN - assert (node_right_child (node) == val); -} - #ifdef FEATURE_STRUCTALIGN void node_aligninfo (uint8_t* node, int& requiredAlignment, ptrdiff_t& pad) { @@ -7683,39 +3984,6 @@ void set_node_aligninfo (uint8_t* node, int requiredAlignment, ptrdiff_t pad) } #endif // FEATURE_STRUCTALIGN -inline -void loh_set_node_relocation_distance(uint8_t* node, ptrdiff_t val) -{ - 
ptrdiff_t* place = &(((loh_obj_and_pad*)node)[-1].reloc); - *place = val; -} - -inline -ptrdiff_t loh_node_relocation_distance(uint8_t* node) -{ - return (((loh_obj_and_pad*)node)[-1].reloc); -} - -inline -ptrdiff_t node_relocation_distance (uint8_t* node) -{ - return (((plug_and_reloc*)(node))[-1].reloc & ~3); -} - -inline -void set_node_relocation_distance(uint8_t* node, ptrdiff_t val) -{ - assert (val == (val & ~3)); - ptrdiff_t* place = &(((plug_and_reloc*)node)[-1].reloc); - //clear the left bit and the relocation field - *place &= 1; - *place |= val; -} - -#define node_left_p(node) (((plug_and_reloc*)(node))[-1].reloc & 2) - -#define set_node_left(node) ((plug_and_reloc*)(node))[-1].reloc |= 2; - #ifndef FEATURE_STRUCTALIGN void set_node_realigned(uint8_t* node) { @@ -7732,25 +4000,6 @@ void clear_node_realigned(uint8_t* node) } #endif // FEATURE_STRUCTALIGN -inline -size_t node_gap_size (uint8_t* node) -{ - return ((plug_and_gap *)node)[-1].gap; -} - -void set_gap_size (uint8_t* node, size_t size) -{ - assert (Aligned (size)); - - // clear the 2 uint32_t used by the node. 
- ((plug_and_gap *)node)[-1].reloc = 0; - ((plug_and_gap *)node)[-1].lr =0; - ((plug_and_gap *)node)[-1].gap = size; - - assert ((size == 0 )||(size >= sizeof(plug_and_reloc))); - -} - /***************************** Called after compact phase to fix all generation gaps ********************************/ @@ -7784,47 +4033,6 @@ void heap_segment::thread_free_obj (uint8_t* obj, size_t s) #endif //USE_REGIONS -inline -uint8_t* tree_search (uint8_t* tree, uint8_t* old_address) -{ - uint8_t* candidate = 0; - int cn; - while (1) - { - if (tree < old_address) - { - if ((cn = node_right_child (tree)) != 0) - { - assert (candidate < tree); - candidate = tree; - tree = tree + cn; - Prefetch (&((plug_and_pair*)tree)[-1].m_pair.left); - continue; - } - else - break; - } - else if (tree > old_address) - { - if ((cn = node_left_child (tree)) != 0) - { - tree = tree + cn; - Prefetch (&((plug_and_pair*)tree)[-1].m_pair.left); - continue; - } - else - break; - } else - break; - } - if (tree <= old_address) - return tree; - else if (candidate) - return candidate; - else - return tree; -} - #ifdef MULTIPLE_HEAPS @@ -7886,9 +4094,6 @@ gc_heap::bgc_suspend_EE () } #endif //MULTIPLE_HEAPS -#ifdef BGC_SERVO_TUNING - -#endif //BGC_SERVO_TUNING #endif //BACKGROUND_GC //because of heap expansion, computing end is complicated. 
@@ -8120,21 +4325,6 @@ void StressHeapDummy (); #endif // STRESS_HEAP #endif // !FEATURE_NATIVEAOT -#ifdef FEATURE_PREMORTEM_FINALIZATION -#define REGISTER_FOR_FINALIZATION(_object, _size) \ - hp->finalize_queue->RegisterForFinalization (0, (_object), (_size)) -#else // FEATURE_PREMORTEM_FINALIZATION -#define REGISTER_FOR_FINALIZATION(_object, _size) true -#endif // FEATURE_PREMORTEM_FINALIZATION - -#define CHECK_ALLOC_AND_POSSIBLY_REGISTER_FOR_FINALIZATION(_object, _size, _register) do { \ - if ((_object) == NULL || ((_register) && !REGISTER_FOR_FINALIZATION(_object, _size))) \ - { \ - STRESS_LOG_OOM_STACK(_size); \ - return NULL; \ - } \ -} while (false) - #if defined(WRITE_BARRIER_CHECK) && !defined (SERVER_GC) // This code is designed to catch the failure to update the write barrier // The way it works is to copy the whole heap right after every GC. The write @@ -8482,68 +4672,6 @@ void PopulateDacVars(GcDacVars *gcDacVars) } } -inline -BOOL gc_heap::ephemeral_pointer_p (uint8_t* o) -{ -#ifdef USE_REGIONS - int gen_num = object_gennum ((uint8_t*)o); - assert (gen_num >= 0); - return (gen_num < max_generation); -#else - return ((o >= ephemeral_low) && (o < ephemeral_high)); -#endif //USE_REGIONS -} - -// This needs to check the range that's covered by bookkeeping because find_object will -// need to look at the brick table. 
-inline -bool gc_heap::is_in_find_object_range (uint8_t* o) -{ - if (o == nullptr) - { - return false; - } -#if defined(USE_REGIONS) && defined(FEATURE_CONSERVATIVE_GC) - return ((o >= g_gc_lowest_address) && (o < bookkeeping_covered_committed)); -#else //USE_REGIONS && FEATURE_CONSERVATIVE_GC - if ((o >= g_gc_lowest_address) && (o < g_gc_highest_address)) - { -#ifdef USE_REGIONS - assert ((o >= g_gc_lowest_address) && (o < bookkeeping_covered_committed)); -#endif //USE_REGIONS - return true; - } - else - { - return false; - } -#endif //USE_REGIONS && FEATURE_CONSERVATIVE_GC -} - -#ifdef USE_REGIONS - -// This assumes o is guaranteed to be in a region. -inline -bool gc_heap::is_in_condemned_gc (uint8_t* o) -{ - assert ((o >= g_gc_lowest_address) && (o < g_gc_highest_address)); - - int condemned_gen = settings.condemned_generation; - if (condemned_gen < max_generation) - { - int gen = get_region_gen_num (o); - if (gen > condemned_gen) - { - return false; - } - } - - return true; -} - -#endif //USE_REGIONS - - #if defined (_MSC_VER) && defined (TARGET_X86) #pragma optimize("y", on) // Small critical routines, don't put in EBP frame #endif //_MSC_VER && TARGET_X86 @@ -8631,36 +4759,6 @@ gc_heap* seg_mapping_table_heap_of_gc (uint8_t* o) } #endif //MULTIPLE_HEAPS -#if !defined(_DEBUG) && !defined(__GNUC__) -inline // This causes link errors if global optimization is off -#endif //!_DEBUG && !__GNUC__ -gc_heap* gc_heap::heap_of (uint8_t* o) -{ -#ifdef MULTIPLE_HEAPS - if (o == 0) - return g_heaps [0]; - gc_heap* hp = seg_mapping_table_heap_of (o); - return (hp ? hp : g_heaps[0]); -#else //MULTIPLE_HEAPS - UNREFERENCED_PARAMETER(o); - return __this; -#endif //MULTIPLE_HEAPS -} - -inline -gc_heap* gc_heap::heap_of_gc (uint8_t* o) -{ -#ifdef MULTIPLE_HEAPS - if (o == 0) - return g_heaps [0]; - gc_heap* hp = seg_mapping_table_heap_of_gc (o); - return (hp ? 
hp : g_heaps[0]); -#else //MULTIPLE_HEAPS - UNREFERENCED_PARAMETER(o); - return __this; -#endif //MULTIPLE_HEAPS -} - // will find all heap objects (large and small) // // Callers of this method need to guarantee the interior pointer is within the heap range. @@ -8746,17 +4844,6 @@ size_t gc_heap::get_generation_start_size (int gen_number) #endif //!USE_REGIONS } -inline -int gc_heap::get_num_heaps() -{ -#ifdef MULTIPLE_HEAPS - return n_heaps; -#else - return 1; -#endif //MULTIPLE_HEAPS -} - - void stomp_write_barrier_resize(bool is_runtime_suspended, bool requires_upper_bounds_check) { WriteBarrierParameters args = {}; @@ -8782,25 +4869,4 @@ void stomp_write_barrier_resize(bool is_runtime_suspended, bool requires_upper_b GCToEEInterface::StompWriteBarrier(&args); } -// Category-specific gc_heap method files -#include "region_allocator.cpp" -#include "region_free_list.cpp" -#include "finalization.cpp" -#include "interface.cpp" -#include "allocation.cpp" -#include "mark_phase.cpp" -#include "plan_phase.cpp" -#include "relocate_compact.cpp" -#include "sweep.cpp" -#include "background.cpp" -#include "regions_segments.cpp" -#include "card_table.cpp" -#include "memory.cpp" -#include "diagnostics.cpp" -#include "dynamic_tuning.cpp" -#include "no_gc.cpp" -#include "dynamic_heap_count.cpp" -#include "init.cpp" -#include "collect.cpp" - } diff --git a/src/coreclr/gc/gceesvr.cpp b/src/coreclr/gc/gceesvr.cpp deleted file mode 100644 index 9b37a77b0ae697..00000000000000 --- a/src/coreclr/gc/gceesvr.cpp +++ /dev/null @@ -1,7 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -#if defined(FEATURE_SVR_GC) -#define SERVER_GC 1 -#include "gcee.cpp" -#endif // FEATURE_SVR_GC diff --git a/src/coreclr/gc/gceewks.cpp b/src/coreclr/gc/gceewks.cpp deleted file mode 100644 index d0e275be2bb7ba..00000000000000 --- a/src/coreclr/gc/gceewks.cpp +++ /dev/null @@ -1,8 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#ifdef SERVER_GC -#undef SERVER_GC -#endif - -#include "gcee.cpp" diff --git a/src/coreclr/gc/gcinternal.h b/src/coreclr/gc/gcinternal.h new file mode 100644 index 00000000000000..8e33a84168dcb1 --- /dev/null +++ b/src/coreclr/gc/gcinternal.h @@ -0,0 +1,4448 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef GC_INTERNAL_H +#define GC_INTERNAL_H + +#include "common.h" +#include "gcenv.h" + +#include "gc.h" +#include "gcscan.h" +#include "gcdesc.h" +#include "softwarewritewatch.h" +#include "handletable.h" +#include "handletable.inl" +#include "gcenv.inl" +#include "gceventstatus.h" +#include + +// If FEATURE_INTERPRETER is set, always enable the GC side of FEATURE_CONSERVATIVE_GC +#ifdef FEATURE_INTERPRETER +#ifndef FEATURE_CONSERVATIVE_GC +#define FEATURE_CONSERVATIVE_GC +#endif +#endif // FEATURE_INTERPRETER + +#ifdef __INTELLISENSE__ +#if defined(FEATURE_SVR_GC) + +#define SERVER_GC 1 + +#else // defined(FEATURE_SVR_GC) + +#ifdef SERVER_GC +#undef SERVER_GC +#endif + +#endif // defined(FEATURE_SVR_GC) +#endif // __INTELLISENSE__ + +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#include "vxsort/do_vxsort.h" +#define USE_VXSORT +#else +#define USE_INTROSORT +#endif // TARGET_AMD64 || TARGET_ARM64 +#include "introsort.h" + +#ifdef SERVER_GC +namespace SVR { +#else // SERVER_GC +namespace WKS { +#endif // SERVER_GC + +#include "gcimpl.h" +#include "gcpriv.h" + +#ifdef DACCESS_COMPILE +#error this source file should not be compiled 
with DACCESS_COMPILE! +#endif //DACCESS_COMPILE + +// We just needed a simple random number generator for testing. +class gc_rand +{ +public: + static uint64_t x; + + static uint64_t get_rand() + { + x = (314159269*x+278281) & 0x7FFFFFFF; + return x; + } + + // obtain random number in the range 0 .. r-1 + static uint64_t get_rand(uint64_t r) + { + // require r >= 0 + uint64_t x = (uint64_t)((get_rand() * r) >> 31); + return x; + } +}; + +#define MAX_PTR ((uint8_t*)(~(ptrdiff_t)0)) +#define MAX_YP_SPIN_COUNT_UNIT 32768 +#define MIN_SOH_CROSS_GEN_REFS (400) +#define MIN_LOH_CROSS_GEN_REFS (800) + +#ifdef SERVER_GC +#define partial_size_th 100 +#define num_partial_refs 64 +#else //SERVER_GC +#define partial_size_th 100 +#define num_partial_refs 32 +#endif //SERVER_GC + +#ifdef HOST_64BIT +#define MARK_STACK_INITIAL_LENGTH 1024 +#else +#define MARK_STACK_INITIAL_LENGTH 128 +#endif // HOST_64BIT + +extern uint32_t yp_spin_count_unit; +extern uint32_t original_spin_count_unit; + +#ifdef GC_CONFIG_DRIVEN +extern int compact_ratio; +#define gc_config_log_buffer_size (1*1024) // TEMP +extern FILE* gc_config_log; +extern uint8_t* gc_config_log_buffer; +#endif //GC_CONFIG_DRIVEN + +#ifdef WRITE_WATCH +extern uint8_t* g_addresses [array_size+2]; +#endif //WRITE_WATCH + +#define lowbits(wrd, bits) ((wrd) & ((1 << (bits))-1)) +#define highbits(wrd, bits) ((wrd) & ~((1 << (bits))-1)) + +#if defined(BACKGROUND_GC) && defined(FEATURE_EVENT_TRACE) +extern BOOL bgc_heap_walk_for_etw_p; +#endif //BACKGROUND_GC && FEATURE_EVENT_TRACE + +extern const char* const str_root_kinds[]; + +#ifdef MH_SC_MARK +extern const int max_snoop_level; +#endif //MH_SC_MARK + +// This needs to check the range that's covered by bookkeeping because find_object will +// need to look at the brick table. 
+inline +bool gc_heap::is_in_find_object_range (uint8_t* o) +{ + if (o == nullptr) + { + return false; + } +#if defined(USE_REGIONS) && defined(FEATURE_CONSERVATIVE_GC) + return ((o >= g_gc_lowest_address) && (o < bookkeeping_covered_committed)); +#else //USE_REGIONS && FEATURE_CONSERVATIVE_GC + if ((o >= g_gc_lowest_address) && (o < g_gc_highest_address)) + { +#ifdef USE_REGIONS + assert ((o >= g_gc_lowest_address) && (o < bookkeeping_covered_committed)); +#endif //USE_REGIONS + return true; + } + else + { + return false; + } +#endif //USE_REGIONS && FEATURE_CONSERVATIVE_GC +} + +#ifdef USE_REGIONS + +// This assumes o is guaranteed to be in a region. +inline +bool gc_heap::is_in_condemned_gc (uint8_t* o) +{ + assert ((o >= g_gc_lowest_address) && (o < g_gc_highest_address)); + + int condemned_gen = settings.condemned_generation; + if (condemned_gen < max_generation) + { + int gen = get_region_gen_num (o); + if (gen > condemned_gen) + { + return false; + } + } + + return true; +} + +#endif //USE_REGIONS + +#ifdef MULTIPLE_HEAPS +extern uint32_t g_num_active_processors; + +// Note that when a join is no longer used we still keep the values here because +// tooling already recognized them as having the meaning they were assigned originally. +// It doesn't break tooling if we stop using them but does if we assign a new meaning +// to them. 
+enum gc_join_stage +{ + gc_join_init_cpu_mapping = 0, + gc_join_done = 1, + gc_join_generation_determined = 2, + gc_join_begin_mark_phase = 3, + gc_join_scan_dependent_handles = 4, + gc_join_rescan_dependent_handles = 5, + gc_join_scan_sizedref_done = 6, + gc_join_null_dead_short_weak = 7, + gc_join_scan_finalization = 8, + gc_join_null_dead_long_weak = 9, + gc_join_null_dead_syncblk = 10, + gc_join_decide_on_compaction = 11, + gc_join_rearrange_segs_compaction = 12, + gc_join_adjust_handle_age_compact = 13, + gc_join_adjust_handle_age_sweep = 14, + gc_join_begin_relocate_phase = 15, + gc_join_relocate_phase_done = 16, + gc_join_verify_objects_done = 17, + gc_join_start_bgc = 18, + gc_join_restart_ee = 19, + gc_join_concurrent_overflow = 20, + gc_join_suspend_ee = 21, + gc_join_bgc_after_ephemeral = 22, + gc_join_allow_fgc = 23, + gc_join_bgc_sweep = 24, + gc_join_suspend_ee_verify = 25, + gc_join_restart_ee_verify = 26, + gc_join_set_state_free = 27, + gc_r_join_update_card_bundle = 28, + gc_join_after_absorb = 29, + gc_join_verify_copy_table = 30, + gc_join_after_reset = 31, + gc_join_after_ephemeral_sweep = 32, + gc_join_after_profiler_heap_walk = 33, + gc_join_minimal_gc = 34, + gc_join_after_commit_soh_no_gc = 35, + gc_join_expand_loh_no_gc = 36, + gc_join_final_no_gc = 37, + // No longer in use but do not remove, see comments for this enum. 
+ gc_join_disable_software_write_watch = 38, + gc_join_merge_temp_fl = 39, + gc_join_bridge_processing = 40, + gc_join_max = 41 +}; + +enum gc_join_flavor +{ + join_flavor_server_gc = 0, + join_flavor_bgc = 1 +}; + +#define first_thread_arrived 2 +#pragma warning(push) +#pragma warning(disable:4324) // don't complain if DECLSPEC_ALIGN actually pads +struct DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) join_structure +{ + // Shared non volatile keep on separate line to prevent eviction + int n_threads; + + // Keep polling/wait structures on separate line write once per join + DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) + GCEvent joined_event[3]; // the last event in the array is only used for first_thread_arrived. + Volatile lock_color; + VOLATILE(BOOL) wait_done; + VOLATILE(BOOL) joined_p; + + // Keep volatile counted locks on separate cache line write many per join + DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) + VOLATILE(int) join_lock; + VOLATILE(int) r_join_lock; + +}; +#pragma warning(pop) + +enum join_type +{ + type_last_join = 0, + type_join = 1, + type_restart = 2, + type_first_r_join = 3, + type_r_join = 4 +}; + +enum join_time +{ + time_start = 0, + time_end = 1 +}; + +enum join_heap_index +{ + join_heap_restart = 100, + join_heap_r_restart = 200 +}; + +class t_join +{ + join_structure join_struct; + + int id; + gc_join_flavor flavor; + +#ifdef JOIN_STATS + uint64_t start[MAX_SUPPORTED_CPUS], end[MAX_SUPPORTED_CPUS], start_seq; + // remember join id and last thread to arrive so restart can use these + int thd; + // we want to print statistics every 10 seconds - this is to remember the start of the 10 sec interval + uint64_t start_tick; + // counters for joins, in 1000's of clock cycles + uint64_t elapsed_total[gc_join_max], wake_total[gc_join_max], seq_loss_total[gc_join_max], par_loss_total[gc_join_max], in_join_total[gc_join_max]; +#endif //JOIN_STATS + +public: + BOOL init (int n_th, gc_join_flavor f) + { + dprintf (JOIN_LOG, ("Initializing join structure")); + 
join_struct.n_threads = n_th; + join_struct.lock_color = 0; + for (int i = 0; i < 3; i++) + { + if (!join_struct.joined_event[i].IsValid()) + { + join_struct.joined_p = FALSE; + dprintf (JOIN_LOG, ("Creating join event %d", i)); + // TODO - changing this to a non OS event + // because this is also used by BGC threads which are + // managed threads and WaitEx does not allow you to wait + // for an OS event on a managed thread. + // But we are not sure if this plays well in the hosting + // environment. + //join_struct.joined_event[i].CreateOSManualEventNoThrow(FALSE); + if (!join_struct.joined_event[i].CreateManualEventNoThrow(FALSE)) + return FALSE; + } + } + join_struct.join_lock = join_struct.n_threads; + join_struct.r_join_lock = join_struct.n_threads; + join_struct.wait_done = FALSE; + flavor = f; + +#ifdef JOIN_STATS + start_tick = GCToOSInterface::GetLowPrecisionTimeStamp(); +#endif //JOIN_STATS + + return TRUE; + } + + void update_n_threads(int n_th) + { + join_struct.n_threads = n_th; + join_struct.join_lock = n_th; + join_struct.r_join_lock = n_th; + } + + int get_num_threads() + { + return join_struct.n_threads; + } + + // This is for instrumentation only. 
+ int get_join_lock() + { + return VolatileLoadWithoutBarrier (&join_struct.join_lock); + } + + void destroy () + { + dprintf (JOIN_LOG, ("Destroying join structure")); + for (int i = 0; i < 3; i++) + { + if (join_struct.joined_event[i].IsValid()) + join_struct.joined_event[i].CloseEvent(); + } + } + + inline void fire_event (int heap, join_time time, join_type type, int join_id) + { + FIRE_EVENT(GCJoin_V2, heap, time, type, join_id); + } + + void join (gc_heap* gch, int join_id) + { +#ifdef JOIN_STATS + // parallel execution ends here + end[gch->heap_number] = get_ts(); +#endif //JOIN_STATS + + assert (!join_struct.joined_p); + int color = join_struct.lock_color.LoadWithoutBarrier(); + + if (Interlocked::Decrement(&join_struct.join_lock) != 0) + { + dprintf (JOIN_LOG, ("join%d(%d): Join() Waiting...join_lock is now %d", + flavor, join_id, (int32_t)(join_struct.join_lock))); + + fire_event (gch->heap_number, time_start, type_join, join_id); + + //busy wait around the color + if (color == join_struct.lock_color.LoadWithoutBarrier()) + { +respin: + int spin_count = 128 * yp_spin_count_unit; + for (int j = 0; j < spin_count; j++) + { + if (color != join_struct.lock_color.LoadWithoutBarrier()) + { + break; + } + YieldProcessor(); // indicate to the processor that we are spinning + } + + // we've spun, and if color still hasn't changed, fall into hard wait + if (color == join_struct.lock_color.LoadWithoutBarrier()) + { + dprintf (JOIN_LOG, ("join%d(%d): Join() hard wait on reset event %d, join_lock is now %d", + flavor, join_id, color, (int32_t)(join_struct.join_lock))); + + uint32_t dwJoinWait = join_struct.joined_event[color].Wait(INFINITE, FALSE); + + if (dwJoinWait != WAIT_OBJECT_0) + { + STRESS_LOG1 (LF_GC, LL_FATALERROR, "joined event wait failed with code: %zx", dwJoinWait); + FATAL_GC_ERROR (); + } + } + + // avoid race due to the thread about to reset the event (occasionally) being preempted before ResetEvent() + if (color == 
join_struct.lock_color.LoadWithoutBarrier()) + { + dprintf (9999, ("---h%d %d j%d %d - respin!!! (c:%d-%d)", + gch->heap_number, join_id, join_struct.n_threads, color, join_struct.lock_color.LoadWithoutBarrier())); + goto respin; + } + + dprintf (JOIN_LOG, ("join%d(%d): Join() done, join_lock is %d", + flavor, join_id, (int32_t)(join_struct.join_lock))); + } + + fire_event (gch->heap_number, time_end, type_join, join_id); + +#ifdef JOIN_STATS + // parallel execution starts here + start[gch->heap_number] = get_ts(); + Interlocked::ExchangeAdd(&in_join_total[join_id], (start[gch->heap_number] - end[gch->heap_number])); +#endif //JOIN_STATS + } + else + { + fire_event (gch->heap_number, time_start, type_last_join, join_id); + + join_struct.joined_p = TRUE; + dprintf (JOIN_LOG, ("join%d(%d): Last thread to complete the join, setting id", flavor, join_id)); + join_struct.joined_event[!color].Reset(); + id = join_id; +#ifdef JOIN_STATS + // remember the join id, the last thread arriving, the start of the sequential phase, + // and keep track of the cycles spent waiting in the join + thd = gch->heap_number; + start_seq = get_ts(); + Interlocked::ExchangeAdd(&in_join_total[join_id], (start_seq - end[gch->heap_number])); +#endif //JOIN_STATS + } + } + + // Reverse join - first thread gets here does the work; other threads will only proceed + // after the work is done. + // Note that you cannot call this twice in a row on the same thread. Plus there's no + // need to call it twice in row - you should just merge the work. 
+ BOOL r_join (gc_heap* gch, int join_id) + { + + if (join_struct.n_threads == 1) + { + return TRUE; + } + + if (Interlocked::CompareExchange(&join_struct.r_join_lock, 0, join_struct.n_threads) == 0) + { + fire_event (gch->heap_number, time_start, type_join, join_id); + + dprintf (JOIN_LOG, ("r_join() Waiting...")); + + //busy wait around the color +respin: + int spin_count = 256 * yp_spin_count_unit; + for (int j = 0; j < spin_count; j++) + { + if (join_struct.wait_done) + { + break; + } + YieldProcessor(); // indicate to the processor that we are spinning + } + + // we've spun, and if color still hasn't changed, fall into hard wait + if (!join_struct.wait_done) + { + dprintf (JOIN_LOG, ("Join() hard wait on reset event %d", first_thread_arrived)); + uint32_t dwJoinWait = join_struct.joined_event[first_thread_arrived].Wait(INFINITE, FALSE); + if (dwJoinWait != WAIT_OBJECT_0) + { + STRESS_LOG1 (LF_GC, LL_FATALERROR, "joined event wait failed with code: %zx", dwJoinWait); + FATAL_GC_ERROR (); + } + } + + // avoid race due to the thread about to reset the event (occasionally) being preempted before ResetEvent() + if (!join_struct.wait_done) + { + goto respin; + } + + dprintf (JOIN_LOG, ("r_join() done")); + + fire_event (gch->heap_number, time_end, type_join, join_id); + + return FALSE; + } + else + { + fire_event (gch->heap_number, time_start, type_first_r_join, join_id); + return TRUE; + } + } + +#ifdef JOIN_STATS + uint64_t get_ts() + { + return GCToOSInterface::QueryPerformanceCounter(); + } + + void start_ts (gc_heap* gch) + { + // parallel execution ends here + start[gch->heap_number] = get_ts(); + } +#endif //JOIN_STATS + + void restart() + { +#ifdef JOIN_STATS + uint64_t elapsed_seq = get_ts() - start_seq; + uint64_t max = 0, sum = 0, wake = 0; + uint64_t min_ts = start[0]; + for (int i = 1; i < join_struct.n_threads; i++) + { + if(min_ts > start[i]) min_ts = start[i]; + } + + for (int i = 0; i < join_struct.n_threads; i++) + { + uint64_t wake_delay = 
start[i] - min_ts; + uint64_t elapsed = end[i] - start[i]; + if (max < elapsed) + max = elapsed; + sum += elapsed; + wake += wake_delay; + } + uint64_t seq_loss = (join_struct.n_threads - 1)*elapsed_seq; + uint64_t par_loss = join_struct.n_threads*max - sum; + double efficiency = 0.0; + if (max > 0) + efficiency = sum*100.0/(join_struct.n_threads*max); + + const double ts_scale = 1e-6; + + // enable this printf to get statistics on each individual join as it occurs + //printf("join #%3d seq_loss = %5g par_loss = %5g efficiency = %3.0f%%\n", join_id, ts_scale*seq_loss, ts_scale*par_loss, efficiency); + + elapsed_total[id] += sum; + wake_total[id] += wake; + seq_loss_total[id] += seq_loss; + par_loss_total[id] += par_loss; + + // every 10 seconds, print a summary of the time spent in each type of join + if (GCToOSInterface::GetLowPrecisionTimeStamp() - start_tick > 10*1000) + { + printf("**** summary *****\n"); + for (int i = 0; i < 16; i++) + { + printf("join #%3d elapsed_total = %8g wake_loss = %8g seq_loss = %8g par_loss = %8g in_join_total = %8g\n", + i, + ts_scale*elapsed_total[i], + ts_scale*wake_total[i], + ts_scale*seq_loss_total[i], + ts_scale*par_loss_total[i], + ts_scale*in_join_total[i]); + elapsed_total[i] = wake_total[i] = seq_loss_total[i] = par_loss_total[i] = in_join_total[i] = 0; + } + start_tick = GCToOSInterface::GetLowPrecisionTimeStamp(); + } +#endif //JOIN_STATS + + fire_event (join_heap_restart, time_start, type_restart, -1); + assert (join_struct.joined_p); + join_struct.joined_p = FALSE; + join_struct.join_lock = join_struct.n_threads; + dprintf (JOIN_LOG, ("join%d(%d): Restarting from join: join_lock is %d", flavor, id, (int32_t)(join_struct.join_lock))); + int color = join_struct.lock_color.LoadWithoutBarrier(); + join_struct.lock_color = !color; + join_struct.joined_event[color].Set(); + + fire_event (join_heap_restart, time_end, type_restart, -1); + +#ifdef JOIN_STATS + start[thd] = get_ts(); +#endif //JOIN_STATS + } + + BOOL joined() + 
    {
        dprintf (JOIN_LOG, ("join%d(%d): joined, join_lock is %d", flavor, id, (int32_t)(join_struct.join_lock)));
        return join_struct.joined_p;
    }

    void r_restart()
    {
        if (join_struct.n_threads != 1)
        {
            fire_event (join_heap_r_restart, time_start, type_restart, -1);
            join_struct.wait_done = TRUE;
            join_struct.joined_event[first_thread_arrived].Set();
            fire_event (join_heap_r_restart, time_end, type_restart, -1);
        }
    }

    void r_init()
    {
        if (join_struct.n_threads != 1)
        {
            join_struct.r_join_lock = join_struct.n_threads;
            join_struct.wait_done = FALSE;
            join_struct.joined_event[first_thread_arrived].Reset();
        }
    }
};

extern t_join gc_t_join;
#ifdef BACKGROUND_GC
extern t_join bgc_t_join;
#endif //BACKGROUND_GC
#endif //MULTIPLE_HEAPS

// Write a value with a full interlocked exchange so the store is globally visible.
inline
void c_write (uint32_t& place, uint32_t value)
{
    Interlocked::Exchange (&place, value);
}

// Spin up to count_to_spin iterations waiting for expr; if it still doesn't hold,
// yield the thread once and fall through (caller typically loops via goto).
#define spin_and_switch(count_to_spin, expr) \
{ \
    for (int j = 0; j < count_to_spin; j++) \
    { \
        if (expr) \
        { \
            break;\
        } \
        YieldProcessor(); \
    } \
    if (!(expr)) \
    { \
        GCToOSInterface::YieldThread(0); \
    } \
}

// Like spin_and_switch, but keeps spinning/yielding until expr holds.
#define spin_and_wait(count_to_spin, expr) \
{ \
    while (!expr) \
    { \
        for (int j = 0; j < count_to_spin; j++) \
        { \
            if (expr) \
            { \
                break; \
            } \
            YieldProcessor (); \
        } \
        if (!(expr)) \
        { \
            GCToOSInterface::YieldThread (0); \
        } \
    } \
}

#ifdef BACKGROUND_GC
#define max_pending_allocs 64

extern float bgc_uoh_inc_ratio_alloc_normal;
extern float bgc_uoh_inc_ratio_alloc_wait;

// Lock-free coordination between the background GC marker and UOH allocators:
// the marker publishes the object it's about to mark (rwp_object) while allocators
// publish objects being allocated (alloc_objects); each side spins if it sees a
// conflict. needs_checking is the interlocked gate protecting both publications.
class exclusive_sync
{
    VOLATILE(uint8_t*) rwp_object;
    VOLATILE(int32_t) needs_checking;

    int spin_count;

    uint8_t cache_separator[HS_CACHE_LINE_SIZE - (sizeof (spin_count) + sizeof (needs_checking) + sizeof (rwp_object))];

    // TODO - perhaps each object should be on its own cache line...
    VOLATILE(uint8_t*) alloc_objects[max_pending_allocs];

    // Returns the index of a free slot in alloc_objects, or -1 if all are taken.
    int find_free_index ()
    {
        for (int i = 0; i < max_pending_allocs; i++)
        {
            if (alloc_objects [i] == (uint8_t*)0)
            {
                return i;
            }
        }

        return -1;
    }

public:
    void init()
    {
        spin_count = 32 * (g_num_processors - 1);
        rwp_object = 0;
        needs_checking = 0;
        for (int i = 0; i < max_pending_allocs; i++)
        {
            alloc_objects [i] = (uint8_t*)0;
        }
    }

    // Debug validation: no allocation slot should still be published.
    void check()
    {
        for (int i = 0; i < max_pending_allocs; i++)
        {
            if (alloc_objects [i] != (uint8_t*)0)
            {
                FATAL_GC_ERROR();
            }
        }
    }

    // Marker side: publish obj as the object being marked; spins while any
    // allocator has the same object published.
    void bgc_mark_set (uint8_t* obj)
    {
        dprintf (3, ("cm: probing %p", obj));
retry:
        if (Interlocked::CompareExchange(&needs_checking, 1, 0) == 0)
        {
            // If we spend too much time scanning all the allocs,
            // consider adding a high water mark and scan up
            // to that; we'll need to interlock in done when
            // we update the high watermark.
            for (int i = 0; i < max_pending_allocs; i++)
            {
                if (obj == alloc_objects[i])
                {
                    needs_checking = 0;
                    dprintf (3, ("cm: will spin"));
                    spin_and_switch (spin_count, (obj != alloc_objects[i]));
                    goto retry;
                }
            }

            rwp_object = obj;
            needs_checking = 0;
            dprintf (3, ("cm: set %p", obj));
            return;
        }
        else
        {
            spin_and_switch (spin_count, (needs_checking == 0));
            goto retry;
        }
    }

    // Allocator side: publish obj in a free slot and return its index (the
    // "cookie"), or -1 when concurrent mark isn't in progress. Spins if the
    // marker currently holds the same object or no slot is free.
    int uoh_alloc_set (uint8_t* obj)
    {
        if (!gc_heap::cm_in_progress)
        {
            return -1;
        }

retry:
        dprintf (3, ("uoh alloc: probing %p", obj));

        if (Interlocked::CompareExchange(&needs_checking, 1, 0) == 0)
        {
            if (obj == rwp_object)
            {
                needs_checking = 0;
                spin_and_switch (spin_count, (obj != rwp_object));
                goto retry;
            }
            else
            {
                int cookie = find_free_index();

                if (cookie != -1)
                {
                    alloc_objects[cookie] = obj;
                    needs_checking = 0;

                    dprintf (3, ("uoh alloc: set %p at %d", obj, cookie));
                    return cookie;
                }
                else
                {
                    needs_checking = 0;
                    dprintf (3, ("uoh alloc: setting %p will spin to acquire a free index", obj));
                    spin_and_switch (spin_count, (find_free_index () != -1));
                    goto retry;
                }
            }
        }
        else
        {
            dprintf (3, ("uoh alloc: will spin on checking %p", obj));
            spin_and_switch (spin_count, (needs_checking == 0));
            goto retry;
        }
    }

    void bgc_mark_done ()
    {
        dprintf (3, ("cm: release lock on %p", (uint8_t *)rwp_object));
        rwp_object = 0;
    }

    void uoh_alloc_done_with_index (int index)
    {
        dprintf (3, ("uoh alloc: release lock on %p based on %d", (uint8_t *)alloc_objects[index], index));
        assert ((index >= 0) && (index < max_pending_allocs));
        alloc_objects[index] = (uint8_t*)0;
    }

    // Release by object value when the caller didn't keep the cookie; a miss is
    // logged but not fatal (best-effort release).
    void uoh_alloc_done (uint8_t* obj)
    {
        if (!gc_heap::cm_in_progress)
        {
            return;
        }

        for (int i = 0; i < max_pending_allocs; i++)
        {
            if (alloc_objects [i] == obj)
            {
                uoh_alloc_done_with_index(i);
                return;
            }
        }
        dprintf (3, ("uoh alloc: could not release lock on %p", obj));
    }
};
#endif //BACKGROUND_GC

#ifdef FEATURE_BASICFREEZE
// The array we allocate is organized as follows:
// 0th element is the address of the last array we allocated.
// starting from the 1st element are the segment addresses, that's
// what buckets() returns.
+struct bk +{ + uint8_t* add; + size_t val; +}; + +class sorted_table +{ +private: + ptrdiff_t size; + ptrdiff_t count; + bk* slots; + bk* buckets() { return (slots + 1); } + uint8_t*& last_slot (bk* arr) { return arr[0].add; } + bk* old_slots; +public: + static sorted_table* make_sorted_table (); + BOOL insert (uint8_t* add, size_t val);; + size_t lookup (uint8_t*& add); + void remove (uint8_t* add); + void clear (); + void delete_sorted_table(); + void delete_old_slots(); + void enqueue_old_slot(bk* sl); + BOOL ensure_space_for_insert(); +}; +#endif //FEATURE_BASICFREEZE + +#ifdef FEATURE_STRUCTALIGN +BOOL IsStructAligned (uint8_t *ptr, int requiredAlignment); +#endif // FEATURE_STRUCTALIGN + +#define GC_MARKED (size_t)0x1 +#ifdef DOUBLY_LINKED_FL +// This bit indicates that we'll need to set the bgc mark bit for this object during an FGC. +// We only do this when we decide to compact. +#define BGC_MARKED_BY_FGC (size_t)0x2 +#define MAKE_FREE_OBJ_IN_COMPACT (size_t)0x4 +#define ALLOWED_SPECIAL_HEADER_BITS (GC_MARKED|BGC_MARKED_BY_FGC|MAKE_FREE_OBJ_IN_COMPACT) +#else //DOUBLY_LINKED_FL +#define ALLOWED_SPECIAL_HEADER_BITS (GC_MARKED) +#endif //!DOUBLY_LINKED_FL + +#ifdef HOST_64BIT +#define SPECIAL_HEADER_BITS (0x7) +#else +#define SPECIAL_HEADER_BITS (0x3) +#endif + +#define free_object_base_size (plug_skew + sizeof(ArrayBase)) + +#define free_list_slot(x) ((uint8_t**)(x))[2] +#define free_list_undo(x) ((uint8_t**)(x))[-1] +#define UNDO_EMPTY ((uint8_t*)1) + +#ifdef DOUBLY_LINKED_FL +#define free_list_prev(x) ((uint8_t**)(x))[3] +#define PREV_EMPTY ((uint8_t*)1) + +inline +void check_and_clear_in_free_list (uint8_t* o, size_t size) +{ + if (size >= min_free_list) + { + free_list_prev (o) = PREV_EMPTY; + } +} + +#endif //DOUBLY_LINKED_FL + +typedef void** PTR_PTR; + +class CObjectHeader : public Object +{ +public: + +#if defined(FEATURE_NATIVEAOT) || defined(BUILD_AS_STANDALONE) + // The GC expects the following methods that are provided by the Object class in the 
CLR but not provided + // by NativeAOT's version of Object. + uint32_t GetNumComponents() + { + return ((ArrayBase *)this)->GetNumComponents(); + } + + void Validate(BOOL bDeep=TRUE, BOOL bVerifyNextHeader = FALSE, BOOL bVerifySyncBlock = FALSE) + { + // declaration of extra parameters just so the call site would need no #ifdefs + UNREFERENCED_PARAMETER(bVerifyNextHeader); + UNREFERENCED_PARAMETER(bVerifySyncBlock); + + MethodTable * pMT = GetMethodTable(); + + _ASSERTE(pMT->SanityCheck()); + + bool noRangeChecks = + (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_NO_RANGE_CHECKS) == GCConfig::HEAPVERIFY_NO_RANGE_CHECKS; + + BOOL fSmallObjectHeapPtr = FALSE, fLargeObjectHeapPtr = FALSE; + if (!noRangeChecks) + { + fSmallObjectHeapPtr = g_theGCHeap->IsHeapPointer(this, TRUE); + if (!fSmallObjectHeapPtr) + fLargeObjectHeapPtr = g_theGCHeap->IsHeapPointer(this); + + _ASSERTE(fSmallObjectHeapPtr || fLargeObjectHeapPtr); + } + +#ifdef FEATURE_STRUCTALIGN + _ASSERTE(IsStructAligned((uint8_t *)this, GetMethodTable()->GetBaseAlignment())); +#endif // FEATURE_STRUCTALIGN + +#if defined(FEATURE_64BIT_ALIGNMENT) && !defined(FEATURE_NATIVEAOT) + if (pMT->RequiresAlign8()) + { + _ASSERTE((((size_t)this) & 0x7) == (pMT->IsValueType() ? 
4U : 0U)); + } +#endif // FEATURE_64BIT_ALIGNMENT + +#ifdef VERIFY_HEAP + if (bDeep && (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC)) + g_theGCHeap->ValidateObjectMember(this); +#endif + if (fSmallObjectHeapPtr) + { +#ifdef FEATURE_BASICFREEZE + _ASSERTE(!g_theGCHeap->IsLargeObject(this) || g_theGCHeap->IsInFrozenSegment(this)); +#else + _ASSERTE(!g_theGCHeap->IsLargeObject(this)); +#endif + } + } + + void ValidateHeap(BOOL bDeep) + { + Validate(bDeep); + } + +#endif //FEATURE_NATIVEAOT || BUILD_AS_STANDALONE + + ///// + // + // Header Status Information + // + + MethodTable *GetMethodTable() const + { + return( (MethodTable *) (((size_t) RawGetMethodTable()) & (~SPECIAL_HEADER_BITS))); + } + + void SetMarked() + { + _ASSERTE(RawGetMethodTable()); + RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | GC_MARKED)); + } + + BOOL IsMarked() const + { + return !!(((size_t)RawGetMethodTable()) & GC_MARKED); + } + + void SetPinned() + { + assert (!(gc_heap::settings.concurrent)); + GetHeader()->SetGCBit(); + } + + BOOL IsPinned() const + { + return !!((((CObjectHeader*)this)->GetHeader()->GetBits()) & BIT_SBLK_GC_RESERVE); + } + + // Now we set more bits should actually only clear the mark bit + void ClearMarked() + { +#ifdef DOUBLY_LINKED_FL + RawSetMethodTable ((MethodTable *)(((size_t) RawGetMethodTable()) & (~GC_MARKED))); +#else + RawSetMethodTable (GetMethodTable()); +#endif //DOUBLY_LINKED_FL + } + +#ifdef DOUBLY_LINKED_FL + void SetBGCMarkBit() + { + RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | BGC_MARKED_BY_FGC)); + } + BOOL IsBGCMarkBitSet() const + { + return !!(((size_t)RawGetMethodTable()) & BGC_MARKED_BY_FGC); + } + void ClearBGCMarkBit() + { + RawSetMethodTable((MethodTable *)(((size_t) RawGetMethodTable()) & (~BGC_MARKED_BY_FGC))); + } + + void SetFreeObjInCompactBit() + { + RawSetMethodTable((MethodTable *) (((size_t) RawGetMethodTable()) | MAKE_FREE_OBJ_IN_COMPACT)); + } + BOOL 
IsFreeObjInCompactBitSet() const + { + return !!(((size_t)RawGetMethodTable()) & MAKE_FREE_OBJ_IN_COMPACT); + } + void ClearFreeObjInCompactBit() + { +#ifdef _DEBUG + // check this looks like an object, but do NOT validate pointers to other objects + // as these may not be valid yet - we are calling this during compact_phase + Validate(FALSE); +#endif //_DEBUG + RawSetMethodTable((MethodTable *)(((size_t) RawGetMethodTable()) & (~MAKE_FREE_OBJ_IN_COMPACT))); + } +#endif //DOUBLY_LINKED_FL + + size_t ClearSpecialBits() + { + size_t special_bits = ((size_t)RawGetMethodTable()) & SPECIAL_HEADER_BITS; + if (special_bits != 0) + { + assert ((special_bits & (~ALLOWED_SPECIAL_HEADER_BITS)) == 0); + RawSetMethodTable ((MethodTable*)(((size_t)RawGetMethodTable()) & ~(SPECIAL_HEADER_BITS))); + } + return special_bits; + } + + void SetSpecialBits (size_t special_bits) + { + assert ((special_bits & (~ALLOWED_SPECIAL_HEADER_BITS)) == 0); + if (special_bits != 0) + { + RawSetMethodTable ((MethodTable*)(((size_t)RawGetMethodTable()) | special_bits)); + } + } + + CGCDesc *GetSlotMap () + { + assert (GetMethodTable()->ContainsGCPointers()); + return CGCDesc::GetCGCDescFromMT(GetMethodTable()); + } + + void SetFree(size_t size) + { + assert (size >= free_object_base_size); + + assert (g_gc_pFreeObjectMethodTable->GetBaseSize() == free_object_base_size); + assert (g_gc_pFreeObjectMethodTable->RawGetComponentSize() == 1); + + RawSetMethodTable( g_gc_pFreeObjectMethodTable ); + + size_t* numComponentsPtr = (size_t*) &((uint8_t*) this)[ArrayBase::GetOffsetOfNumComponents()]; + *numComponentsPtr = size - free_object_base_size; +#ifdef VERIFY_HEAP + //This introduces a bug in the free list management. 
+ //((void**) this)[-1] = 0; // clear the sync block, + assert (*numComponentsPtr >= 0); + if (GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC) + { + memset (((uint8_t*)this)+sizeof(ArrayBase), 0xcc, *numComponentsPtr); +#ifdef DOUBLY_LINKED_FL + // However, in this case we can't leave the Next field uncleared because no one will clear it + // so it remains 0xcc and that's not good for verification + if (*numComponentsPtr > 0) + { + free_list_slot (this) = 0; + } +#endif //DOUBLY_LINKED_FL + } +#endif //VERIFY_HEAP + +#ifdef DOUBLY_LINKED_FL + // For background GC, we need to distinguish between a free object that's not on the free list + // and one that is. So we always set its prev to PREV_EMPTY to indicate that it's a free + // object that's not on the free list. If it should be on the free list, it will be set to the + // appropriate non zero value. + check_and_clear_in_free_list ((uint8_t*)this, size); +#endif //DOUBLY_LINKED_FL + } + + void UnsetFree() + { + size_t size = free_object_base_size - plug_skew; + + // since we only need to clear 2 ptr size, we do it manually + PTR_PTR m = (PTR_PTR) this; + for (size_t i = 0; i < size / sizeof(PTR_PTR); i++) + *(m++) = 0; + } + + BOOL IsFree () const + { + return (GetMethodTable() == g_gc_pFreeObjectMethodTable); + } + +#ifdef FEATURE_STRUCTALIGN + int GetRequiredAlignment () const + { + return GetMethodTable()->GetRequiredAlignment(); + } +#endif // FEATURE_STRUCTALIGN + + BOOL ContainsGCPointers() const + { + return GetMethodTable()->ContainsGCPointers(); + } + +#ifdef COLLECTIBLE_CLASS + BOOL Collectible() const + { + return GetMethodTable()->Collectible(); + } + + FORCEINLINE BOOL ContainsGCPointersOrCollectible() const + { + MethodTable *pMethodTable = GetMethodTable(); + return (pMethodTable->ContainsGCPointers() || pMethodTable->Collectible()); + } +#endif //COLLECTIBLE_CLASS + + Object* GetObjectBase() const + { + return (Object*) this; + } +}; + +#define header(i) ((CObjectHeader*)(i)) +#define 
method_table(o) ((CObjectHeader*)(o))->GetMethodTable() + +inline +BOOL is_induced (gc_reason reason) +{ + return ((reason == reason_induced) || + (reason == reason_induced_noforce) || + (reason == reason_lowmemory) || + (reason == reason_lowmemory_blocking) || + (reason == reason_induced_compacting) || + (reason == reason_induced_aggressive) || + (reason == reason_lowmemory_host) || + (reason == reason_lowmemory_host_blocking)); +} + +inline size_t my_get_size (Object* ob) +{ + MethodTable* mT = header(ob)->GetMethodTable(); + + return (mT->GetBaseSize() + + (mT->HasComponentSize() ? + ((size_t)((CObjectHeader*)ob)->GetNumComponents() * mT->RawGetComponentSize()) : 0)); +} + +#define size(i) my_get_size (header(i)) +#define marked(i) header(i)->IsMarked() +#define set_marked(i) header(i)->SetMarked() +#define clear_marked(i) header(i)->ClearMarked() +#define pinned(i) header(i)->IsPinned() +#define set_pinned(i) header(i)->SetPinned() +#define clear_pinned(i) header(i)->GetHeader()->ClrGCBit(); + +inline size_t unused_array_size(uint8_t * p) +{ + assert(((CObjectHeader*)p)->IsFree()); + + size_t* numComponentsPtr = (size_t*)(p + ArrayBase::GetOffsetOfNumComponents()); + return free_object_base_size + *numComponentsPtr; +} + +inline +size_t AlignQword (size_t nbytes) +{ +#ifdef FEATURE_STRUCTALIGN + return Align (nbytes); +#else // FEATURE_STRUCTALIGN + return (nbytes + 7) & ~7; +#endif // FEATURE_STRUCTALIGN +} + +inline +BOOL Aligned (size_t n) +{ + return (n & ALIGNCONST) == 0; +} + +//CLR_SIZE is the max amount of bytes from gen0 that is set to 0 in one chunk +#ifdef SERVER_GC +#define CLR_SIZE ((size_t)(8*1024+32)) +#else //SERVER_GC +#define CLR_SIZE ((size_t)(8*1024+32)) +#endif //SERVER_GC + +#define DECOMMIT_SIZE_PER_MILLISECOND (160*1024) + +#ifndef MULTIPLE_HEAPS +extern const int n_heaps; +#endif //MULTIPLE_HEAPS + +#ifdef MULTIPLE_HEAPS +extern bool affinity_config_specified_p; +#if defined(TARGET_AMD64) && !(defined(_MSC_VER) || defined(__GNUC__)) 
extern "C" ptrdiff_t get_cycle_count(void);
#else
ptrdiff_t get_cycle_count();
#endif

struct node_heap_count
{
    int node_no;
    int heap_count;
};

// Maps server-GC heaps to processors/NUMA nodes and picks the best heap for an
// allocating thread. When the OS can't report the current processor number, it
// falls back to timing accesses to per-heap "sniff" cache lines.
class heap_select
{
    heap_select() {}
public:
    static uint8_t* sniff_buffer;
    static unsigned n_sniff_buffers;
    static unsigned cur_sniff_index;

    static uint16_t proc_no_to_heap_no[MAX_SUPPORTED_CPUS];
    static uint16_t heap_no_to_proc_no[MAX_SUPPORTED_CPUS];
    static uint16_t heap_no_to_numa_node[MAX_SUPPORTED_CPUS];
    static uint16_t numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4];

#ifdef HEAP_BALANCE_INSTRUMENTATION
    // Note this is the total numa nodes GC heaps are on. There might be
    // more on the machine if GC threads aren't using all of them.
    static uint16_t total_numa_nodes;
    static node_heap_count heaps_on_node[MAX_SUPPORTED_NODES];
#endif

    // Time a read of the given heap's sniff line; a fast read suggests the line
    // is in this processor's cache, i.e. this heap was used here recently.
    static int access_time(uint8_t *sniff_buffer, int heap_number, unsigned sniff_index, unsigned n_sniff_buffers)
    {
        ptrdiff_t start_cycles = get_cycle_count();
        uint8_t sniff = sniff_buffer[(1 + heap_number*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE];
        assert (sniff == 0);
        ptrdiff_t elapsed_cycles = get_cycle_count() - start_cycles;
        // add sniff here just to defeat the optimizer
        elapsed_cycles += sniff;
        return (int) elapsed_cycles;
    }

public:
    static BOOL init(int n_heaps)
    {
        assert (sniff_buffer == NULL && n_sniff_buffers == 0);
        if (!GCToOSInterface::CanGetCurrentProcessorNumber())
        {
            n_sniff_buffers = n_heaps*2+1;
            size_t n_cache_lines = 1 + n_heaps * n_sniff_buffers + 1;
            size_t sniff_buf_size = n_cache_lines * HS_CACHE_LINE_SIZE;
            if (sniff_buf_size / HS_CACHE_LINE_SIZE != n_cache_lines) // check for overflow
            {
                return FALSE;
            }

            sniff_buffer = new (nothrow) uint8_t[sniff_buf_size];
            if (sniff_buffer == 0)
                return FALSE;
            memset(sniff_buffer, 0, sniff_buf_size*sizeof(uint8_t));
        }

        bool do_numa = GCToOSInterface::CanEnableGCNumaAware();

        // we want to assign heap indices such that there is a contiguous
        // range of heap numbers for each numa node

        // we do this in two passes:
        // 1. gather processor numbers and numa node numbers for all heaps
        // 2. assign heap numbers for each numa node

        // Pass 1: gather processor numbers and numa node numbers
        uint16_t proc_no[MAX_SUPPORTED_CPUS];
        uint16_t node_no[MAX_SUPPORTED_CPUS];
        uint16_t max_node_no = 0;
        uint16_t heap_num;
        for (heap_num = 0; heap_num < n_heaps; heap_num++)
        {
            if (!GCToOSInterface::GetProcessorForHeap (heap_num, &proc_no[heap_num], &node_no[heap_num]))
                break;
            assert(proc_no[heap_num] < MAX_SUPPORTED_CPUS);
            if (!do_numa || node_no[heap_num] == NUMA_NODE_UNDEFINED)
                node_no[heap_num] = 0;
            max_node_no = max(max_node_no, node_no[heap_num]);
        }

        // Pass 2: assign heap numbers by numa node
        int cur_heap_no = 0;
        for (uint16_t cur_node_no = 0; cur_node_no <= max_node_no; cur_node_no++)
        {
            for (int i = 0; i < heap_num; i++)
            {
                if (node_no[i] != cur_node_no)
                    continue;

                // we found a heap on cur_node_no
                heap_no_to_proc_no[cur_heap_no] = proc_no[i];
                heap_no_to_numa_node[cur_heap_no] = cur_node_no;

                cur_heap_no++;
            }
        }

        return TRUE;
    }

    static void init_cpu_mapping(int heap_number)
    {
        if (GCToOSInterface::CanGetCurrentProcessorNumber())
        {
            uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber();
            // For a 32-bit process running on a machine with > 64 procs,
            // even though the process can only use up to 32 procs, the processor
            // index can be >= 64; or in the cpu group case, if the process is not running in cpu group #0,
            // the GetCurrentProcessorNumber will return a number that's >= 64.
            proc_no_to_heap_no[proc_no % MAX_SUPPORTED_CPUS] = (uint16_t)heap_number;
        }
    }

    // Dirty this heap's sniff lines so a later access_time from the same
    // processor reads fast (only needed on the no-processor-number path).
    static void mark_heap(int heap_number)
    {
        if (GCToOSInterface::CanGetCurrentProcessorNumber())
            return;

        for (unsigned sniff_index = 0; sniff_index < n_sniff_buffers; sniff_index++)
            sniff_buffer[(1 + heap_number*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE] &= 1;
    }

    // Pick the heap an allocating thread should use: directly by current
    // processor number when available, otherwise by sniff-line timing.
    static int select_heap(alloc_context* acontext)
    {
#ifndef TRACE_GC
        UNREFERENCED_PARAMETER(acontext); // only referenced by dprintf
#endif //TRACE_GC

        if (GCToOSInterface::CanGetCurrentProcessorNumber())
        {
            uint32_t proc_no = GCToOSInterface::GetCurrentProcessorNumber();
            // For a 32-bit process running on a machine with > 64 procs,
            // even though the process can only use up to 32 procs, the processor
            // index can be >= 64; or in the cpu group case, if the process is not running in cpu group #0,
            // the GetCurrentProcessorNumber will return a number that's >= 64.
            int adjusted_heap = proc_no_to_heap_no[proc_no % MAX_SUPPORTED_CPUS];
            // with dynamic heap count, need to make sure the value is in range.
            if (adjusted_heap >= gc_heap::n_heaps)
            {
                adjusted_heap %= gc_heap::n_heaps;
            }
            return adjusted_heap;
        }

        unsigned sniff_index = Interlocked::Increment(&cur_sniff_index);
        sniff_index %= n_sniff_buffers;

        int best_heap = 0;
        int best_access_time = 1000*1000*1000;
        int second_best_access_time = best_access_time;

        uint8_t *l_sniff_buffer = sniff_buffer;
        unsigned l_n_sniff_buffers = n_sniff_buffers;
        for (int heap_number = 0; heap_number < gc_heap::n_heaps; heap_number++)
        {
            int this_access_time = access_time(l_sniff_buffer, heap_number, sniff_index, l_n_sniff_buffers);
            if (this_access_time < best_access_time)
            {
                second_best_access_time = best_access_time;
                best_access_time = this_access_time;
                best_heap = heap_number;
            }
            else if (this_access_time < second_best_access_time)
            {
                second_best_access_time = this_access_time;
            }
        }

        // Only "claim" the heap (by dirtying its sniff line) when the best time
        // is decisively faster than the runner-up.
        if (best_access_time*2 < second_best_access_time)
        {
            sniff_buffer[(1 + best_heap*n_sniff_buffers + sniff_index)*HS_CACHE_LINE_SIZE] &= 1;

            dprintf (3, ("select_heap yields crisp %d for context %p\n", best_heap, (void *)acontext));
        }
        else
        {
            dprintf (3, ("select_heap yields vague %d for context %p\n", best_heap, (void *)acontext ));
        }

        return best_heap;
    }

    static bool can_find_heap_fast()
    {
        return GCToOSInterface::CanGetCurrentProcessorNumber();
    }

    static uint16_t find_proc_no_from_heap_no(int heap_number)
    {
        return heap_no_to_proc_no[heap_number];
    }

    static uint16_t find_numa_node_from_heap_no(int heap_number)
    {
        return heap_no_to_numa_node[heap_number];
    }

    static void init_numa_node_to_heap_map(int nheaps)
    {
        // Called right after GCHeap::Init() for each heap
        // For each NUMA node used by the heaps, the
        // numa_node_to_heap_map[numa_node] is set to the first heap number on that node and
        // numa_node_to_heap_map[numa_node + 1] is set to the first heap number not on that node
        // Set the start of the heap number range for the first NUMA node
        numa_node_to_heap_map[heap_no_to_numa_node[0]] = 0;
#ifdef HEAP_BALANCE_INSTRUMENTATION
        total_numa_nodes = 0;
        memset (heaps_on_node, 0, sizeof (heaps_on_node));
        heaps_on_node[0].node_no = heap_no_to_numa_node[0];
        heaps_on_node[0].heap_count = 1;
#endif //HEAP_BALANCE_INSTRUMENTATION

        for (int i=1; i < nheaps; i++)
        {
            if (heap_no_to_numa_node[i] != heap_no_to_numa_node[i-1])
            {
#ifdef HEAP_BALANCE_INSTRUMENTATION
                total_numa_nodes++;
                heaps_on_node[total_numa_nodes].node_no = heap_no_to_numa_node[i];
#endif

                // Set the end of the heap number range for the previous NUMA node
                numa_node_to_heap_map[heap_no_to_numa_node[i-1] + 1] =
                // Set the start of the heap number range for the current NUMA node
                numa_node_to_heap_map[heap_no_to_numa_node[i]] = (uint16_t)i;
            }
#ifdef HEAP_BALANCE_INSTRUMENTATION
            (heaps_on_node[total_numa_nodes].heap_count)++;
#endif
        }

        // Set the end of the heap range for the last NUMA node
        numa_node_to_heap_map[heap_no_to_numa_node[nheaps-1] + 1] = (uint16_t)nheaps; //mark the end with nheaps

#ifdef HEAP_BALANCE_INSTRUMENTATION
        total_numa_nodes++;
#endif
    }

    // For processor `index`, report its proc/node numbers and the heap-number
    // range on that node; false if the processor can't be resolved.
    static bool get_info_proc (int index, uint16_t* proc_no, uint16_t* node_no, int* start_heap, int* end_heap)
    {
        if (!GCToOSInterface::GetProcessorForHeap ((uint16_t)index, proc_no, node_no))
            return false;

        if (*node_no == NUMA_NODE_UNDEFINED)
            *node_no = 0;

        *start_heap = (int)numa_node_to_heap_map[*node_no];
        *end_heap = (int)(numa_node_to_heap_map[*node_no + 1]);

        return true;
    }

    // Assign processors that have no dedicated heap to existing heaps,
    // preferring heaps on the same NUMA node.
    static void distribute_other_procs (bool distribute_all_p)
    {
        if (affinity_config_specified_p)
            return;

        if (distribute_all_p)
        {
            uint16_t current_heap_no_on_node[MAX_SUPPORTED_CPUS];
            memset (current_heap_no_on_node, 0, sizeof (current_heap_no_on_node));
            uint16_t current_heap_no = 0;

            uint16_t proc_no = 0;
            uint16_t node_no = 0;

            for (int i = gc_heap::n_heaps; i < (int)g_num_active_processors; i++)
            {
                int start_heap, end_heap;
                if (!get_info_proc (i, &proc_no, &node_no, &start_heap, &end_heap))
                    break;

                // This indicates there are heaps on this node
                if ((end_heap - start_heap) > 0)
                {
                    proc_no_to_heap_no[proc_no] = (current_heap_no_on_node[node_no] % (uint16_t)(end_heap - start_heap)) + (uint16_t)start_heap;
                    (current_heap_no_on_node[node_no])++;
                }
                else
                {
                    proc_no_to_heap_no[proc_no] = current_heap_no % gc_heap::n_heaps;
                    (current_heap_no)++;
                }
            }
        }
        else
        {
            // This is for scenarios where GCHeapCount is specified as something like
            // (g_num_active_processors - 2) to allow less randomization to the Server GC threads.
            // In this case we want to assign the right heaps to those procs, ie if they share
            // the same numa node we want to assign local heaps to those procs. Otherwise we
            // let the heap balancing mechanism take over for now.
            uint16_t proc_no = 0;
            uint16_t node_no = 0;
            int current_node_no = -1;
            int current_heap_on_node = -1;

            for (int i = gc_heap::n_heaps; i < (int)g_num_active_processors; i++)
            {
                int start_heap, end_heap;
                if (!get_info_proc (i, &proc_no, &node_no, &start_heap, &end_heap))
                    break;

                if ((end_heap - start_heap) > 0)
                {
                    if (node_no == current_node_no)
                    {
                        // We already iterated through all heaps on this node, don't add more procs to these
                        // heaps.
                        if (current_heap_on_node >= end_heap)
                        {
                            continue;
                        }
                    }
                    else
                    {
                        current_node_no = node_no;
                        current_heap_on_node = start_heap;
                    }

                    proc_no_to_heap_no[proc_no] = (uint16_t)current_heap_on_node;

                    current_heap_on_node++;
                }
            }
        }
    }

    static void get_heap_range_for_heap(int hn, int* start, int* end)
    {
        uint16_t numa_node = heap_no_to_numa_node[hn];
        *start = (int)numa_node_to_heap_map[numa_node];
        *end = (int)(numa_node_to_heap_map[numa_node+1]);
#ifdef HEAP_BALANCE_INSTRUMENTATION
        dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMPget_heap_range: %d is in numa node %d, start = %d, end = %d", hn, numa_node, *start, *end));
#endif //HEAP_BALANCE_INSTRUMENTATION
    }
};
#endif //MULTIPLE_HEAPS

// Bookkeeping for a pinned plug: the plug's address/length plus saved copies of
// the object info that the artificial pre/post gaps overwrote.
class mark
{
public:
    uint8_t* first;
    size_t len;

    // If we want to save space we can have a pool of plug_and_gap's instead of
    // always having 2 allocated for each pinned plug.
    gap_reloc_pair saved_pre_plug;
    // If we decide to not compact, we need to restore the original values.
    gap_reloc_pair saved_pre_plug_reloc;

    gap_reloc_pair saved_post_plug;

    // Supposedly Pinned objects cannot have references but we are seeing some from pinvoke
    // frames. Also if it's an artificially pinned plug created by us, it can certainly
    // have references.
    // We know these cases will be rare so we can optimize this to be only allocated on demand.
    gap_reloc_pair saved_post_plug_reloc;

    // We need to calculate this after we are done with plan phase and before compact
    // phase because compact phase will change the bricks so relocate_address will no
    // longer work.
    uint8_t* saved_pre_plug_info_reloc_start;

    // We need to save this because we will have no way to calculate it, unlike the
    // pre plug info start which is right before this plug.
    uint8_t* saved_post_plug_info_start;

#ifdef SHORT_PLUGS
    uint8_t* allocation_context_start_region;
#endif //SHORT_PLUGS

    // How the bits in these bytes are organized:
    // MSB --> LSB
    // bit to indicate whether it's a short obj | 3 bits for refs in this short obj | 2 unused bits | bit to indicate if it's collectible | last bit
    // last bit indicates if there's pre or post info associated with this plug. If it's not set all other bits will be 0.
    BOOL saved_pre_p;
    BOOL saved_post_p;

#ifdef _DEBUG
    // We are seeing this is getting corrupted for a PP with a NP after.
    // Save it when we first set it and make sure it doesn't change.
    gap_reloc_pair saved_post_plug_debug;
#endif //_DEBUG

    size_t get_max_short_bits()
    {
        return (sizeof (gap_reloc_pair) / sizeof (uint8_t*));
    }

    // pre bits
    size_t get_pre_short_start_bit ()
    {
        return (sizeof (saved_pre_p) * 8 - 1 - (sizeof (gap_reloc_pair) / sizeof (uint8_t*)));
    }

    BOOL pre_short_p()
    {
        return (saved_pre_p & (1 << (sizeof (saved_pre_p) * 8 - 1)));
    }

    void set_pre_short()
    {
        saved_pre_p |= (1 << (sizeof (saved_pre_p) * 8 - 1));
    }

    void set_pre_short_bit (size_t bit)
    {
        saved_pre_p |= 1 << (get_pre_short_start_bit() + bit);
    }

    BOOL pre_short_bit_p (size_t bit)
    {
        return (saved_pre_p & (1 << (get_pre_short_start_bit() + bit)));
    }

#ifdef COLLECTIBLE_CLASS
    void set_pre_short_collectible()
    {
        saved_pre_p |= 2;
    }

    BOOL pre_short_collectible_p()
    {
        return (saved_pre_p & 2);
    }
#endif //COLLECTIBLE_CLASS

    // post bits
    size_t get_post_short_start_bit ()
    {
        return (sizeof (saved_post_p) * 8 - 1 - (sizeof (gap_reloc_pair) / sizeof (uint8_t*)));
    }

    BOOL post_short_p()
    {
        return (saved_post_p & (1 << (sizeof (saved_post_p) * 8 - 1)));
    }

    void set_post_short()
    {
        saved_post_p |= (1 << (sizeof (saved_post_p) * 8 - 1));
    }

    void set_post_short_bit (size_t bit)
    {
        saved_post_p |= 1 << (get_post_short_start_bit() + bit);
    }

    BOOL post_short_bit_p (size_t bit)
    {
        return (saved_post_p & (1 << (get_post_short_start_bit() + bit)));
    }

#ifdef COLLECTIBLE_CLASS
    void set_post_short_collectible()
    {
        saved_post_p |= 2;
    }

    BOOL post_short_collectible_p()
    {
        return (saved_post_p & 2);
    }
#endif //COLLECTIBLE_CLASS

    uint8_t* get_plug_address() { return first; }

    BOOL has_pre_plug_info() { return saved_pre_p; }
    BOOL has_post_plug_info() { return saved_post_p; }

    gap_reloc_pair* get_pre_plug_reloc_info() { return &saved_pre_plug_reloc; }
    gap_reloc_pair* get_post_plug_reloc_info() { return &saved_post_plug_reloc; }
    void set_pre_plug_info_reloc_start (uint8_t* reloc) { saved_pre_plug_info_reloc_start = reloc; }
    uint8_t* get_post_plug_info_start() { return saved_post_plug_info_start; }

    // We need to temporarily recover the shortened plugs for compact phase so we can
    // copy over the whole plug and their related info (mark bits/cards). But we will
    // need to set the artificial gap back so compact phase can keep reading the plug info.
    // We also need to recover the saved info because we'll need to recover it later.
    //
    // So we would call swap_p*_plug_and_saved once to recover the object info; then call
    // it again to recover the artificial gap.
    void swap_pre_plug_and_saved()
    {
        gap_reloc_pair temp;
        memcpy (&temp, (first - sizeof (plug_and_gap)), sizeof (temp));
        memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug_reloc, sizeof (saved_pre_plug_reloc));
        saved_pre_plug_reloc = temp;
    }

    void swap_post_plug_and_saved()
    {
        gap_reloc_pair temp;
        memcpy (&temp, saved_post_plug_info_start, sizeof (temp));
        memcpy (saved_post_plug_info_start, &saved_post_plug_reloc, sizeof (saved_post_plug_reloc));
        saved_post_plug_reloc = temp;
    }

    // Same swaps but against the non-reloc saved copies (used on the profiler path).
    void swap_pre_plug_and_saved_for_profiler()
    {
        gap_reloc_pair temp;
        memcpy (&temp, (first - sizeof (plug_and_gap)), sizeof (temp));
        memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug, sizeof (saved_pre_plug));
        saved_pre_plug = temp;
    }

    void swap_post_plug_and_saved_for_profiler()
    {
        gap_reloc_pair temp;
        memcpy (&temp, saved_post_plug_info_start, sizeof (temp));
        memcpy (saved_post_plug_info_start, &saved_post_plug, sizeof (saved_post_plug));
        saved_post_plug = temp;
    }

    // We should think about whether it's really necessary to have to copy back the pre plug
    // info since it was already copied during compacting plugs. But if a plug doesn't move
    // by >= 3 ptr size (the size of gap_reloc_pair), it means we'd have to recover pre plug info.
    size_t recover_plug_info()
    {
        // We need to calculate the size for sweep case in order to correctly record the
        // free_obj_space - sweep would've made these artificial gaps into free objects and
        // we would need to deduct the size because now we are writing into those free objects.
        size_t recovered_sweep_size = 0;

        if (saved_pre_p)
        {
            if (gc_heap::settings.compaction)
            {
                dprintf (3, ("%p: REC Pre: %p-%p",
                    first,
                    &saved_pre_plug_reloc,
                    saved_pre_plug_info_reloc_start));
                memcpy (saved_pre_plug_info_reloc_start, &saved_pre_plug_reloc, sizeof (saved_pre_plug_reloc));
            }
            else
            {
                dprintf (3, ("%p: REC Pre: %p-%p",
                    first,
                    &saved_pre_plug,
                    (first - sizeof (plug_and_gap))));
                memcpy ((first - sizeof (plug_and_gap)), &saved_pre_plug, sizeof (saved_pre_plug));
                recovered_sweep_size += sizeof (saved_pre_plug);
            }
        }

        if (saved_post_p)
        {
            if (gc_heap::settings.compaction)
            {
                dprintf (3, ("%p: REC Post: %p-%p",
                    first,
                    &saved_post_plug_reloc,
                    saved_post_plug_info_start));
                memcpy (saved_post_plug_info_start, &saved_post_plug_reloc, sizeof (saved_post_plug_reloc));
            }
            else
            {
                dprintf (3, ("%p: REC Post: %p-%p",
                    first,
                    &saved_post_plug,
                    saved_post_plug_info_start));
                memcpy (saved_post_plug_info_start, &saved_post_plug, sizeof (saved_post_plug));
                recovered_sweep_size += sizeof (saved_post_plug);
            }
        }

        return recovered_sweep_size;
    }
};

// We don't store seg_mapping_table in card_table_info because there's only always one view.
extern seg_mapping* seg_mapping_table;

// Header describing one card table allocation and its companion tables.
class card_table_info
{
public:
    unsigned recount;
    size_t size;
    uint32_t* next_card_table;

    uint8_t* lowest_address;
    uint8_t* highest_address;
    short* brick_table;

#ifdef CARD_BUNDLE
    uint32_t* card_bundle_table;
#endif //CARD_BUNDLE

    // mark_array is always at the end of the data structure because we
    // want to be able to make one commit call for everything before it.
#ifdef BACKGROUND_GC
    uint32_t* mark_array;
#endif //BACKGROUND_GC
};

// The DAC mirrors this layout; keep these fields in sync with dac_card_table_info.
static_assert(offsetof(dac_card_table_info, size) == offsetof(card_table_info, size), "DAC card_table_info layout mismatch");
static_assert(offsetof(dac_card_table_info, next_card_table) == offsetof(card_table_info, next_card_table), "DAC card_table_info layout mismatch");

#ifdef WRITE_WATCH
#ifndef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
extern bool virtual_alloc_hardware_write_watch;
#endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
extern bool hardware_write_watch_capability;

inline bool can_use_hardware_write_watch()
{
    return hardware_write_watch_capability;
}

inline bool can_use_write_watch_for_card_table()
{
#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
    return true;
#else
    return can_use_hardware_write_watch();
#endif
}
#endif //WRITE_WATCH

// Convert a byte count to whole gibibytes (truncating integer division).
inline
size_t gib (size_t num)
{
    return (num / 1024 / 1024 / 1024);
}

#ifdef HOST_64BIT
#define brick_size ((size_t)4096)
#else
#define brick_size ((size_t)2048)
#endif //HOST_64BIT

// Round an address up to the next brick boundary.
inline
uint8_t* align_on_brick (uint8_t* add)
{
    return (uint8_t*)((size_t)(add + brick_size - 1) & ~(brick_size - 1));
}

#ifdef CARD_BUNDLE
//threshold of heap size to turn on card bundles.
#define SH_TH_CARD_BUNDLE (40*1024*1024)
#define MH_TH_CARD_BUNDLE (180*1024*1024)
#endif //CARD_BUNDLE

// time in milliseconds between decommit steps
#define DECOMMIT_TIME_STEP_MILLISECONDS (100)

#if defined(HOST_64BIT)
#define MAX_ALLOWED_MEM_LOAD 85
#define MIN_YOUNGEST_GEN_DESIRED (16*1024*1024)
#endif //HOST_64BIT

extern const size_t min_segment_size_hard_limit;
extern const size_t low_latency_alloc;
extern gc_reason gc_trigger_reason;
extern double qpf_us;

uint64_t RawGetHighPrecisionTimeStamp();

#ifdef WRITE_WATCH
#ifdef BACKGROUND_GC
extern const size_t ww_reset_quantum;
#endif //BACKGROUND_GC

// Round an address (as size_t) down to a write-watch-unit boundary.
inline
size_t align_write_watch_lower_page (size_t add)
{
    return (add & ~(WRITE_WATCH_UNIT_SIZE - 1));
}

// Pointer overload. NOTE(review): this overload masks with OS_PAGE_SIZE while the
// size_t overload above uses WRITE_WATCH_UNIT_SIZE — confirm the asymmetry is intended.
inline
uint8_t* align_write_watch_lower_page (uint8_t* add)
{
    return (uint8_t*)((size_t)add & ~((size_t)OS_PAGE_SIZE - 1));
}
#endif //WRITE_WATCH

void stomp_write_barrier_ephemeral (uint8_t* ephemeral_low, uint8_t* ephemeral_high
#ifdef USE_REGIONS
                                    , gc_heap::region_info* map_region_to_generation_skewed
                                    , uint8_t region_shr
#endif //USE_REGIONS
    );

void process_sync_log_stats();
void* virtual_alloc (size_t size);
void* virtual_alloc (size_t size, bool use_large_pages_p, uint16_t numa_node = NUMA_NODE_UNDEFINED);
size_t get_valid_segment_size (BOOL large_seg = FALSE);
heap_segment* ro_segment_lookup (uint8_t* o);
heap_segment* heap_segment_rw (heap_segment* ns);
heap_segment* heap_segment_next_rw (heap_segment* seg);
heap_segment* heap_segment_prev_rw (heap_segment* begin, heap_segment* seg);
void enter_spin_lock (GCSpinLock* spin_lock);

// Return ns if it is null or already in range; otherwise walk forward to the
// first in-range segment (null if none remain).
inline
heap_segment* heap_segment_in_range (heap_segment* ns)
{
    if ((ns == 0) || heap_segment_in_range_p (ns))
    {
        return ns;
    }
    else
    {
        do
        {
            ns = heap_segment_next (ns);
        } while ((ns != 0) && !heap_segment_in_range_p (ns));
        return ns;
    }
}

inline
heap_segment* heap_segment_next_in_range (heap_segment* seg)
{
    heap_segment* ns =
        heap_segment_next (seg);
    return heap_segment_in_range (ns);
}

inline
BOOL in_range_for_segment (uint8_t* add, heap_segment* seg)
{
    return ((add >= heap_segment_mem (seg)) && (add < heap_segment_reserved (seg)));
}

// This is for methods that need to iterate through all SOH heap segments/regions.
inline
int get_start_generation_index()
{
#ifdef USE_REGIONS
    return 0;
#else
    return max_generation;
#endif //USE_REGIONS
}

inline
int get_stop_generation_index (int condemned_gen_number)
{
#ifdef USE_REGIONS
    return 0;
#else
    return condemned_gen_number;
#endif //USE_REGIONS
}

// Round an address down to the enclosing min-segment-sized chunk.
inline
uint8_t* align_lower_segment (uint8_t* add)
{
    return (uint8_t*)((size_t)(add) & ~(((size_t)1 << gc_heap::min_segment_size_shr) - 1));
}

#ifdef CARD_BUNDLE
// The card bundle keeps track of groups of card words.
static const size_t card_bundle_word_width = 32;

static const size_t card_bundle_size = (size_t)(GC_PAGE_SIZE / (sizeof(uint32_t)*card_bundle_word_width));

inline
size_t card_bundle_word (size_t cardb)
{
    return cardb / card_bundle_word_width;
}

inline
uint32_t card_bundle_bit (size_t cardb)
{
    return (uint32_t)(cardb % card_bundle_word_width);
}

size_t align_cardw_on_bundle (size_t cardw);
size_t cardw_card_bundle (size_t cardw);
size_t card_bundle_cardw (size_t cardb);
uint32_t* translate_card_bundle_table (uint32_t* cb, uint8_t* lowest_address);
#endif //CARD_BUNDLE

inline
uint8_t* align_lower_brick (uint8_t* add)
{
    return (uint8_t*)(((size_t)add) & ~(brick_size - 1));
}

size_t size_brick_of (uint8_t* from, uint8_t* end);

inline
uint8_t* align_on_card (uint8_t* add)
{
    return (uint8_t*)((size_t)(add + card_size - 1) & ~(card_size - 1 ));
}

inline
uint8_t* align_on_card_word (uint8_t* add)
{
    return (uint8_t*) ((size_t)(add + (card_size*card_word_width)-1) & ~(card_size*card_word_width - 1));
}

inline
uint8_t* align_lower_card (uint8_t* add)
{
    return (uint8_t*)((size_t)add &
                      ~(card_size-1));
}

size_t gcard_of (uint8_t*);
size_t count_card_of (uint8_t* from, uint8_t* end);
size_t size_card_of (uint8_t* from, uint8_t* end);
size_t size_seg_mapping_table_of (uint8_t* from, uint8_t* end);
heap_segment* seg_mapping_table_segment_of (uint8_t* o);
#ifdef MULTIPLE_HEAPS
gc_heap* seg_mapping_table_heap_of (uint8_t* o);
#endif //MULTIPLE_HEAPS

// Map an object address to its owning heap. Null, or an address not covered by
// the seg mapping table, maps to heap 0; single-heap builds return __this.
inline
gc_heap* gc_heap::heap_of (uint8_t* o)
{
#ifdef MULTIPLE_HEAPS
    if (o == 0)
        return g_heaps [0];

    gc_heap* hp = seg_mapping_table_heap_of (o);
    return (hp ? hp : g_heaps[0]);
#else //MULTIPLE_HEAPS
    UNREFERENCED_PARAMETER(o);
    return __this;
#endif //MULTIPLE_HEAPS
}

inline
size_t seg_mapping_word_of (uint8_t* add)
{
    return (size_t)add >> gc_heap::min_segment_size_shr;
}

// The card_table_info header sits directly before the card table; this and the
// accessors below reach its fields via a negative offset from the table pointer.
inline
unsigned& card_table_refcount (uint32_t* c_table)
{
    return *(unsigned*)((char*)c_table - sizeof (card_table_info));
}

uint32_t* translate_card_table (uint32_t* ct);
void own_card_table (uint32_t* c_table);
void release_card_table (uint32_t* c_table);
void stomp_write_barrier_resize(bool is_runtime_suspended, bool requires_upper_bounds_check);

inline
short*& card_table_brick_table (uint32_t* c_table)
{
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->brick_table;
}

#ifdef CARD_BUNDLE
inline
uint32_t*& card_table_card_bundle_table (uint32_t* c_table)
{
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->card_bundle_table;
}
#endif //CARD_BUNDLE

#ifdef BACKGROUND_GC
inline
uint32_t*& card_table_mark_array (uint32_t* c_table)
{
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->mark_array;
}

size_t size_mark_array_of (uint8_t* from, uint8_t* end);
uint32_t* translate_mark_array (uint32_t* ma);
#endif //BACKGROUND_GC

// Grow the mark stack to max(init_len, 2*len), copying the old contents.
// Returns FALSE and leaves m/len untouched when the allocation fails.
inline
BOOL grow_mark_stack (mark*& m, size_t& len, size_t init_len)
{
    size_t new_size = max (init_len, 2 * len);
    mark* tmp = new (nothrow) mark
        [new_size];
    if (tmp)
    {
        memcpy (tmp, m, len * sizeof (mark));
        delete[] m;
        m = tmp;
        len = new_size;
        return TRUE;
    }
    else
    {
        dprintf (1, ("Failed to allocate %zd bytes for mark stack", (len * sizeof (mark))));
        return FALSE;
    }
}

enum
{
    CORINFO_EXCEPTION_GC = 0xE0004743 // 'GC'
};

#define mark_stack_empty_p() (mark_stack_base == mark_stack_tos)

// m_boundary records a marked object in the mark list (overflow just bumps the
// index under GC_CONFIG_DRIVEN so the overflow amount can be reported); the
// single-heap variants also track the slow/shigh ephemeral bounds.
#ifdef MULTIPLE_HEAPS

#ifdef GC_CONFIG_DRIVEN
#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;} else {mark_list_index++;}}
#else //GC_CONFIG_DRIVEN
#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;}}
#endif //GC_CONFIG_DRIVEN

#define m_boundary_fullgc(o) {}

#else //MULTIPLE_HEAPS

#ifdef GC_CONFIG_DRIVEN
#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;} else {mark_list_index++;} if (slow > o) slow = o; if (shigh < o) shigh = o;}
#else
#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;}if (slow > o) slow = o; if (shigh < o) shigh = o;}
#endif //GC_CONFIG_DRIVEN

#define m_boundary_fullgc(o) {if (slow > o) slow = o; if (shigh < o) shigh = o;}

#endif //MULTIPLE_HEAPS

// The low two bits of a mark-stack entry encode its kind.
#define stolen 2
#define partial 1
#define partial_object 3

inline
BOOL stolen_p (uint8_t* r)
{
    return (((size_t)r & 2) && !((size_t)r & 1));
}

inline
BOOL partial_p (uint8_t* r)
{
    return (((size_t)r & 1) && !((size_t)r & 2));
}

inline
BOOL straight_ref_p (uint8_t* r)
{
    return (!stolen_p (r) && !partial_p (r));
}

inline
BOOL partial_object_p (uint8_t* r)
{
    return (((size_t)r & partial_object) == partial_object);
}

#ifdef MULTIPLE_HEAPS
extern VOLATILE(BOOL) s_fUnpromotedHandles;
extern VOLATILE(BOOL) s_fUnscannedPromotions;
extern VOLATILE(BOOL) s_fScanRequired;
#endif //MULTIPLE_HEAPS

uint8_t** make_mark_list (size_t size);

#ifdef USE_VXSORT
void
do_vxsort (uint8_t** item_array, ptrdiff_t item_count, uint8_t* range_low, uint8_t* range_high);
#endif //USE_VXSORT

uint8_t* compute_next_end (heap_segment* seg, uint8_t* low);

inline
size_t& card_table_size (uint32_t* c_table)
{
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->size;
}

#ifdef USE_REGIONS
extern region_allocator global_region_allocator;
extern uint8_t*(*initial_regions)[total_generation_count][2];
extern const size_t uninitialized_end_gen0_region_space;
size_t size_region_to_generation_table_of (uint8_t* from, uint8_t* end);

// Look up the region info (heap_segment) covering an address. A negative first
// field in the basic entry is a back-offset to the entry for the start of a
// larger, multi-basic-region region.
inline
heap_segment* get_region_info_for_address (uint8_t* address)
{
    size_t basic_region_index = (size_t)address >> gc_heap::min_segment_size_shr;
    heap_segment* basic_region_info_entry = (heap_segment*)&seg_mapping_table[basic_region_index];
    ptrdiff_t first_field = (ptrdiff_t)heap_segment_allocated (basic_region_info_entry);
    if (first_field < 0)
    {
        basic_region_index += first_field;
    }

    return ((heap_segment*)(&seg_mapping_table[basic_region_index]));
}

#ifdef DYNAMIC_HEAP_COUNT
// Poison values stamped into the fields of decommissioned heaps so stale use is
// recognizable.
#define DECOMMISSIONED_VALUE 0xdec0dec0dec0dec0
static const size_t DECOMMISSIONED_SIZE_T = DECOMMISSIONED_VALUE;
static const ptrdiff_t DECOMMISSIONED_PTRDIFF_T = (ptrdiff_t)DECOMMISSIONED_VALUE;
// NOTE(review): declared ptrdiff_t but named/cast as uint64_t — confirm intended type.
static const ptrdiff_t DECOMMISSIONED_UINT64_T = (uint64_t)DECOMMISSIONED_VALUE;
static uint8_t* const DECOMMISSIONED_UINT8_T_P = (uint8_t*)DECOMMISSIONED_VALUE;
static uint8_t** const DECOMMISSIONED_UINT8_T_PP = (uint8_t**)DECOMMISSIONED_VALUE;
static PTR_heap_segment const DECOMMISSIONED_REGION_P = (PTR_heap_segment)DECOMMISSIONED_VALUE;
static mark* const DECOMMISSIONED_MARK_P = (mark*)DECOMMISSIONED_VALUE;
static const BOOL DECOMMISSIONED_BOOL = 0xdec0dec0;
// NOTE(review): declared BOOL but named _INT and cast to int — confirm intended type.
static const BOOL DECOMMISSIONED_INT = (int)0xdec0dec0;
static const float DECOMMISSIONED_FLOAT = (float)DECOMMISSIONED_VALUE;
static const ptrdiff_t UNINITIALIZED_VALUE = 0xbaadbaadbaadbaad;
#endif //DYNAMIC_HEAP_COUNT
inline bool is_in_heap_range (uint8_t* o)
{
#ifdef FEATURE_BASICFREEZE
    assert (((g_gc_lowest_address <= o) && (o < g_gc_highest_address)) ||
            (o == nullptr) || (ro_segment_lookup (o) != nullptr));
    return ((g_gc_lowest_address <= o) && (o < g_gc_highest_address));
#else //FEATURE_BASICFREEZE
    assert ((o == nullptr) || (g_gc_lowest_address <= o) && (o < g_gc_highest_address));
    return (o != nullptr);
#endif //FEATURE_BASICFREEZE
}

// A region's first object is preceded by an aligned_plug_and_gap header;
// the region "start" points at that header, before heap_segment_mem.
inline
uint8_t* get_region_start (heap_segment* region_info)
{
    uint8_t* obj_start = heap_segment_mem (region_info);
    return (obj_start - sizeof (aligned_plug_and_gap));
}

inline
size_t get_region_size (heap_segment* region_info)
{
    return (size_t)(heap_segment_reserved (region_info) - get_region_start (region_info));
}

inline
size_t get_region_committed_size (heap_segment* region)
{
    uint8_t* start = get_region_start (region);
    uint8_t* committed = heap_segment_committed (region);
    return committed - start;
}

// "Skewed" index: not rebased to g_gc_lowest_address.
inline
size_t get_skewed_basic_region_index_for_address (uint8_t* address)
{
    assert ((g_gc_lowest_address <= address) && (address <= g_gc_highest_address));
    size_t skewed_basic_region_index = (size_t)address >> gc_heap::min_segment_size_shr;
    return skewed_basic_region_index;
}

inline
size_t get_basic_region_index_for_address (uint8_t* address)
{
    size_t skewed_basic_region_index = get_skewed_basic_region_index_for_address (address);
    return (skewed_basic_region_index - get_skewed_basic_region_index_for_address (g_gc_lowest_address));
}

// Unlike get_region_info_for_address, this assumes region_start IS the start of
// a region, so no back-offset adjustment is needed.
inline
heap_segment* get_region_info (uint8_t* region_start)
{
    size_t region_index = (size_t)region_start >> gc_heap::min_segment_size_shr;
    heap_segment* region_info_entry = (heap_segment*)&seg_mapping_table[region_index];
    dprintf (REGIONS_LOG, ("region info for region %p is at %zd, %zx (alloc: %p)",
        region_start, region_index, (size_t)region_info_entry, heap_segment_allocated (region_info_entry)));
    return
        (heap_segment*)&seg_mapping_table[region_index];
}

inline
bool is_free_region (heap_segment* region)
{
    return (heap_segment_allocated (region) == nullptr);
}

// Record the generation this region is planned to land in; updates demotion
// state, the per-generation planned-region counts, and the region->generation
// map entries for every basic region the region covers.
inline
void gc_heap::set_region_plan_gen_num (heap_segment* region, int plan_gen_num, bool replace_p)
{
    int gen_num = heap_segment_gen_num (region);
    int supposed_plan_gen_num = get_plan_gen_num (gen_num);
    dprintf (REGIONS_LOG, ("h%d setting plan gen on %p->%p(was gen%d) to %d(should be: %d) %s",
        heap_number, region,
        heap_segment_mem (region),
        gen_num, plan_gen_num,
        supposed_plan_gen_num,
        ((plan_gen_num < supposed_plan_gen_num) ? "DEMOTED" : "ND")));
    region_info region_info_bits_to_set = (region_info)(plan_gen_num << RI_PLAN_GEN_SHR);
    // A plan gen lower than expected, with pinned survivors, means demotion.
    if ((plan_gen_num < supposed_plan_gen_num) && (heap_segment_pinned_survived (region) != 0))
    {
        if (!settings.demotion)
        {
            settings.demotion = TRUE;
        }
        get_gc_data_per_heap()->set_mechanism_bit (gc_demotion_bit);
        region->flags |= heap_segment_flags_demoted;
        region_info_bits_to_set = (region_info)(region_info_bits_to_set | RI_DEMOTED);
    }
    else
    {
        region->flags &= ~heap_segment_flags_demoted;
    }

    // When re-planning, back out the count recorded for the previous plan gen.
    if (replace_p)
    {
        int original_plan_gen_num = heap_segment_plan_gen_num (region);
        planned_regions_per_gen[original_plan_gen_num]--;
    }

    planned_regions_per_gen[plan_gen_num]++;
    dprintf (REGIONS_LOG, ("h%d g%d %zx(%zx) -> g%d (total %d region planned in g%d)",
        heap_number, heap_segment_gen_num (region), (size_t)region, heap_segment_mem (region), plan_gen_num, planned_regions_per_gen[plan_gen_num], plan_gen_num));

    heap_segment_plan_gen_num (region) = plan_gen_num;

    uint8_t* region_start = get_region_start (region);
    uint8_t* region_end = heap_segment_reserved (region);

    size_t region_index_start = get_basic_region_index_for_address (region_start);
    size_t region_index_end = get_basic_region_index_for_address (region_end);
    for (size_t region_index = region_index_start; region_index < region_index_end;
         region_index++)
    {
        assert (plan_gen_num <= max_generation);
        map_region_to_generation[region_index] = (region_info)(region_info_bits_to_set | (map_region_to_generation[region_index] & ~(RI_PLAN_GEN_MASK|RI_DEMOTED)));
    }
}

// Same as set_region_plan_gen_num, but a no-op for regions swept in plan.
inline
void gc_heap::set_region_plan_gen_num_sip (heap_segment* region, int plan_gen_num)
{
    if (!heap_segment_swept_in_plan (region))
    {
        set_region_plan_gen_num (region, plan_gen_num);
    }
}
#endif //USE_REGIONS

extern const int32_t lock_free;
extern const int32_t lock_taken;
extern const int32_t lock_decommissioned;

#define demotion_plug_len_th (6*1024*1024)
#define LOH_PIN_QUEUE_LENGTH 100
#define LOH_PIN_DECAY 10

#ifdef USE_REGIONS
// Survival-ratio thresholds (percent) for sweep-in-plan.
#define sip_surv_ratio_th (90)
#define sip_old_card_surv_ratio_th (90)
#endif //USE_REGIONS

static void enter_spin_lock_noinstru (RAW_KEYWORD(volatile) int32_t* lock);

#ifdef _DEBUG
#define ASSERT_HOLDING_SPIN_LOCK(pSpinLock) \
    _ASSERTE((pSpinLock)->holding_thread == GCToEEInterface::GetThread());

// Non-blocking acquire; the debug flavor additionally records the owner thread.
inline
BOOL try_enter_spin_lock(GCSpinLock* pSpinLock)
{
    BOOL ret = (Interlocked::CompareExchange(&pSpinLock->lock, 0, -1) == -1);
    if (ret)
    {
        pSpinLock->holding_thread = GCToEEInterface::GetThread();
    }

    return ret;
}
#else // _DEBUG
#define ASSERT_HOLDING_SPIN_LOCK(pSpinLock)

inline
BOOL try_enter_spin_lock(GCSpinLock* spin_lock)
{
    return (Interlocked::CompareExchange(&spin_lock->lock, 0, -1) == -1);
}
#endif // _DEBUG

// Fast path of the more-space-lock acquire; falls back to the helper on contention.
inline
enter_msl_status gc_heap::enter_spin_lock_msl (GCSpinLock* msl)
{
    if (Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) == lock_free)
        return msl_entered;

    return enter_spin_lock_msl_helper (msl);
}

#ifdef _DEBUG
inline
void enter_spin_lock (GCSpinLock *pSpinLock)
{
    enter_spin_lock_noinstru (&pSpinLock->lock);
    assert (pSpinLock->holding_thread == (Thread*)-1);
    pSpinLock->holding_thread = GCToEEInterface::GetThread();
}
#else //_DEBUG
inline
void WaitLonger (int i
#ifdef
       SYNCHRONIZATION_STATS
                 , GCSpinLock* spin_lock
#endif //SYNCHRONIZATION_STATS
    )
{
#ifdef SYNCHRONIZATION_STATS
    (spin_lock->num_wait_longer)++;
#endif //SYNCHRONIZATION_STATS

    // Slow backoff: go preemptive, yield/sleep depending on processor count,
    // and if a GC is in progress wait for it to finish before returning.
    bool bToggleGC = GCToEEInterface::EnablePreemptiveGC();
    assert (bToggleGC);

    if (!gc_heap::gc_started)
    {
#ifdef SYNCHRONIZATION_STATS
        (spin_lock->num_switch_thread_w)++;
#endif //SYNCHRONIZATION_STATS
        if (g_num_processors > 1)
        {
            YieldProcessor();
            if (i & 0x01f)
                GCToOSInterface::YieldThread (0);
            else
                GCToOSInterface::Sleep (5);
        }
        else
            GCToOSInterface::Sleep (5);
    }

    if (gc_heap::gc_started)
    {
        gc_heap::wait_for_gc_done();
    }

    if (bToggleGC)
    {
#ifdef SYNCHRONIZATION_STATS
        (spin_lock->num_disable_preemptive_w)++;
#endif //SYNCHRONIZATION_STATS
        GCToEEInterface::DisablePreemptiveGC();
    }
}

// Contended spin-lock acquire: spin/yield, taking the WaitLonger path every
// 8th iteration (or whenever a GC has started), then retry the CAS.
inline
void enter_spin_lock (GCSpinLock* spin_lock)
{
retry:
    if (Interlocked::CompareExchange(&spin_lock->lock, lock_taken, lock_free) != lock_free)
    {
        unsigned int i = 0;
        while (spin_lock->lock != lock_free)
        {
            assert (spin_lock->lock != lock_decommissioned);
            if ((++i & 7) && !gc_heap::gc_started)
            {
                if (g_num_processors > 1)
                {
#ifndef MULTIPLE_HEAPS
                    int spin_count = 32 * yp_spin_count_unit;
#else //!MULTIPLE_HEAPS
                    int spin_count = yp_spin_count_unit;
#endif //!MULTIPLE_HEAPS
                    for (int j = 0; j < spin_count; j++)
                    {
                        if (spin_lock->lock == lock_free || gc_heap::gc_started)
                            break;
                        YieldProcessor();
                    }
                    if (spin_lock->lock != lock_free && !gc_heap::gc_started)
                    {
#ifdef SYNCHRONIZATION_STATS
                        (spin_lock->num_switch_thread)++;
#endif //SYNCHRONIZATION_STATS
                        bool cooperative_mode = gc_heap::enable_preemptive ();
                        GCToOSInterface::YieldThread(0);
                        gc_heap::disable_preemptive (cooperative_mode);
                    }
                }
                else
                    GCToOSInterface::YieldThread(0);
            }
            else
            {
                WaitLonger(i
#ifdef SYNCHRONIZATION_STATS
                        , spin_lock
#endif //SYNCHRONIZATION_STATS
                    );
            }
        }
        goto retry;
    }
}
#endif //_DEBUG

inline
+void leave_spin_lock(GCSpinLock *pSpinLock) +{ +#ifdef _DEBUG + bool gc_thread_p = GCToEEInterface::WasCurrentThreadCreatedByGC(); + pSpinLock->released_by_gc_p = gc_thread_p; + pSpinLock->holding_thread = (Thread*) -1; +#endif // _DEBUG + if (pSpinLock->lock != -1) + VolatileStore((int32_t*)&pSpinLock->lock, -1); +} + +inline +BOOL same_large_alignment_p (uint8_t* p1, uint8_t* p2) +{ +#ifdef RESPECT_LARGE_ALIGNMENT + const size_t LARGE_ALIGNMENT_MASK = 2 * DATA_ALIGNMENT - 1; + return ((((size_t)p1 ^ (size_t)p2) & LARGE_ALIGNMENT_MASK) == 0); +#else + UNREFERENCED_PARAMETER(p1); + UNREFERENCED_PARAMETER(p2); + return TRUE; +#endif // RESPECT_LARGE_ALIGNMENT +} + +inline +size_t switch_alignment_size (BOOL already_padded_p) +{ +#ifndef RESPECT_LARGE_ALIGNMENT + assert (!"Should not be called"); +#endif // RESPECT_LARGE_ALIGNMENT + + if (already_padded_p) + return DATA_ALIGNMENT; + else + return Align (min_obj_size) | DATA_ALIGNMENT; +} + +#define END_SPACE_AFTER_GC (loh_size_threshold + MAX_STRUCTALIGN) +#define END_SPACE_AFTER_GC_FL (END_SPACE_AFTER_GC + Align (min_obj_size)) + +inline +size_t round_up_power2 (size_t size) +{ + // Get the 0-based index of the most-significant bit in size-1. + // If the call failed (because size-1 is zero), size must be 1, + // so return 1 (because 1 rounds up to itself). + DWORD highest_set_bit_index; + if (0 == +#ifdef HOST_64BIT + BitScanReverse64( +#else + BitScanReverse( +#endif + &highest_set_bit_index, size - 1)) { return 1; } + + // The size == 0 case (which would have overflowed to SIZE_MAX when decremented) + // is handled below by relying on the fact that highest_set_bit_index is the maximum value + // (31 or 63, depending on sizeof(size_t)) and left-shifting a value >= 2 by that + // number of bits shifts in zeros from the right, resulting in an output of zero. 
+ return static_cast(2) << highest_set_bit_index; +} + +inline +size_t round_down_power2 (size_t size) +{ + DWORD highest_set_bit_index; + if (0 == +#ifdef HOST_64BIT + BitScanReverse64( +#else + BitScanReverse( +#endif + &highest_set_bit_index, size)) { return 0; } + + return static_cast(1) << highest_set_bit_index; +} + +extern size_t loh_size_threshold; + +inline +float mb (size_t num) +{ + return (float)((float)num / 1000.0 / 1000.0); +} + +inline +uint32_t limit_time_to_uint32 (uint64_t time) +{ + time = min (time, (uint64_t)UINT32_MAX); + return (uint32_t)time; +} + +inline +size_t align_on_page (size_t add) +{ + return ((add + OS_PAGE_SIZE - 1) & ~((size_t)OS_PAGE_SIZE - 1)); +} + +inline +uint8_t* align_on_page (uint8_t* add) +{ + return (uint8_t*)align_on_page ((size_t) add); +} + +inline +void memclr (uint8_t* mem, size_t size) +{ + dprintf (3, ("MEMCLR: %p, %zd", mem, size)); + assert ((size & (sizeof(PTR_PTR) - 1)) == 0); + assert (sizeof(PTR_PTR) == DATA_ALIGNMENT); + memset (mem, 0, size); +} + +inline +size_t align_lower_page (size_t add) +{ + return (add & ~((size_t)OS_PAGE_SIZE - 1)); +} + +inline +uint8_t* align_lower_page (uint8_t* add) +{ + return (uint8_t*)align_lower_page ((size_t)add); +} + +#ifdef HOST_64BIT +#define mark_bit_pitch ((size_t)16) +#else +#define mark_bit_pitch ((size_t)8) +#endif //HOST_64BIT +#define mark_word_width ((size_t)32) +#define mark_word_size (mark_word_width * mark_bit_pitch) + +inline +uint8_t* align_on_mark_word (uint8_t* add) +{ + return (uint8_t*)((size_t)(add + mark_word_size - 1) & ~(mark_word_size - 1)); +} + +inline +size_t mark_bit_of (uint8_t* add) +{ + return ((size_t)add / mark_bit_pitch); +} + +inline +unsigned int mark_bit_bit (size_t mark_bit) +{ + return (unsigned int)(mark_bit % mark_word_width); +} + +inline +size_t mark_bit_bit_of (uint8_t* add) +{ + return (((size_t)add / mark_bit_pitch) % mark_word_width); +} + +inline +size_t mark_bit_word (size_t mark_bit) +{ + return (mark_bit / 
            mark_word_width);
}

inline
size_t mark_word_of (uint8_t* add)
{
    return ((size_t)add) / mark_word_size;
}

inline
uint8_t* mark_word_address (size_t wd)
{
    return (uint8_t*)(wd * mark_word_size);
}

#ifdef BACKGROUND_GC
// Per-thread slot in the bpromoted counters; the *16 spreads entries to avoid
// adjacent slots (presumably false-sharing mitigation — TODO confirm).
inline
size_t& gc_heap::bpromoted_bytes (int thread)
{
#ifdef MULTIPLE_HEAPS
    return g_bpromoted [thread * 16];
#else //MULTIPLE_HEAPS
    UNREFERENCED_PARAMETER(thread);
    return g_bpromoted;
#endif //MULTIPLE_HEAPS
}

inline
unsigned int gc_heap::mark_array_marked (uint8_t* add)
{
    return mark_array [mark_word_of (add)] & (1 << mark_bit_bit_of (add));
}

// Set the background-mark bit; interlocked when multiple heaps may race.
inline
void gc_heap::mark_array_set_marked (uint8_t* add)
{
    size_t index = mark_word_of (add);
    uint32_t val = (1 << mark_bit_bit_of (add));
#ifdef MULTIPLE_HEAPS
    Interlocked::Or (&(mark_array [index]), val);
#else
    mark_array [index] |= val;
#endif
}

inline
void gc_heap::mark_array_clear_marked (uint8_t* add)
{
    mark_array [mark_word_of (add)] &= ~(1 << mark_bit_bit_of (add));
}

#ifdef FEATURE_BASICFREEZE
// Clear the BGC mark-array bits covering a (frozen) SOH segment, if any.
inline
void gc_heap::seg_clear_mark_array_bits_soh (heap_segment* seg)
{
    uint8_t* range_beg = 0;
    uint8_t* range_end = 0;
    if (bgc_mark_array_range (seg, FALSE, &range_beg, &range_end))
    {
        clear_mark_array (range_beg, align_on_mark_word (range_end));
    }
}
#endif //FEATURE_BASICFREEZE
#endif //BACKGROUND_GC

inline
BOOL gc_heap::is_mark_set (uint8_t* o)
{
    return marked (o);
}

// Mark an object pinned (o is the object, ppObject the root that pins it) and
// fire the ETW pin event when tracing is on.
inline
void gc_heap::pin_object (uint8_t* o, uint8_t** ppObject)
{
    dprintf (3, ("Pinning %zx->%zx", (size_t)ppObject, (size_t)o));
    set_pinned (o);

#ifdef FEATURE_EVENT_TRACE
    if (EVENT_ENABLED(PinObjectAtGCTime))
    {
        fire_etw_pin_object_event (o, ppObject);
    }
#endif // FEATURE_EVENT_TRACE

    num_pinned_objects++;
}

#define contain_pointers(i) header(i)->ContainsGCPointers()
#ifdef COLLECTIBLE_CLASS
#define contain_pointers_or_collectible(i) header(i)->ContainsGCPointersOrCollectible()
#define get_class_object(i)
GCToEEInterface::GetLoaderAllocatorObjectForGC((Object *)i)
#define is_collectible(i) method_table(i)->Collectible()
#else //COLLECTIBLE_CLASS
#define contain_pointers_or_collectible(i) header(i)->ContainsGCPointers()
#endif //COLLECTIBLE_CLASS

inline
uint8_t*& card_table_lowest_address (uint32_t* c_table)
{
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->lowest_address;
}

inline
uint8_t*& card_table_highest_address (uint32_t* c_table)
{
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->highest_address;
}

inline
uint32_t*& card_table_next (uint32_t* c_table)
{
    // NOTE: The dac takes a dependency on card_table_info being right before c_table.
    return ((card_table_info*)((uint8_t*)c_table - sizeof (card_table_info)))->next_card_table;
}

void destroy_card_table (uint32_t* c_table);

#define new_start() {if (ppstop <= start) {break;} else {parm = start}}
#define ignore_start 0
#define use_start 1

// Walk every pointer slot of object o (method table mt, total size bytes),
// evaluating exp with parm bound to each uint8_t** slot. start/start_useful let
// the walk begin partway into the object; the negative-series (cnt < 0) branch
// handles arrays of value types described by a repeating val_serie pattern.
#define go_through_object(mt,o,size,parm,start,start_useful,limit,exp)      \
{                                                                           \
    CGCDesc* map = CGCDesc::GetCGCDescFromMT((MethodTable*)(mt));           \
    CGCDescSeries* cur = map->GetHighestSeries();                           \
    ptrdiff_t cnt = (ptrdiff_t) map->GetNumSeries();                        \
                                                                            \
    if (cnt >= 0)                                                           \
    {                                                                       \
        CGCDescSeries* last = map->GetLowestSeries();                       \
        uint8_t** parm = 0;                                                 \
        do                                                                  \
        {                                                                   \
            assert (parm <= (uint8_t**)((o) + cur->GetSeriesOffset()));     \
            parm = (uint8_t**)((o) + cur->GetSeriesOffset());               \
            uint8_t** ppstop =                                              \
                (uint8_t**)((uint8_t*)parm + cur->GetSeriesSize() + (size));\
            if (!start_useful || (uint8_t*)ppstop > (start))                \
            {                                                               \
                if (start_useful && (uint8_t*)parm < (start)) parm = (uint8_t**)(start);\
                while (parm < ppstop)                                       \
                {                                                           \
                    {exp}                                                   \
                    parm++;                                                 \
                }                                                           \
            }                                                               \
            cur--;                                                          \
                                                                            \
        } while (cur >= last);                                              \
    }                                                                       \
    else                                                                    \
    {                                                                       \
        /* Handle the repeating case - array of valuetypes */               \
        uint8_t** parm = (uint8_t**)((o) + cur->startoffset);               \
        if (start_useful && start > (uint8_t*)parm)                         \
        {                                                                   \
            ptrdiff_t cs = mt->RawGetComponentSize();                       \
            parm = (uint8_t**)((uint8_t*)parm + (((start) - (uint8_t*)parm)/cs)*cs); \
        }                                                                   \
        while ((uint8_t*)parm < ((o)+(size)-plug_skew))                     \
        {                                                                   \
            for (ptrdiff_t __i = 0; __i > cnt; __i--)                       \
            {                                                               \
                HALF_SIZE_T skip = (cur->val_serie + __i)->skip;            \
                HALF_SIZE_T nptrs = (cur->val_serie + __i)->nptrs;          \
                uint8_t** ppstop = parm + nptrs;                            \
                if (!start_useful || (uint8_t*)ppstop > (start))            \
                {                                                           \
                    if (start_useful && (uint8_t*)parm < (start)) parm = (uint8_t**)(start); \
                    do                                                      \
                    {                                                       \
                        {exp}                                               \
                        parm++;                                             \
                    } while (parm < ppstop);                                \
                }                                                           \
                parm = (uint8_t**)((uint8_t*)ppstop + skip);                \
            }                                                               \
        }                                                                   \
    }                                                                       \
}

#define go_through_object_nostart(mt,o,size,parm,exp) {go_through_object(mt,o,size,parm,o,ignore_start,(o + size),exp); }

// Like go_through_object_nostart, but skips objects with no GC pointers; the
// collectible flavor first visits the class (loader allocator) object.
#ifndef COLLECTIBLE_CLASS
#define go_through_object_cl(mt,o,size,parm,exp)                            \
{                                                                           \
    if (header(o)->ContainsGCPointers())                                    \
    {                                                                       \
        go_through_object_nostart(mt,o,size,parm,exp);                      \
    }                                                                       \
}
#else //COLLECTIBLE_CLASS
#define go_through_object_cl(mt,o,size,parm,exp)                            \
{                                                                           \
    if (header(o)->Collectible())                                           \
    {                                                                       \
        uint8_t* class_obj = get_class_object (o);                          \
        uint8_t** parm = &class_obj;                                        \
        do {exp} while (false);                                             \
    }                                                                       \
    if (header(o)->ContainsGCPointers())                                    \
    {                                                                       \
        go_through_object_nostart(mt,o,size,parm,exp);                      \
    }                                                                       \
}
#endif //COLLECTIBLE_CLASS

inline BOOL
gc_heap::dt_high_memory_load_p()
{
    return ((settings.entry_memory_load >= high_memory_load_th) || g_low_memory_status);
}

#if defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64) || defined(TARGET_RISCV64)
#ifndef PREFETCH
#define PREFETCH
#endif
#endif

#ifdef PREFETCH
inline void Prefetch(void* addr)
{
#ifdef TARGET_WINDOWS

#if defined(TARGET_AMD64) || defined(TARGET_X86)

#ifndef _MM_HINT_T0
#define _MM_HINT_T0 1
#endif
    _mm_prefetch((const char*)addr, _MM_HINT_T0);
#elif defined(TARGET_ARM64)
    __prefetch((const char*)addr);
#endif //defined(TARGET_AMD64) || defined(TARGET_X86)
+#elif defined(TARGET_UNIX) + __builtin_prefetch(addr); +#else //!(TARGET_WINDOWS || TARGET_UNIX) + UNREFERENCED_PARAMETER(addr); +#endif //TARGET_WINDOWS +} +#else //PREFETCH +inline void Prefetch (void* addr) +{ + UNREFERENCED_PARAMETER(addr); +} +#endif //PREFETCH + +#ifdef BACKGROUND_GC +inline +void gc_heap::bgc_track_uoh_alloc() +{ + if (current_c_gc_state == c_gc_state_planning) + { + Interlocked::Increment (&uoh_alloc_thread_count); + dprintf (3, ("h%d: inc lc: %d", heap_number, (int32_t)uoh_alloc_thread_count)); + } +} + +inline +void gc_heap::bgc_untrack_uoh_alloc() +{ + if (current_c_gc_state == c_gc_state_planning) + { + Interlocked::Decrement (&uoh_alloc_thread_count); + dprintf (3, ("h%d: dec lc: %d", heap_number, (int32_t)uoh_alloc_thread_count)); + } +} + +#endif //BACKGROUND_GC + +inline +BOOL gc_heap::ephemeral_pointer_p (uint8_t* o) +{ +#ifdef USE_REGIONS + int gen_num = object_gennum ((uint8_t*)o); + assert (gen_num >= 0); + return (gen_num < max_generation); +#else + return ((o >= ephemeral_low) && (o < ephemeral_high)); +#endif //USE_REGIONS +} + +// Get the 0-based index of the most-significant bit in the value. +// Returns -1 if the input value is zero (i.e. has no set bits). +inline +int index_of_highest_set_bit (size_t value) +{ + // Get the 0-based index of the most-significant bit in the value. + // If the call failed (because value is zero), return -1. + DWORD highest_set_bit_index; + return (0 == +#ifdef HOST_64BIT + BitScanReverse64( +#else + BitScanReverse( +#endif + &highest_set_bit_index, value)) ? -1 : static_cast(highest_set_bit_index); +} + +inline +size_t gc_heap::generation_allocator_efficiency_percent (generation* inst) +{ +#ifdef DYNAMIC_HEAP_COUNT + if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) + { + uint64_t total_plan_allocated = generation_total_plan_allocated (inst); + uint64_t condemned_allocated = generation_condemned_allocated (inst); + return ((total_plan_allocated == 0) ? 
0 : (100 * (total_plan_allocated - condemned_allocated) / total_plan_allocated)); + } + else +#endif //DYNAMIC_HEAP_COUNT + { + uint64_t free_obj_space = generation_free_obj_space (inst); + uint64_t free_list_allocated = generation_free_list_allocated (inst); + if ((free_list_allocated + free_obj_space) == 0) + { + return 0; + } + else + { + return (size_t)((100 * free_list_allocated) / (free_list_allocated + free_obj_space)); + } + } +} + +inline +size_t gc_heap::generation_unusable_fragmentation (generation* inst, int hn) +{ +#ifdef DYNAMIC_HEAP_COUNT + if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) + { + uint64_t total_plan_allocated = generation_total_plan_allocated (inst); + uint64_t condemned_allocated = generation_condemned_allocated (inst); + uint64_t unusable_frag = 0; + size_t fo_space = (((ptrdiff_t)generation_free_obj_space (inst) < 0) ? 0 : generation_free_obj_space (inst)); + + if (total_plan_allocated != 0) + { + unusable_frag = fo_space + (condemned_allocated * generation_free_list_space (inst) / total_plan_allocated); + } + + dprintf (3, ("h%d g%d FLa: %Id, ESa: %Id, Ca: %Id | FO: %Id, FL %Id, fl effi %.3f, unusable fl is %Id", + hn, inst->gen_num, + generation_free_list_allocated (inst), generation_end_seg_allocated (inst), (size_t)condemned_allocated, + fo_space, generation_free_list_space (inst), + ((total_plan_allocated == 0) ? 
1.0 : ((float)(total_plan_allocated - condemned_allocated) / (float)total_plan_allocated)), + (size_t)unusable_frag)); + + return (size_t)unusable_frag; + } + else +#endif //DYNAMIC_HEAP_COUNT + { + uint64_t free_obj_space = generation_free_obj_space (inst); + uint64_t free_list_allocated = generation_free_list_allocated (inst); + uint64_t free_list_space = generation_free_list_space (inst); + if ((free_list_allocated + free_obj_space) == 0) + return 0; + return (size_t)(free_obj_space + (free_obj_space * free_list_space) / (free_list_allocated + free_obj_space)); + } +} + +inline +void gc_heap::check_loh_compact_mode (BOOL all_heaps_compacted_p) +{ + if (settings.loh_compaction && (loh_compaction_mode == loh_compaction_once)) + { + if (all_heaps_compacted_p) + { + // If the compaction mode says to compact once and we are going to compact LOH, + // we need to revert it back to no compaction. + loh_compaction_mode = loh_compaction_default; + } + } +} + +inline +gc_history_global* gc_heap::get_gc_data_global() +{ +#ifdef BACKGROUND_GC + return (settings.concurrent ? &bgc_data_global : &gc_data_global); +#else + return &gc_data_global; +#endif //BACKGROUND_GC +} + +inline +gc_history_per_heap* gc_heap::get_gc_data_per_heap() +{ +#ifdef BACKGROUND_GC + return (settings.concurrent ? 
&bgc_data_per_heap : &gc_data_per_heap); +#else + return &gc_data_per_heap; +#endif //BACKGROUND_GC +} + +inline +size_t gc_heap::brick_of (uint8_t* add) +{ + return (size_t)(add - lowest_address) / brick_size; +} + +inline +uint8_t* gc_heap::brick_address (size_t brick) +{ + return lowest_address + (brick_size * brick); +} + +//codes for the brick entries: +//entry == 0 -> not assigned +//entry >0 offset is entry-1 +//entry <0 jump back entry bricks +inline +void gc_heap::set_brick (size_t index, ptrdiff_t val) +{ + if (val < -32767) + { + val = -32767; + } + assert (val < 32767); + if (val >= 0) + brick_table [index] = (short)val+1; + else + brick_table [index] = (short)val; + + dprintf (3, ("set brick[%zx] to %d\n", index, (short)val)); +} + +inline +int gc_heap::get_brick_entry (size_t index) +{ +#ifdef MULTIPLE_HEAPS + return VolatileLoadWithoutBarrier(&brick_table [index]); +#else + return brick_table[index]; +#endif +} + +inline +uint8_t* gc_heap::card_address (size_t card) +{ + return (uint8_t*) (card_size * card); +} + +inline +size_t gc_heap::card_of ( uint8_t* object) +{ + return (size_t)(object) / card_size; +} + +inline +void gc_heap::clear_card (size_t card) +{ + card_table [card_word (card)] = + (card_table [card_word (card)] & ~(1 << card_bit (card))); + dprintf (3,("Cleared card %zx [%zx, %zx[", card, (size_t)card_address (card), + (size_t)card_address (card+1))); +} + +inline +void gc_heap::set_card (size_t card) +{ + size_t word = card_word (card); + card_table[word] = (card_table [word] | (1 << card_bit (card))); + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + // Also set the card bundle that corresponds to the card + size_t bundle_to_set = cardw_card_bundle(word); + + card_bundle_set(bundle_to_set); + + dprintf (3,("Set card %zx [%zx, %zx[ and bundle %zx", card, (size_t)card_address (card), (size_t)card_address (card+1), bundle_to_set)); +#endif +} + +inline +BOOL gc_heap::card_set_p (size_t card) +{ + return ( card_table [ card_word (card) ] 
& (1 << card_bit (card))); +} + +inline +int gc_heap::get_num_heaps() +{ +#ifdef MULTIPLE_HEAPS + return n_heaps; +#else + return 1; +#endif //MULTIPLE_HEAPS +} + +inline +ptrdiff_t gc_heap::get_desired_allocation (int gen_number) +{ + return dd_desired_allocation (dynamic_data_of (gen_number)); +} + +inline +ptrdiff_t gc_heap::get_new_allocation (int gen_number) +{ + return dd_new_allocation (dynamic_data_of (gen_number)); +} + +//return the amount allocated so far in gen_number +inline +ptrdiff_t gc_heap::get_allocation (int gen_number) +{ + dynamic_data* dd = dynamic_data_of (gen_number); + + return dd_desired_allocation (dd) - dd_new_allocation (dd); +} + +#ifdef BACKGROUND_GC +inline +BOOL gc_heap::background_marked (uint8_t* o) +{ + return mark_array_marked (o); +} + +inline +BOOL gc_heap::background_mark1 (uint8_t* o) +{ + BOOL to_mark = !mark_array_marked (o); + + dprintf (3, ("b*%zx*b(%d)", (size_t)o, (to_mark ? 1 : 0))); + if (to_mark) + { + mark_array_set_marked (o); + dprintf (4, ("n*%zx*n", (size_t)o)); + return TRUE; + } + else + return FALSE; +} + +// TODO: we could consider filtering out NULL's here instead of going to +// look for it on other heaps +inline +BOOL gc_heap::background_mark (uint8_t* o, uint8_t* low, uint8_t* high) +{ + BOOL marked = FALSE; + if ((o >= low) && (o < high)) + marked = background_mark1 (o); +#ifdef MULTIPLE_HEAPS + else if (o) + { + gc_heap* hp = heap_of (o); + assert (hp); + if ((o >= hp->background_saved_lowest_address) && (o < hp->background_saved_highest_address)) + marked = background_mark1 (o); + } +#endif //MULTIPLE_HEAPS + return marked; +} + +#endif //BACKGROUND_GC + +inline +size_t size_mark_array_of (uint8_t* from, uint8_t* end) +{ + assert (((size_t)from & ((mark_word_size)-1)) == 0); + assert (((size_t)end & ((mark_word_size)-1)) == 0); + return sizeof (uint32_t)*(((end - from) / mark_word_size)); +} + +inline +mark* gc_heap::pinned_plug_of (size_t bos) +{ + return &mark_stack_array [ bos ]; +} + +inline 
+mark* gc_heap::oldest_pin () +{ + return pinned_plug_of (mark_stack_bos); +} + +inline +BOOL gc_heap::pinned_plug_que_empty_p () +{ + return (mark_stack_bos == mark_stack_tos); +} + +#ifdef FEATURE_LOH_COMPACTION +inline +BOOL gc_heap::loh_pinned_plug_que_empty_p() +{ + return (loh_pinned_queue_bos == loh_pinned_queue_tos); +} +#endif // FEATURE_LOH_COMPACTION + +inline +mark* gc_heap::loh_pinned_plug_of (size_t bos) +{ + return &loh_pinned_queue[bos]; +} + +#ifdef USE_REGIONS +inline bool gc_heap::is_in_gc_range (uint8_t* o) +{ +#ifdef FEATURE_BASICFREEZE + // we may have frozen objects in read only segments + // outside of the reserved address range of the gc heap + assert (((g_gc_lowest_address <= o) && (o < g_gc_highest_address)) || + (o == nullptr) || (ro_segment_lookup (o) != nullptr)); +#else //FEATURE_BASICFREEZE + // without frozen objects, every non-null pointer must be + // within the heap + assert ((o == nullptr) || (g_gc_lowest_address <= o) && (o < g_gc_highest_address)); +#endif //FEATURE_BASICFREEZE + return ((gc_low <= o) && (o < gc_high)); +} +#endif //USE_REGIONS + +#ifdef FEATURE_EVENT_TRACE +inline +void gc_heap::record_mark_time (uint64_t& mark_time, + uint64_t& current_mark_time, + uint64_t& last_mark_time) +{ + if (informational_event_enabled_p) + { + current_mark_time = GetHighPrecisionTimeStamp(); + mark_time = limit_time_to_uint32 (current_mark_time - last_mark_time); + dprintf (3, ("%zd - %zd = %zd", + current_mark_time, last_mark_time, (current_mark_time - last_mark_time))); + last_mark_time = current_mark_time; + } +} +#endif //FEATURE_EVENT_TRACE + +inline +void gc_heap::init_alloc_info (generation* gen, heap_segment* seg) +{ + generation_allocation_segment (gen) = seg; + generation_allocation_pointer (gen) = heap_segment_mem (seg); + generation_allocation_limit (gen) = generation_allocation_pointer (gen); + generation_allocation_context_start_region (gen) = generation_allocation_pointer (gen); +} + +inline +uint8_t* pinned_plug 
(mark* m) +{ + return m->first; +} + +inline +size_t& pinned_len (mark* m) +{ + return m->len; +} + +inline +void set_new_pin_info (mark* m, uint8_t* pin_free_space_start) +{ + m->len = pinned_plug (m) - pin_free_space_start; +#ifdef SHORT_PLUGS + m->allocation_context_start_region = pin_free_space_start; +#endif //SHORT_PLUGS +} + +inline +void gc_heap::update_oldest_pinned_plug() +{ + oldest_pinned_plug = (pinned_plug_que_empty_p() ? 0 : pinned_plug (oldest_pin())); +} + +#if !defined(_MSC_VER) && !defined(__cdecl) +#if defined(TARGET_X86) +#define __cdecl __attribute__((cdecl)) +#else // TARGET_X86 +#define __cdecl +#endif // TARGET_X86 +#endif // !_MSC_VER && !__cdecl + +#ifdef FEATURE_STRUCTALIGN +#if defined (TARGET_AMD64) +#define brick_bits (12) +#else +#define brick_bits (11) +#endif //TARGET_AMD64 +static_assert(brick_size == (1 << brick_bits)); + +#define child_bits (brick_bits + 1 - LOG2_PTRSIZE) +#define pad_bits (sizeof(short) * 8 - child_bits) + +#define child_from_short(w) (((signed short)(w) / (1 << (pad_bits - LOG2_PTRSIZE))) & ~((1 << LOG2_PTRSIZE) - 1)) +#define pad_mask ((1 << pad_bits) - 1) +#define pad_from_short(w) ((size_t)(w) & pad_mask) +#else // FEATURE_STRUCTALIGN +#define child_from_short(w) (w) +#endif // FEATURE_STRUCTALIGN + +inline +short node_left_child(uint8_t* node) +{ + return child_from_short(((plug_and_pair*)node)[-1].m_pair.left); +} + +inline +short node_right_child(uint8_t* node) +{ + return child_from_short(((plug_and_pair*)node)[-1].m_pair.right); +} + +inline +size_t node_gap_size (uint8_t* node) +{ + return ((plug_and_gap*)node)[-1].gap; +} + +inline +ptrdiff_t loh_node_relocation_distance(uint8_t* node) +{ + return (((loh_obj_and_pad*)node)[-1].reloc); +} + +inline +void loh_set_node_relocation_distance(uint8_t* node, ptrdiff_t val) +{ + ptrdiff_t* place = &(((loh_obj_and_pad*)node)[-1].reloc); + *place = val; +} + +inline +ptrdiff_t node_relocation_distance (uint8_t* node) +{ + return 
(((plug_and_reloc*)(node))[-1].reloc & ~3); +} + +inline +void set_node_relocation_distance(uint8_t* node, ptrdiff_t val) +{ + assert (val == (val & ~3)); + ptrdiff_t* place = &(((plug_and_reloc*)node)[-1].reloc); + *place &= 1; + *place |= val; +} + +#define node_left_p(node) (((plug_and_reloc*)(node))[-1].reloc & 2) +#define set_node_left(node) ((plug_and_reloc*)(node))[-1].reloc |= 2 + +inline +void set_node_left_child(uint8_t* node, ptrdiff_t val) +{ + assert (val > -(ptrdiff_t)brick_size); + assert (val < (ptrdiff_t)brick_size); + assert (Aligned (val)); +#ifdef FEATURE_STRUCTALIGN + size_t pad = pad_from_short(((plug_and_pair*)node)[-1].m_pair.left); + ((plug_and_pair*)node)[-1].m_pair.left = ((short)val << (pad_bits - LOG2_PTRSIZE)) | (short)pad; +#else // FEATURE_STRUCTALIGN + ((plug_and_pair*)node)[-1].m_pair.left = (short)val; +#endif // FEATURE_STRUCTALIGN + assert (node_left_child (node) == val); +} + +inline +void set_node_right_child(uint8_t* node, ptrdiff_t val) +{ + assert (val > -(ptrdiff_t)brick_size); + assert (val < (ptrdiff_t)brick_size); + assert (Aligned (val)); +#ifdef FEATURE_STRUCTALIGN + size_t pad = pad_from_short(((plug_and_pair*)node)[-1].m_pair.right); + ((plug_and_pair*)node)[-1].m_pair.right = ((short)val << (pad_bits - LOG2_PTRSIZE)) | (short)pad; +#else // FEATURE_STRUCTALIGN + ((plug_and_pair*)node)[-1].m_pair.right = (short)val; +#endif // FEATURE_STRUCTALIGN + assert (node_right_child (node) == val); +} + +inline +void set_gap_size (uint8_t* node, size_t size) +{ + assert (Aligned (size)); + + ((plug_and_gap *)node)[-1].reloc = 0; + ((plug_and_gap *)node)[-1].lr = 0; + ((plug_and_gap *)node)[-1].gap = size; + + assert ((size == 0) || (size >= sizeof(plug_and_reloc))); +} + +inline +uint8_t* tree_search (uint8_t* tree, uint8_t* old_address) +{ + uint8_t* candidate = 0; + int cn; + while (1) + { + if (tree < old_address) + { + if ((cn = node_right_child (tree)) != 0) + { + assert (candidate < tree); + candidate = tree; + tree = 
tree + cn; + Prefetch (&((plug_and_pair*)tree)[-1].m_pair.left); + continue; + } + else + { + break; + } + } + else if (tree > old_address) + { + if ((cn = node_left_child (tree)) != 0) + { + tree = tree + cn; + Prefetch (&((plug_and_pair*)tree)[-1].m_pair.left); + continue; + } + else + { + break; + } + } + else + { + break; + } + } + + if (tree <= old_address) + return tree; + else if (candidate) + return candidate; + else + return tree; +} + +#ifdef DOUBLY_LINKED_FL +inline +BOOL is_plug_bgc_mark_bit_set (uint8_t* node) +{ + return header(node)->IsBGCMarkBitSet(); +} + +inline +void clear_plug_bgc_mark_bit (uint8_t* node) +{ + header(node)->ClearBGCMarkBit(); +} + +inline +BOOL is_free_obj_in_compact_bit_set (uint8_t* node) +{ + return header(node)->IsFreeObjInCompactBitSet(); +} + +inline +void clear_free_obj_in_compact_bit (uint8_t* node) +{ + header(node)->ClearFreeObjInCompactBit(); +} + +inline +BOOL is_on_free_list (uint8_t* o, size_t size) +{ + if (size >= min_free_list) + { + if (header(o)->GetMethodTable() == g_gc_pFreeObjectMethodTable) + { + return (free_list_prev (o) != PREV_EMPTY); + } + } + + return FALSE; +} +#endif //DOUBLY_LINKED_FL + +#ifdef SHORT_PLUGS +inline +void clear_plug_padded (uint8_t* node) +{ + header(node)->ClearMarked(); +} +#else //SHORT_PLUGS +inline +void clear_plug_padded (uint8_t* node) +{ + UNREFERENCED_PARAMETER(node); +} +#endif //SHORT_PLUGS + +inline +heap_segment* heap_segment_non_sip (heap_segment* ns) +{ +#ifdef USE_REGIONS + if ((ns == 0) || !heap_segment_swept_in_plan (ns)) + { + return ns; + } + else + { + do + { + if (heap_segment_swept_in_plan (ns)) + { + dprintf (REGIONS_LOG, ("region %p->%p SIP", + heap_segment_mem (ns), heap_segment_allocated (ns))); + } + + ns = heap_segment_next (ns); + } while ((ns != 0) && heap_segment_swept_in_plan (ns)); + return ns; + } +#else //USE_REGIONS + return ns; +#endif //USE_REGIONS +} + +inline +heap_segment* heap_segment_next_non_sip (heap_segment* seg) +{ + heap_segment* ns = 
heap_segment_next (seg); +#ifdef USE_REGIONS + return heap_segment_non_sip (ns); +#else + return ns; +#endif //USE_REGIONS +} + +inline +static void safe_switch_to_thread() +{ + bool cooperative_mode = gc_heap::enable_preemptive(); + + GCToOSInterface::YieldThread(0); + + gc_heap::disable_preemptive(cooperative_mode); +} + +void WaitLongerNoInstru (int i); + +extern const int32_t lock_free; +extern const int32_t lock_taken; +extern const int32_t lock_decommissioned; + +// +// We need the following methods to have volatile arguments, so that they can accept +// raw pointers in addition to the results of the & operator on Volatile. +// this will never be used for the more_space_lock_xxx, which is why +// "lock_decommissioned" cannot happen. +inline +static void enter_spin_lock_noinstru (RAW_KEYWORD(volatile) int32_t* lock) +{ +retry: + + if (Interlocked::CompareExchange(lock, lock_taken, lock_free) != lock_free) + { + unsigned int i = 0; + while (VolatileLoad(lock) != lock_free) + { + // will never be used for more_space_lock_xxx + assert (VolatileLoad(lock) != lock_decommissioned); + if ((++i & 7) && !IsGCInProgress()) + { + if (g_num_processors > 1) + { +#ifndef MULTIPLE_HEAPS + int spin_count = 32 * yp_spin_count_unit; +#else //!MULTIPLE_HEAPS + int spin_count = yp_spin_count_unit; +#endif //!MULTIPLE_HEAPS + for (int j = 0; j < spin_count; j++) + { + if (VolatileLoad(lock) == lock_free || IsGCInProgress()) + break; + YieldProcessor(); // indicate to the processor that we are spinning + } + if (VolatileLoad(lock) != lock_free && !IsGCInProgress()) + { + safe_switch_to_thread(); + } + } + else + { + safe_switch_to_thread(); + } + } + else + { + WaitLongerNoInstru(i); + } + } + goto retry; + } +} + +inline +static BOOL try_enter_spin_lock_noinstru(RAW_KEYWORD(volatile) int32_t* lock) +{ + return (Interlocked::CompareExchange(&*lock, lock_taken, lock_free) == lock_free); +} + +inline +static void leave_spin_lock_noinstru (RAW_KEYWORD(volatile) int32_t* lock) +{ + 
VolatileStore((int32_t*)lock, lock_free); +} + +inline +BOOL power_of_two_p (size_t integer) +{ + return !(integer & (integer-1)); +} + +#ifdef FEATURE_STRUCTALIGN +void set_node_aligninfo (uint8_t *node, int requiredAlignment, ptrdiff_t pad); +void clear_node_aligninfo (uint8_t *node); +#else // FEATURE_STRUCTALIGN +#define node_realigned(node) (((plug_and_reloc*)(node))[-1].reloc & 1) +void set_node_realigned (uint8_t* node); +void clear_node_realigned(uint8_t* node); +#endif // FEATURE_STRUCTALIGN + +#define OBJECT_ALIGNMENT_OFFSET (sizeof(MethodTable *)) + +#ifdef FEATURE_STRUCTALIGN +#define MAX_STRUCTALIGN OS_PAGE_SIZE +#else // FEATURE_STRUCTALIGN +#define MAX_STRUCTALIGN 0 +#endif // FEATURE_STRUCTALIGN + +#ifdef FEATURE_STRUCTALIGN +inline +ptrdiff_t AdjustmentForMinPadSize(ptrdiff_t pad, int requiredAlignment) +{ + // The resulting alignpad must be either 0 or at least min_obj_size. + // Note that by computing the following difference on unsigned types, + // we can do the range check 0 < alignpad < min_obj_size with a + // single conditional branch. + if ((size_t)(pad - DATA_ALIGNMENT) < Align (min_obj_size) - DATA_ALIGNMENT) + { + return requiredAlignment; + } + return 0; +} + + +inline +ptrdiff_t ComputeStructAlignPad (uint8_t* plug, int requiredAlignment, size_t alignmentOffset=OBJECT_ALIGNMENT_OFFSET) +{ + return StructAlign (plug, requiredAlignment, alignmentOffset) - plug; +} + +inline +BOOL IsStructAligned (uint8_t *ptr, int requiredAlignment) +{ + return StructAlign (ptr, requiredAlignment) == ptr; +} + +inline +ptrdiff_t ComputeMaxStructAlignPad (int requiredAlignment) +{ + if (requiredAlignment == DATA_ALIGNMENT) + return 0; + // Since a non-zero alignment padding cannot be less than min_obj_size (so we can fit the + // alignment padding object), the worst-case alignment padding is correspondingly larger + // than the required alignment. 
+ return requiredAlignment + Align (min_obj_size) - DATA_ALIGNMENT; +} + +inline +ptrdiff_t ComputeMaxStructAlignPadLarge (int requiredAlignment) +{ + if (requiredAlignment <= get_alignment_constant (TRUE)+1) + return 0; + // This is the same as ComputeMaxStructAlignPad, except that in addition to leaving space + // for padding before the actual object, it also leaves space for filling a gap after the + // actual object. This is needed on the large object heap, as the outer allocation functions + // don't operate on an allocation context (which would have left space for the final gap). + return requiredAlignment + Align (min_obj_size) * 2 - DATA_ALIGNMENT; +} + +#else // FEATURE_STRUCTALIGN +#define ComputeMaxStructAlignPad(requiredAlignment) 0 +#define ComputeMaxStructAlignPadLarge(requiredAlignment) 0 +#endif // FEATURE_STRUCTALIGN + +#ifndef FEATURE_STRUCTALIGN +#define node_realigned(node) (((plug_and_reloc*)(node))[-1].reloc & 1) +void set_node_realigned (uint8_t* node); +#endif // FEATURE_STRUCTALIGN + +#define commit_min_th (16*OS_PAGE_SIZE) +#define UOH_ALLOCATION_RETRY_MAX_COUNT 2 + +#ifdef TRACE_GC +extern const char* const allocation_state_str[]; +#endif //TRACE_GC + +extern const size_t etw_allocation_tick; +extern const size_t fgn_check_quantum; + +#ifdef BACKGROUND_GC +extern uint32_t bgc_alloc_spin_count; +extern uint32_t bgc_alloc_spin; +#endif //BACKGROUND_GC + +#define check_msl_status(msg, size) if (msl_status == msl_retry_different_heap) \ + { \ + dprintf (5555, ("h%d RETRY %s(%Id)", heap_number, msg, size)); \ + return a_state_retry_allocate; \ + } + +#ifdef DOUBLY_LINKED_FL +inline +void set_plug_bgc_mark_bit (uint8_t* node) +{ + header(node)->SetBGCMarkBit(); +} + +inline +void set_free_obj_in_compact_bit (uint8_t* node) +{ + header(node)->SetFreeObjInCompactBit(); +} +#endif //DOUBLY_LINKED_FL + +#ifdef SHORT_PLUGS +inline +void set_plug_padded (uint8_t* node) +{ + header(node)->SetMarked(); +} + +inline +BOOL is_plug_padded (uint8_t* node) 
+{ + return header(node)->IsMarked(); +} +#else //SHORT_PLUGS +inline +void set_plug_padded (uint8_t* node) +{ + UNREFERENCED_PARAMETER(node); +} + +inline +BOOL is_plug_padded (uint8_t* node) +{ + UNREFERENCED_PARAMETER(node); + return FALSE; +} +#endif //SHORT_PLUGS + +#ifdef SHORT_PLUGS +inline +uint8_t*& pin_allocation_context_start_region (mark* m) +{ + return m->allocation_context_start_region; +} + +inline +uint8_t* get_plug_start_in_saved (uint8_t* old_loc, mark* pinned_plug_entry) +{ + uint8_t* saved_pre_plug_info = (uint8_t*)(pinned_plug_entry->get_pre_plug_reloc_info()); + uint8_t* plug_start_in_saved = saved_pre_plug_info + (old_loc - (pinned_plug (pinned_plug_entry) - sizeof (plug_and_gap))); + dprintf (2, ("EP: %p(%p), %p", old_loc, pinned_plug (pinned_plug_entry), plug_start_in_saved)); + return plug_start_in_saved; +} +#endif //SHORT_PLUGS + +#ifndef USE_REGIONS +class seg_free_spaces +{ + struct seg_free_space + { + BOOL is_plug; + void* start; + }; + + struct free_space_bucket + { + seg_free_space* free_space; + ptrdiff_t count_add; + ptrdiff_t count_fit; + }; + + void move_bucket (int old_power2, int new_power2) + { + assert (old_power2 >= 0); + assert (old_power2 >= new_power2); + + if (old_power2 == new_power2) + { + return; + } + + seg_free_space* src_index = free_space_buckets[old_power2].free_space; + for (int i = old_power2; i > new_power2; i--) + { + seg_free_space** dest = &(free_space_buckets[i].free_space); + (*dest)++; + + seg_free_space* dest_index = free_space_buckets[i - 1].free_space; + if (i > (new_power2 + 1)) + { + seg_free_space temp = *src_index; + *src_index = *dest_index; + *dest_index = temp; + } + src_index = dest_index; + } + + free_space_buckets[old_power2].count_fit--; + free_space_buckets[new_power2].count_fit++; + } + +#ifdef _DEBUG + void dump_free_space (seg_free_space* item) + { + uint8_t* addr = 0; + size_t len = 0; + + if (item->is_plug) + { + mark* m = (mark*)(item->start); + len = pinned_len (m); + addr = 
pinned_plug (m) - len; + } + else + { + heap_segment* seg = (heap_segment*)(item->start); + addr = heap_segment_plan_allocated (seg); + len = heap_segment_committed (seg) - addr; + } + + dprintf (SEG_REUSE_LOG_1, ("[%d]0x%p %zd", heap_num, addr, len)); + } + + void dump() + { + seg_free_space* item = NULL; + int i = 0; + + dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------\nnow the free spaces look like:", heap_num)); + for (i = 0; i < (free_space_bucket_count - 1); i++) + { + dprintf (SEG_REUSE_LOG_1, ("[%d]Free spaces for 2^%d bucket:", heap_num, (base_power2 + i))); + dprintf (SEG_REUSE_LOG_1, ("[%d]%s %s", heap_num, "start", "len")); + item = free_space_buckets[i].free_space; + while (item < free_space_buckets[i + 1].free_space) + { + dump_free_space (item); + item++; + } + dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------", heap_num)); + } + + dprintf (SEG_REUSE_LOG_1, ("[%d]Free spaces for 2^%d bucket:", heap_num, (base_power2 + i))); + dprintf (SEG_REUSE_LOG_1, ("[%d]%s %s", heap_num, "start", "len")); + item = free_space_buckets[i].free_space; + + while (item <= &seg_free_space_array[free_space_item_count - 1]) + { + dump_free_space (item); + item++; + } + dprintf (SEG_REUSE_LOG_1, ("[%d]----------------------------------", heap_num)); + } +#endif //_DEBUG + + free_space_bucket* free_space_buckets; + seg_free_space* seg_free_space_array; + ptrdiff_t free_space_bucket_count; + ptrdiff_t free_space_item_count; + int base_power2; + int heap_num; +#ifdef _DEBUG + BOOL has_end_of_seg; +#endif //_DEBUG + +public: + seg_free_spaces (int h_number) + { + heap_num = h_number; + } + + BOOL alloc () + { + size_t total_prealloc_size = + MAX_NUM_BUCKETS * sizeof (free_space_bucket) + + MAX_NUM_FREE_SPACES * sizeof (seg_free_space); + + free_space_buckets = (free_space_bucket*) new (nothrow) uint8_t[total_prealloc_size]; + + return (!!free_space_buckets); + } + + void add_buckets (int base, size_t* ordered_free_spaces, int bucket_count, 
size_t item_count) + { + assert (free_space_buckets); + assert (item_count <= (size_t)MAX_PTR); + + free_space_bucket_count = bucket_count; + free_space_item_count = item_count; + base_power2 = base; +#ifdef _DEBUG + has_end_of_seg = FALSE; +#endif //_DEBUG + + ptrdiff_t total_item_count = 0; + ptrdiff_t i = 0; + + seg_free_space_array = (seg_free_space*)(free_space_buckets + free_space_bucket_count); + + for (i = 0; i < (ptrdiff_t)item_count; i++) + { + seg_free_space_array[i].start = 0; + seg_free_space_array[i].is_plug = FALSE; + } + + for (i = 0; i < bucket_count; i++) + { + free_space_buckets[i].count_add = ordered_free_spaces[i]; + free_space_buckets[i].count_fit = ordered_free_spaces[i]; + free_space_buckets[i].free_space = &seg_free_space_array[total_item_count]; + total_item_count += free_space_buckets[i].count_add; + } + + assert (total_item_count == (ptrdiff_t)item_count); + } + + void add (void* start, BOOL plug_p, BOOL first_p) + { + size_t size = (plug_p ? + pinned_len ((mark*)start) : + (heap_segment_committed ((heap_segment*)start) - + heap_segment_plan_allocated ((heap_segment*)start))); + + if (plug_p) + { + dprintf (SEG_REUSE_LOG_1, ("[%d]Adding a free space before plug: %zd", heap_num, size)); + } + else + { + dprintf (SEG_REUSE_LOG_1, ("[%d]Adding a free space at end of seg: %zd", heap_num, size)); +#ifdef _DEBUG + has_end_of_seg = TRUE; +#endif //_DEBUG + } + + if (first_p) + { + size_t eph_gen_starts = gc_heap::eph_gen_starts_size; + size -= eph_gen_starts; + if (plug_p) + { + mark* m = (mark*)(start); + pinned_len (m) -= eph_gen_starts; + } + else + { + heap_segment* seg = (heap_segment*)start; + heap_segment_plan_allocated (seg) += eph_gen_starts; + } + } + + int bucket_power2 = index_of_highest_set_bit (size); + if (bucket_power2 < base_power2) + { + return; + } + + free_space_bucket* bucket = &free_space_buckets[bucket_power2 - base_power2]; + + seg_free_space* bucket_free_space = bucket->free_space; + assert (plug_p || (!plug_p && 
bucket->count_add)); + + if (bucket->count_add == 0) + { + dprintf (SEG_REUSE_LOG_1, ("[%d]Already have enough of 2^%d", heap_num, bucket_power2)); + return; + } + + ptrdiff_t index = bucket->count_add - 1; + + dprintf (SEG_REUSE_LOG_1, ("[%d]Building free spaces: adding %p; len: %zd (2^%d)", + heap_num, + (plug_p ? + (pinned_plug ((mark*)start) - pinned_len ((mark*)start)) : + heap_segment_plan_allocated ((heap_segment*)start)), + size, + bucket_power2)); + + if (plug_p) + { + bucket_free_space[index].is_plug = TRUE; + } + + bucket_free_space[index].start = start; + bucket->count_add--; + } + +#ifdef _DEBUG + void check() + { + ptrdiff_t i = 0; + int end_of_seg_count = 0; + + for (i = 0; i < free_space_item_count; i++) + { + assert (seg_free_space_array[i].start); + if (!(seg_free_space_array[i].is_plug)) + { + end_of_seg_count++; + } + } + + if (has_end_of_seg) + { + assert (end_of_seg_count == 1); + } + else + { + assert (end_of_seg_count == 0); + } + + for (i = 0; i < free_space_bucket_count; i++) + { + assert (free_space_buckets[i].count_add == 0); + } + } +#endif //_DEBUG + + uint8_t* fit (uint8_t* old_loc, + size_t plug_size + REQD_ALIGN_AND_OFFSET_DCL) + { + if (old_loc) + { +#ifdef SHORT_PLUGS + assert (!is_plug_padded (old_loc)); +#endif //SHORT_PLUGS + assert (!node_realigned (old_loc)); + } + + size_t saved_plug_size = plug_size; + +#ifdef FEATURE_STRUCTALIGN + _ASSERTE(requiredAlignment == DATA_ALIGNMENT && false); +#endif // FEATURE_STRUCTALIGN + + size_t plug_size_to_fit = plug_size; + +#ifdef RESPECT_LARGE_ALIGNMENT + plug_size_to_fit += switch_alignment_size(FALSE); +#endif //RESPECT_LARGE_ALIGNMENT + + int plug_power2 = index_of_highest_set_bit (round_up_power2 (plug_size_to_fit + Align(min_obj_size))); + ptrdiff_t i; + uint8_t* new_address = 0; + + if (plug_power2 < base_power2) + { + plug_power2 = base_power2; + } + + int chosen_power2 = plug_power2 - base_power2; + retry: + for (i = chosen_power2; i < free_space_bucket_count; i++) + { + if 
(free_space_buckets[i].count_fit != 0) + { + break; + } + chosen_power2++; + } + + dprintf (SEG_REUSE_LOG_1, ("[%d]Fitting plug len %zd (2^%d) using 2^%d free space", + heap_num, + plug_size, + plug_power2, + (chosen_power2 + base_power2))); + + assert (i < free_space_bucket_count); + + seg_free_space* bucket_free_space = free_space_buckets[chosen_power2].free_space; + ptrdiff_t free_space_count = free_space_buckets[chosen_power2].count_fit; + size_t new_free_space_size = 0; + BOOL can_fit = FALSE; + size_t pad = 0; + + for (i = 0; i < free_space_count; i++) + { + size_t free_space_size = 0; + pad = 0; + + if (bucket_free_space[i].is_plug) + { + mark* m = (mark*)(bucket_free_space[i].start); + uint8_t* plug_free_space_start = pinned_plug (m) - pinned_len (m); + + if (!((old_loc == 0) || same_large_alignment_p (old_loc, plug_free_space_start))) + { + pad = switch_alignment_size (FALSE); + } + + plug_size = saved_plug_size + pad; + + free_space_size = pinned_len (m); + new_address = pinned_plug (m) - pinned_len (m); + + if (free_space_size >= (plug_size + Align (min_obj_size)) || + free_space_size == plug_size) + { + new_free_space_size = free_space_size - plug_size; + pinned_len (m) = new_free_space_size; +#ifdef SIMPLE_DPRINTF + dprintf (SEG_REUSE_LOG_0, ("[%d]FP: 0x%p->0x%p(%zx)(%zx), [0x%p (2^%d) -> [0x%p (2^%d)", + heap_num, + old_loc, + new_address, + (plug_size - pad), + pad, + pinned_plug (m), + index_of_highest_set_bit (free_space_size), + (pinned_plug (m) - pinned_len (m)), + index_of_highest_set_bit (new_free_space_size))); +#endif //SIMPLE_DPRINTF + + if (pad != 0) + { + set_node_realigned (old_loc); + } + + can_fit = TRUE; + } + } + else + { + heap_segment* seg = (heap_segment*)(bucket_free_space[i].start); + free_space_size = heap_segment_committed (seg) - heap_segment_plan_allocated (seg); + + if (!((old_loc == 0) || same_large_alignment_p (old_loc, heap_segment_plan_allocated (seg)))) + { + pad = switch_alignment_size (FALSE); + } + + plug_size = 
saved_plug_size + pad; + + if (free_space_size >= (plug_size + Align (min_obj_size)) || + free_space_size == plug_size) + { + new_address = heap_segment_plan_allocated (seg); + new_free_space_size = free_space_size - plug_size; + heap_segment_plan_allocated (seg) = new_address + plug_size; +#ifdef SIMPLE_DPRINTF + dprintf (SEG_REUSE_LOG_0, ("[%d]FS: 0x%p-> 0x%p(%zd) (2^%d) -> 0x%p (2^%d)", + heap_num, + old_loc, + new_address, + (plug_size - pad), + index_of_highest_set_bit (free_space_size), + heap_segment_plan_allocated (seg), + index_of_highest_set_bit (new_free_space_size))); +#endif //SIMPLE_DPRINTF + + if (pad != 0) + set_node_realigned (old_loc); + + can_fit = TRUE; + } + } + + if (can_fit) + { + break; + } + } + + if (!can_fit) + { + assert (chosen_power2 == 0); + chosen_power2 = 1; + goto retry; + } + + new_address += pad; + assert ((chosen_power2 && (i == 0)) || + ((!chosen_power2) && (i < free_space_count))); + + int new_bucket_power2 = index_of_highest_set_bit (new_free_space_size); + + if (new_bucket_power2 < base_power2) + { + new_bucket_power2 = base_power2; + } + + move_bucket (chosen_power2, new_bucket_power2 - base_power2); + + return new_address; + } + + void cleanup () + { + if (free_space_buckets) + { + delete [] free_space_buckets; + } + if (seg_free_space_array) + { + delete [] seg_free_space_array; + } + } +}; +#endif //!USE_REGIONS + +#ifdef FEATURE_PREMORTEM_FINALIZATION +#define REGISTER_FOR_FINALIZATION(_object, _size) \ + hp->finalize_queue->RegisterForFinalization (0, (_object), (_size)) +#else // FEATURE_PREMORTEM_FINALIZATION +#define REGISTER_FOR_FINALIZATION(_object, _size) true +#endif // FEATURE_PREMORTEM_FINALIZATION + +#define CHECK_ALLOC_AND_POSSIBLY_REGISTER_FOR_FINALIZATION(_object, _size, _register) do { \ + if ((_object) == NULL || ((_register) && !REGISTER_FOR_FINALIZATION(_object, _size))) \ + { \ + STRESS_LOG_OOM_STACK(_size); \ + return NULL; \ + } \ +} while (false) + +extern uint64_t qpf; +extern double qpf_ms; 
+extern double qpf_us; + +#ifdef FEATURE_BASICFREEZE +heap_segment* ro_segment_lookup (uint8_t* o); +#endif //FEATURE_BASICFREEZE + +struct imemory_data +{ + uint8_t* memory_base; +}; + +struct numa_reserved_block +{ + uint8_t* memory_base; + size_t block_size; + + numa_reserved_block() : memory_base(nullptr), block_size(0) { } +}; + +struct initial_memory_details +{ + imemory_data *initial_memory; + imemory_data *initial_normal_heap; // points into initial_memory_array + imemory_data *initial_large_heap; // points into initial_memory_array + imemory_data *initial_pinned_heap; // points into initial_memory_array + + size_t block_size_normal; + size_t block_size_large; + size_t block_size_pinned; + + int block_count; // # of blocks in each + int current_block_normal; + int current_block_large; + int current_block_pinned; + + enum + { + ALLATONCE = 1, + EACH_GENERATION, + EACH_BLOCK, + ALLATONCE_SEPARATED_POH, + EACH_NUMA_NODE + }; + + size_t allocation_pattern; + + size_t block_size(int i) + { + switch (i / block_count) + { + case 0: return block_size_normal; + case 1: return block_size_large; + case 2: return block_size_pinned; + default: UNREACHABLE(); + } + }; + + void* get_initial_memory (int gen, int h_number) + { + switch (gen) + { + case soh_gen0: + case soh_gen1: + case soh_gen2: return initial_normal_heap[h_number].memory_base; + case loh_generation: return initial_large_heap[h_number].memory_base; + case poh_generation: return initial_pinned_heap[h_number].memory_base; + default: UNREACHABLE(); + } + }; + + size_t get_initial_size (int gen) + { + switch (gen) + { + case soh_gen0: + case soh_gen1: + case soh_gen2: return block_size_normal; + case loh_generation: return block_size_large; + case poh_generation: return block_size_pinned; + default: UNREACHABLE(); + } + }; + + int numa_reserved_block_count; + numa_reserved_block* numa_reserved_block_table; +}; + +extern initial_memory_details memory_details; + +#if defined(BACKGROUND_GC) && 
!defined(USE_REGIONS) +#define SEGMENT_INITIAL_COMMIT (2*OS_PAGE_SIZE) +#else +#define SEGMENT_INITIAL_COMMIT (OS_PAGE_SIZE) +#endif //BACKGROUND_GC && !USE_REGIONS + +// min size to decommit to make the OS call worthwhile +#define MIN_DECOMMIT_SIZE (100*OS_PAGE_SIZE) + +#ifdef SERVER_GC + +#ifdef HOST_64BIT + +#define INITIAL_ALLOC ((size_t)((size_t)4*1024*1024*1024)) +#define LHEAP_ALLOC ((size_t)(1024*1024*256)) + +#else + +#define INITIAL_ALLOC ((size_t)(1024*1024*64)) +#define LHEAP_ALLOC ((size_t)(1024*1024*32)) + +#endif // HOST_64BIT + +#else //SERVER_GC + +#ifdef HOST_64BIT + +#define INITIAL_ALLOC ((size_t)(1024*1024*256)) +#define LHEAP_ALLOC ((size_t)(1024*1024*128)) + +#else + +#define INITIAL_ALLOC ((size_t)(1024*1024*16)) +#define LHEAP_ALLOC ((size_t)(1024*1024*16)) + +#endif // HOST_64BIT + +#endif //SERVER_GC + +} // namespace WKS/SVR + +#endif // GC_INTERNAL_H diff --git a/src/coreclr/gc/gcsvr.cpp b/src/coreclr/gc/gcsvr.cpp deleted file mode 100644 index 4d54ca2db58aa2..00000000000000 --- a/src/coreclr/gc/gcsvr.cpp +++ /dev/null @@ -1,7 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#ifdef FEATURE_SVR_GC -#define SERVER_GC 1 -#include "gc.cpp" -#endif // FEATURE_SVR_GC diff --git a/src/coreclr/gc/gcwks.cpp b/src/coreclr/gc/gcwks.cpp deleted file mode 100644 index 886e199a29efb4..00000000000000 --- a/src/coreclr/gc/gcwks.cpp +++ /dev/null @@ -1,8 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#ifdef SERVER_GC -#undef SERVER_GC -#endif - -#include "gc.cpp" diff --git a/src/coreclr/gc/init.cpp b/src/coreclr/gc/init.cpp index ccf0b35b3d312c..2be60b63ab0a35 100644 --- a/src/coreclr/gc/init.cpp +++ b/src/coreclr/gc/init.cpp @@ -1,6 +1,14 @@ // Licensed to the .NET Foundation under one or more agreements. 
// The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR { +#else // SERVER_GC +namespace WKS { +#endif // SERVER_GC + #ifdef WRITE_WATCH void hardware_write_watch_api_supported() { @@ -1242,6 +1250,11 @@ size_t gc_heap::get_gen0_min_size() return gen0size; } +#ifndef HOST_64BIT +// Max size of heap hard limit (2^31) to be able to be aligned and rounded up on power of 2 and not overflow +const size_t max_heap_hard_limit = (size_t)2 * (size_t)1024 * (size_t)1024 * (size_t)1024; +#endif //!HOST_64BIT + bool gc_heap::compute_hard_limit_from_heap_limits() { #ifndef HOST_64BIT @@ -1552,3 +1565,5 @@ int gc_heap::refresh_memory_limit() return (int)status; } + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/interface.cpp b/src/coreclr/gc/interface.cpp index 40fcc1f46b51ae..91691a4694d802 100644 --- a/src/coreclr/gc/interface.cpp +++ b/src/coreclr/gc/interface.cpp @@ -1,6 +1,16 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + class NoGCRegionLockHolder { public: @@ -14,6 +24,42 @@ class NoGCRegionLockHolder leave_spin_lock_noinstru(&g_no_gc_lock); } }; + +inline +CObjectHeader* gc_heap::allocate (size_t jsize, alloc_context* acontext, uint32_t flags) +{ + size_t size = Align (jsize); + assert (size >= Align (min_obj_size)); + { + retry: + uint8_t* result = acontext->alloc_ptr; + acontext->alloc_ptr+=size; + if (acontext->alloc_ptr <= acontext->alloc_limit) + { + CObjectHeader* obj = (CObjectHeader*)result; + assert (obj != 0); + return obj; + } + else + { + acontext->alloc_ptr -= size; + +#ifdef _MSC_VER +#pragma inline_depth(0) +#endif //_MSC_VER + + if (! 
allocate_more_space (acontext, size, flags, 0)) + return 0; + +#ifdef _MSC_VER +#pragma inline_depth(20) +#endif //_MSC_VER + + goto retry; + } + } +} + void GCHeap::Shutdown() { // This does not work for standalone GC on Windows because windows closed the file @@ -2736,3 +2782,5 @@ int GCHeap::RefreshMemoryLimit() { return gc_heap::refresh_memory_limit(); } + +} // namespace SVR/WKS diff --git a/src/coreclr/gc/mark_phase.cpp b/src/coreclr/gc/mark_phase.cpp index 9948265b6b763f..eb355e76154a4a 100644 --- a/src/coreclr/gc/mark_phase.cpp +++ b/src/coreclr/gc/mark_phase.cpp @@ -1,6 +1,20 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + +#ifdef MULTIPLE_HEAPS +gc_heap* seg_mapping_table_heap_of_gc (uint8_t* o); +#endif //MULTIPLE_HEAPS + inline size_t clear_special_bits (uint8_t* node) { @@ -97,24 +111,6 @@ size_t gc_heap::deque_pinned_plug () return m; } -inline -mark* gc_heap::pinned_plug_of (size_t bos) -{ - return &mark_stack_array [ bos ]; -} - -inline -mark* gc_heap::oldest_pin () -{ - return pinned_plug_of (mark_stack_bos); -} - -inline -BOOL gc_heap::pinned_plug_que_empty_p () -{ - return (mark_stack_bos == mark_stack_tos); -} - inline mark* gc_heap::before_oldest_pin() { @@ -143,18 +139,22 @@ void gc_heap::make_mark_stack (mark* arr) #endif //MH_SC_MARK } -#ifdef BACKGROUND_GC inline -size_t& gc_heap::bpromoted_bytes(int thread) +gc_heap* gc_heap::heap_of_gc (uint8_t* o) { #ifdef MULTIPLE_HEAPS - return g_bpromoted [thread*16]; + if (o == 0) + return g_heaps [0]; + + gc_heap* hp = seg_mapping_table_heap_of_gc (o); + return (hp ? 
hp : g_heaps[0]); #else //MULTIPLE_HEAPS - UNREFERENCED_PARAMETER(thread); - return g_bpromoted; + UNREFERENCED_PARAMETER(o); + return __this; #endif //MULTIPLE_HEAPS } +#ifdef BACKGROUND_GC void gc_heap::make_background_mark_stack (uint8_t** arr) { background_mark_stack_array = arr; @@ -169,36 +169,12 @@ void gc_heap::make_c_mark_list (uint8_t** arr) c_mark_list_length = 1 + (OS_PAGE_SIZE / MIN_OBJECT_SIZE); } -inline -unsigned int gc_heap::mark_array_marked(uint8_t* add) -{ - return mark_array [mark_word_of (add)] & (1 << mark_bit_bit_of (add)); -} - inline BOOL gc_heap::is_mark_bit_set (uint8_t* add) { return (mark_array [mark_word_of (add)] & (1 << mark_bit_bit_of (add))); } -inline -void gc_heap::mark_array_set_marked (uint8_t* add) -{ - size_t index = mark_word_of (add); - uint32_t val = (1 << mark_bit_bit_of (add)); -#ifdef MULTIPLE_HEAPS - Interlocked::Or (&(mark_array [index]), val); -#else - mark_array [index] |= val; -#endif -} - -inline -void gc_heap::mark_array_clear_marked (uint8_t* add) -{ - mark_array [mark_word_of (add)] &= ~(1 << mark_bit_bit_of (add)); -} - #ifdef FEATURE_BASICFREEZE // end must be page aligned addresses. 
void gc_heap::clear_mark_array (uint8_t* from, uint8_t* end) @@ -919,17 +895,6 @@ void gc_heap::grow_mark_list () #ifdef BACKGROUND_GC #ifdef FEATURE_BASICFREEZE -inline -void gc_heap::seg_clear_mark_array_bits_soh (heap_segment* seg) -{ - uint8_t* range_beg = 0; - uint8_t* range_end = 0; - if (bgc_mark_array_range (seg, FALSE, &range_beg, &range_end)) - { - clear_mark_array (range_beg, align_on_mark_word (range_end)); - } -} - inline void gc_heap::seg_set_mark_array_bits_soh (heap_segment* seg) { @@ -1011,13 +976,7 @@ void gc_heap::bgc_clear_batch_mark_array_bits (uint8_t* start, uint8_t* end) #endif //BACKGROUND_GC -inline -BOOL gc_heap::is_mark_set (uint8_t* o) -{ - return marked (o); -} - -inline +/*inline*/ size_t gc_heap::get_promoted_bytes() { #ifdef USE_REGIONS @@ -1477,24 +1436,6 @@ BOOL gc_heap::gc_mark1 (uint8_t* o) return marked; } -#ifdef USE_REGIONS -inline bool gc_heap::is_in_gc_range (uint8_t* o) -{ -#ifdef FEATURE_BASICFREEZE - // we may have frozen objects in read only segments - // outside of the reserved address range of the gc heap - assert (((g_gc_lowest_address <= o) && (o < g_gc_highest_address)) || - (o == nullptr) || (ro_segment_lookup (o) != nullptr)); -#else //FEATURE_BASICFREEZE - // without frozen objects, every non-null pointer must be - // within the heap - assert ((o == nullptr) || (g_gc_lowest_address <= o) && (o < g_gc_highest_address)); -#endif //FEATURE_BASICFREEZE - return ((gc_low <= o) && (o < gc_high)); -} - -#endif //USE_REGIONS - inline BOOL gc_heap::gc_mark (uint8_t* o, uint8_t* low, uint8_t* high, int condemned_gen) { @@ -2871,24 +2812,6 @@ void gc_heap::fire_mark_event (int root_type, size_t& current_promoted_bytes, si #endif // FEATURE_EVENT_TRACE } -#ifdef FEATURE_EVENT_TRACE -inline -void gc_heap::record_mark_time (uint64_t& mark_time, - uint64_t& current_mark_time, - uint64_t& last_mark_time) -{ - if (informational_event_enabled_p) - { - current_mark_time = GetHighPrecisionTimeStamp(); - mark_time = 
limit_time_to_uint32 (current_mark_time - last_mark_time); - dprintf (3, ("%zd - %zd = %zd", - current_mark_time, last_mark_time, (current_mark_time - last_mark_time))); - last_mark_time = current_mark_time; - } -} - -#endif //FEATURE_EVENT_TRACE - void gc_heap::mark_phase (int condemned_gen_number) { assert (settings.concurrent == FALSE); @@ -3597,22 +3520,6 @@ void gc_heap::mark_phase (int condemned_gen_number) dprintf(2,("---- End of mark phase ----")); } -inline -void gc_heap::pin_object (uint8_t* o, uint8_t** ppObject) -{ - dprintf (3, ("Pinning %zx->%zx", (size_t)ppObject, (size_t)o)); - set_pinned (o); - -#ifdef FEATURE_EVENT_TRACE - if(EVENT_ENABLED(PinObjectAtGCTime)) - { - fire_etw_pin_object_event(o, ppObject); - } -#endif // FEATURE_EVENT_TRACE - - num_pinned_objects++; -} - size_t gc_heap::get_total_pinned_objects() { #ifdef MULTIPLE_HEAPS @@ -3728,7 +3635,7 @@ void gc_heap::grow_mark_list_piece() // if the child object's region is <= condemned_gen. // cg_pointers_found means it's pointing into a lower generation so it's incremented // if the child object's region is < current_gen. -inline void +/*inline*/ void gc_heap::mark_through_cards_helper (uint8_t** poo, size_t& n_gen, size_t& cg_pointers_found, card_fn fn, uint8_t* nhigh, @@ -4202,3 +4109,5 @@ void gc_heap::mark_through_cards_for_segments (card_fn fn, BOOL relocating CARD_ n_gen, n_eph, n_card_set, total_cards_cleared, generation_skip_ratio)); } } + +} // namespace SVR/WKS diff --git a/src/coreclr/gc/memory.cpp b/src/coreclr/gc/memory.cpp index cd533a16e8036d..2f18ec90553dce 100644 --- a/src/coreclr/gc/memory.cpp +++ b/src/coreclr/gc/memory.cpp @@ -1,6 +1,16 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + bool gc_heap::virtual_alloc_commit_for_heap (void* addr, size_t size, int h_number) { #ifdef MULTIPLE_HEAPS @@ -326,6 +336,7 @@ bool gc_heap::decommit_step (uint64_t step_milliseconds) decommit_size += hp->decommit_ephemeral_segment_pages_step (); } #endif //MULTIPLE_HEAPS + return (decommit_size != 0); } @@ -482,3 +493,5 @@ size_t gc_heap::decommit_ephemeral_segment_pages_step () } #endif //MULTIPLE_HEAPS + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/no_gc.cpp b/src/coreclr/gc/no_gc.cpp index 6569f84dead1df..dfcc281e64129e 100644 --- a/src/coreclr/gc/no_gc.cpp +++ b/src/coreclr/gc/no_gc.cpp @@ -1,6 +1,14 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR { +#else // SERVER_GC +namespace WKS { +#endif // SERVER_GC + void gc_heap::update_collection_counts_for_no_gc() { assert (settings.pause_mode == pause_no_gc); @@ -929,3 +937,5 @@ enable_no_gc_region_callback_status gc_heap::enable_no_gc_callback(NoGCRegionCal return status; } + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/plan_phase.cpp b/src/coreclr/gc/plan_phase.cpp index eee724dad0892d..da04f8b4003793 100644 --- a/src/coreclr/gc/plan_phase.cpp +++ b/src/coreclr/gc/plan_phase.cpp @@ -1,6 +1,19 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + +// If every heap's gen2 or gen3 size is less than this threshold we will do a blocking GC. 
+const size_t bgc_min_per_heap = 4*1024*1024; + inline BOOL is_induced_blocking (gc_reason reason) { @@ -723,66 +736,156 @@ bool gc_heap::init_table_for_region (int gen_number, heap_segment* region) #endif //USE_REGIONS -// The following 2 methods Use integer division to prevent potential floating point exception. -// FPE may occur if we use floating point division because of speculative execution. -// -// Return the percentage of efficiency (between 0 and 100) of the allocator. -inline -size_t gc_heap::generation_allocator_efficiency_percent (generation* inst) +inline BOOL +gc_heap::dt_low_ephemeral_space_p (gc_tuning_point tp) { -#ifdef DYNAMIC_HEAP_COUNT - if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) + BOOL ret = FALSE; + + switch (tp) { - uint64_t total_plan_allocated = generation_total_plan_allocated (inst); - uint64_t condemned_allocated = generation_condemned_allocated (inst); - return ((total_plan_allocated == 0) ? 0 : (100 * (total_plan_allocated - condemned_allocated) / total_plan_allocated)); + case tuning_deciding_condemned_gen: +#ifndef USE_REGIONS + case tuning_deciding_compaction: + case tuning_deciding_expansion: +#endif //USE_REGIONS + case tuning_deciding_full_gc: + { + ret = (!ephemeral_gen_fit_p (tp)); + break; + } +#ifndef USE_REGIONS + case tuning_deciding_promote_ephemeral: + { + size_t new_gen0size = approximate_new_allocation(); + ptrdiff_t plan_ephemeral_size = total_ephemeral_size; + + dprintf (GTC_LOG, ("h%d: plan eph size is %zd, new gen0 is %zd", + heap_number, plan_ephemeral_size, new_gen0size)); + ret = ((soh_segment_size - segment_info_size) < (plan_ephemeral_size + new_gen0size)); + break; + } +#endif //USE_REGIONS + default: + { + assert (!"invalid tuning reason"); + break; + } } - else -#endif //DYNAMIC_HEAP_COUNT + + return ret; +} + +inline BOOL +gc_heap::dt_estimate_reclaim_space_p (gc_tuning_point tp, int gen_number) +{ + BOOL ret = FALSE; + + switch (tp) { - uint64_t free_obj_space = 
generation_free_obj_space (inst); - uint64_t free_list_allocated = generation_free_list_allocated (inst); - if ((free_list_allocated + free_obj_space) == 0) - return 0; - return (size_t)((100 * free_list_allocated) / (free_list_allocated + free_obj_space)); + case tuning_deciding_condemned_gen: + { + if (gen_number == max_generation) + { + size_t est_maxgen_free = estimated_reclaim (gen_number); + + uint32_t num_heaps = 1; +#ifdef MULTIPLE_HEAPS + num_heaps = gc_heap::n_heaps; +#endif //MULTIPLE_HEAPS + + size_t min_frag_th = min_reclaim_fragmentation_threshold (num_heaps); + dprintf (GTC_LOG, ("h%d, min frag is %zd", heap_number, min_frag_th)); + ret = (est_maxgen_free >= min_frag_th); + } + else + { + assert (0); + } + break; + } + + default: + break; } + + return ret; } -inline -size_t gc_heap::generation_unusable_fragmentation (generation* inst, int hn) +inline BOOL +gc_heap::dt_estimate_high_frag_p (gc_tuning_point tp, int gen_number, uint64_t available_mem) { -#ifdef DYNAMIC_HEAP_COUNT - if (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) - { - uint64_t total_plan_allocated = generation_total_plan_allocated (inst); - uint64_t condemned_allocated = generation_condemned_allocated (inst); - uint64_t unusable_frag = 0; - size_t fo_space = (((ptrdiff_t)generation_free_obj_space (inst) < 0) ? 
0 : generation_free_obj_space (inst)); + BOOL ret = FALSE; - if (total_plan_allocated != 0) + switch (tp) + { + case tuning_deciding_condemned_gen: { - unusable_frag = fo_space + (condemned_allocated * generation_free_list_space (inst) / total_plan_allocated); - } + if (gen_number == max_generation) + { + dynamic_data* dd = dynamic_data_of (gen_number); + float est_frag_ratio = 0; + if (dd_current_size (dd) == 0) + { + est_frag_ratio = 1; + } + else if ((dd_fragmentation (dd) == 0) || (dd_fragmentation (dd) + dd_current_size (dd) == 0)) + { + est_frag_ratio = 0; + } + else + { + est_frag_ratio = (float)dd_fragmentation (dd) / (float)(dd_fragmentation (dd) + dd_current_size (dd)); + } - dprintf (3, ("h%d g%d FLa: %Id, ESa: %Id, Ca: %Id | FO: %Id, FL %Id, fl effi %.3f, unusable fl is %Id", - hn, inst->gen_num, - generation_free_list_allocated (inst), generation_end_seg_allocated (inst), (size_t)condemned_allocated, - fo_space, generation_free_list_space (inst), - ((total_plan_allocated == 0) ? 
1.0 : ((float)(total_plan_allocated - condemned_allocated) / (float)total_plan_allocated)), - (size_t)unusable_frag)); + size_t est_frag = (dd_fragmentation (dd) + (size_t)((dd_desired_allocation (dd) - dd_new_allocation (dd)) * est_frag_ratio)); + dprintf (GTC_LOG, ("h%d: gen%d: current_size is %zd, frag is %zd, est_frag_ratio is %d%%, estimated frag is %zd", + heap_number, + gen_number, + dd_current_size (dd), + dd_fragmentation (dd), + (int)(est_frag_ratio * 100), + est_frag)); + + uint32_t num_heaps = 1; + +#ifdef MULTIPLE_HEAPS + num_heaps = gc_heap::n_heaps; +#endif //MULTIPLE_HEAPS + uint64_t min_frag_th = min_high_fragmentation_threshold(available_mem, num_heaps); + ret = (est_frag >= min_frag_th); + } + else + { + assert (0); + } + break; + } - return (size_t)unusable_frag; + default: + break; } - else -#endif //DYNAMIC_HEAP_COUNT + + return ret; +} + +inline BOOL +gc_heap::dt_low_card_table_efficiency_p (gc_tuning_point tp) +{ + BOOL ret = FALSE; + + switch (tp) { - uint64_t free_obj_space = generation_free_obj_space (inst); - uint64_t free_list_allocated = generation_free_list_allocated (inst); - uint64_t free_list_space = generation_free_list_space (inst); - if ((free_list_allocated + free_obj_space) == 0) - return 0; - return (size_t)(free_obj_space + (free_obj_space * free_list_space) / (free_list_allocated + free_obj_space)); + case tuning_deciding_condemned_gen: + { + ret = (generation_skip_ratio < generation_skip_ratio_threshold); + break; } + + default: + break; + } + + return ret; } /* @@ -1974,12 +2077,6 @@ void gc_heap::process_ephemeral_boundaries (uint8_t* x, #endif //!USE_REGIONS #ifdef FEATURE_LOH_COMPACTION -inline -BOOL gc_heap::loh_pinned_plug_que_empty_p() -{ - return (loh_pinned_queue_bos == loh_pinned_queue_tos); -} - void gc_heap::loh_set_allocator_next_pin() { if (!(loh_pinned_plug_que_empty_p())) @@ -2005,12 +2102,6 @@ size_t gc_heap::loh_deque_pinned_plug () return m; } -inline -mark* gc_heap::loh_pinned_plug_of (size_t bos) -{ - 
return &loh_pinned_queue[bos]; -} - inline mark* gc_heap::loh_oldest_pin() { @@ -2198,20 +2289,6 @@ BOOL gc_heap::loh_compaction_requested() return (loh_compaction_always_p || (loh_compaction_mode != loh_compaction_default)); } -inline -void gc_heap::check_loh_compact_mode (BOOL all_heaps_compacted_p) -{ - if (settings.loh_compaction && (loh_compaction_mode == loh_compaction_once)) - { - if (all_heaps_compacted_p) - { - // If the compaction mode says to compact once and we are going to compact LOH, - // we need to revert it back to no compaction. - loh_compaction_mode = loh_compaction_default; - } - } -} - BOOL gc_heap::plan_loh() { #ifdef FEATURE_EVENT_TRACE @@ -2482,8 +2559,8 @@ void gc_heap::record_interesting_data_point (interesting_data_point idp) #else UNREFERENCED_PARAMETER(idp); #endif //GC_CONFIG_DRIVEN -} +} #ifdef USE_REGIONS void gc_heap::skip_pins_in_alloc_region (generation* consing_gen, int plan_gen_num) { @@ -3223,6 +3300,17 @@ inline void save_allocated(heap_segment* seg) } } +#ifdef USE_INTROSORT +#define _sort introsort::sort +#elif defined(USE_VXSORT) +// in this case we have do_vxsort which takes an additional range that +// all items to be sorted are contained in +// so do not #define _sort +#else //USE_INTROSORT +#define _sort qsort1 +void qsort1(uint8_t** low, uint8_t** high, unsigned int depth); +#endif //USE_INTROSORT + void gc_heap::plan_phase (int condemned_gen_number) { size_t old_gen2_allocated = 0; @@ -5977,23 +6065,6 @@ void gc_heap::sweep_region_in_plan (heap_segment* region, } } -inline -void gc_heap::check_demotion_helper_sip (uint8_t** pval, int parent_gen_num, uint8_t* parent_loc) -{ - uint8_t* child_object = *pval; - if (!is_in_heap_range (child_object)) - return; - assert (child_object != nullptr); - int child_object_plan_gen = get_region_plan_gen_num (child_object); - - if (child_object_plan_gen < parent_gen_num) - { - set_card (card_of (parent_loc)); - } - - dprintf (3, ("SCS %d, %d", child_object_plan_gen, parent_gen_num)); 
-} - #endif //USE_REGIONS #ifndef USE_REGIONS #ifdef SEG_REUSE_STATS @@ -8367,3 +8438,5 @@ BOOL gc_heap::should_do_sweeping_gc (BOOL compact_p) } #endif //GC_CONFIG_DRIVEN + +} // namespace SVR/WKS diff --git a/src/coreclr/gc/region_allocator.cpp b/src/coreclr/gc/region_allocator.cpp index c30493055ee204..fae6b7f4b0a51f 100644 --- a/src/coreclr/gc/region_allocator.cpp +++ b/src/coreclr/gc/region_allocator.cpp @@ -1,8 +1,18 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" #ifdef USE_REGIONS + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + bool region_allocator::init (uint8_t* start, uint8_t* end, size_t alignment, uint8_t** lowest, uint8_t** highest) { uint8_t* actual_start = start; @@ -488,4 +498,7 @@ void region_allocator::move_highest_free_regions (int64_t n, bool small_region_p current_index -= current_num_units; } } + +} // namespace WKS/SVR + #endif //USE_REGIONS diff --git a/src/coreclr/gc/region_free_list.cpp b/src/coreclr/gc/region_free_list.cpp index 24dfc127baa7e2..98eb10bbb9b545 100644 --- a/src/coreclr/gc/region_free_list.cpp +++ b/src/coreclr/gc/region_free_list.cpp @@ -1,8 +1,18 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+#include "gcinternal.h" #ifdef USE_REGIONS + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + region_free_list::region_free_list() : num_free_regions (0), size_free_regions (0), size_committed_in_free_regions (0), @@ -480,4 +490,7 @@ void region_free_list::sort_by_committed_and_age() } tail_free_region = prev; } + +} // namespace WKS/SVR + #endif //USE_REGIONS diff --git a/src/coreclr/gc/regions_segments.cpp b/src/coreclr/gc/regions_segments.cpp index 613b96468a958f..26417f2c83679c 100644 --- a/src/coreclr/gc/regions_segments.cpp +++ b/src/coreclr/gc/regions_segments.cpp @@ -1,6 +1,45 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + +inline +uint8_t* align_on_segment (uint8_t* add) +{ + return (uint8_t*)((size_t)(add + (((size_t)1 << gc_heap::min_segment_size_shr) - 1)) & ~(((size_t)1 << gc_heap::min_segment_size_shr) - 1)); +} + +#ifdef FEATURE_BASICFREEZE +inline +size_t ro_seg_begin_index (heap_segment* seg) +{ +#ifdef USE_REGIONS + size_t begin_index = (size_t)heap_segment_mem (seg) >> gc_heap::min_segment_size_shr; +#else + size_t begin_index = (size_t)seg >> gc_heap::min_segment_size_shr; +#endif //USE_REGIONS + begin_index = max (begin_index, (size_t)g_gc_lowest_address >> gc_heap::min_segment_size_shr); + return begin_index; +} + +inline +size_t ro_seg_end_index (heap_segment* seg) +{ + size_t end_index = (size_t)(heap_segment_reserved (seg) - 1) >> gc_heap::min_segment_size_shr; + end_index = min (end_index, (size_t)g_gc_highest_address >> gc_heap::min_segment_size_shr); + return end_index; +} + +#endif //FEATURE_BASICFREEZE + size_t size_seg_mapping_table_of (uint8_t* from, uint8_t* end) { from = align_lower_segment (from); @@ -17,12 +56,6 @@ size_t size_region_to_generation_table_of (uint8_t* 
from, uint8_t* end) return sizeof (uint8_t)*((size_t)(end - from) >> gc_heap::min_segment_size_shr); } -inline -size_t seg_mapping_word_of (uint8_t* add) -{ - return (size_t)add >> gc_heap::min_segment_size_shr; -} - #ifdef FEATURE_BASICFREEZE void seg_mapping_table_add_ro_segment (heap_segment* seg) { @@ -1087,7 +1120,11 @@ bool gc_heap::is_region_demoted (uint8_t* obj) return demoted_p; } -inline +#ifdef USE_REGIONS +static GCSpinLock write_barrier_spin_lock; +#endif //USE_REGIONS + +/*inline*/ void gc_heap::set_region_gen_num (heap_segment* region, int gen_num) { assert (gen_num < (1 << (sizeof (uint8_t) * 8))); @@ -1164,67 +1201,6 @@ void gc_heap::set_region_gen_num (heap_segment* region, int gen_num) } } -inline -void gc_heap::set_region_plan_gen_num (heap_segment* region, int plan_gen_num, bool replace_p) -{ - int gen_num = heap_segment_gen_num (region); - int supposed_plan_gen_num = get_plan_gen_num (gen_num); - dprintf (REGIONS_LOG, ("h%d setting plan gen on %p->%p(was gen%d) to %d(should be: %d) %s", - heap_number, region, - heap_segment_mem (region), - gen_num, plan_gen_num, - supposed_plan_gen_num, - ((plan_gen_num < supposed_plan_gen_num) ? "DEMOTED" : "ND"))); - region_info region_info_bits_to_set = (region_info)(plan_gen_num << RI_PLAN_GEN_SHR); - if ((plan_gen_num < supposed_plan_gen_num) && (heap_segment_pinned_survived (region) != 0)) - { - if (!settings.demotion) - { - settings.demotion = TRUE; - } - get_gc_data_per_heap()->set_mechanism_bit (gc_demotion_bit); - region->flags |= heap_segment_flags_demoted; - region_info_bits_to_set = (region_info)(region_info_bits_to_set | RI_DEMOTED); - } - else - { - region->flags &= ~heap_segment_flags_demoted; - } - - // If replace_p is true, it means we need to move a region from its original planned gen to this new gen. 
- if (replace_p) - { - int original_plan_gen_num = heap_segment_plan_gen_num (region); - planned_regions_per_gen[original_plan_gen_num]--; - } - - planned_regions_per_gen[plan_gen_num]++; - dprintf (REGIONS_LOG, ("h%d g%d %zx(%zx) -> g%d (total %d region planned in g%d)", - heap_number, heap_segment_gen_num (region), (size_t)region, heap_segment_mem (region), plan_gen_num, planned_regions_per_gen[plan_gen_num], plan_gen_num)); - - heap_segment_plan_gen_num (region) = plan_gen_num; - - uint8_t* region_start = get_region_start (region); - uint8_t* region_end = heap_segment_reserved (region); - - size_t region_index_start = get_basic_region_index_for_address (region_start); - size_t region_index_end = get_basic_region_index_for_address (region_end); - for (size_t region_index = region_index_start; region_index < region_index_end; region_index++) - { - assert (plan_gen_num <= max_generation); - map_region_to_generation[region_index] = (region_info)(region_info_bits_to_set | (map_region_to_generation[region_index] & ~(RI_PLAN_GEN_MASK|RI_DEMOTED))); - } -} - -inline -void gc_heap::set_region_plan_gen_num_sip (heap_segment* region, int plan_gen_num) -{ - if (!heap_segment_swept_in_plan (region)) - { - set_region_plan_gen_num (region, plan_gen_num); - } -} - void gc_heap::set_region_sweep_in_plan (heap_segment*region) { heap_segment_swept_in_plan (region) = true; @@ -2385,3 +2361,5 @@ void gc_heap::generation_delete_heap_segment (generation* gen, } #endif //BACKGROUND_GC + +} // namespace SVR/WKS diff --git a/src/coreclr/gc/relocate_compact.cpp b/src/coreclr/gc/relocate_compact.cpp index 7d8caa5e9926bb..161dc211b3e482 100644 --- a/src/coreclr/gc/relocate_compact.cpp +++ b/src/coreclr/gc/relocate_compact.cpp @@ -1,6 +1,16 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + void memcopy (uint8_t* dmem, uint8_t* smem, size_t size) { const size_t sz4ptr = sizeof(PTR_PTR)*4; @@ -53,6 +63,24 @@ bool gc_heap::should_check_brick_for_reloc (uint8_t* o) return (map_region_to_generation_skewed[skewed_basic_region_index] & (RI_SIP|RI_GEN_MASK)) <= settings.condemned_generation; } +inline +void gc_heap::check_demotion_helper_sip (uint8_t** pval, int parent_gen_num, uint8_t* parent_loc) +{ + uint8_t* child_object = *pval; + if (!is_in_heap_range (child_object)) + return; + + assert (child_object != nullptr); + int child_object_plan_gen = get_region_plan_gen_num (child_object); + + if (child_object_plan_gen < parent_gen_num) + { + set_card (card_of (parent_loc)); + } + + dprintf (3, ("SCS %d, %d", child_object_plan_gen, parent_gen_num)); +} + #endif //USE_REGIONS #ifdef FEATURE_LOH_COMPACTION @@ -848,6 +876,18 @@ void gc_heap::verify_pins_with_post_plug_info (const char* msg) #endif // _DEBUG && VERIFY_HEAP } +#ifdef COLLECTIBLE_CLASS +// We don't want to burn another ptr size space for pinned plugs to record this so just +// set the card unconditionally for collectible objects if we are demoting. +inline void gc_heap::unconditional_set_card_collectible (uint8_t* obj) +{ + if (settings.demotion) + { + set_card (card_of (obj)); + } +} +#endif //COLLECTIBLE_CLASS + void gc_heap::relocate_shortened_survivor_helper (uint8_t* plug, uint8_t* plug_end, mark* pinned_plug_entry) { uint8_t* x = plug; @@ -1013,12 +1053,6 @@ void gc_heap::relocate_survivors_in_brick (uint8_t* tree, relocate_args* args) } } -inline -void gc_heap::update_oldest_pinned_plug() -{ - oldest_pinned_plug = (pinned_plug_que_empty_p() ? 
0 : pinned_plug (oldest_pin())); -} - heap_segment* gc_heap::get_start_segment (generation* gen) { heap_segment* start_heap_segment = heap_segment_rw (generation_start_segment (gen)); @@ -2259,3 +2293,5 @@ void gc_heap::relocate_in_uoh_objects (int gen_num) } } } + +} // namespace WKS/SVR diff --git a/src/coreclr/gc/sample/CMakeLists.txt b/src/coreclr/gc/sample/CMakeLists.txt index 34bb8526230c71..28835e250d6361 100644 --- a/src/coreclr/gc/sample/CMakeLists.txt +++ b/src/coreclr/gc/sample/CMakeLists.txt @@ -11,10 +11,29 @@ set(SOURCES ../gceventstatus.cpp ../gcconfig.cpp ../gccommon.cpp - ../gceewks.cpp + ../gcee.cpp ../gchandletable.cpp ../gcscan.cpp - ../gcwks.cpp + ../gc.cpp + ../init.cpp + ../no_gc.cpp + ../finalization.cpp + ../dynamic_tuning.cpp + ../region_free_list.cpp + ../region_allocator.cpp + ../memory.cpp + ../sweep.cpp + ../collect.cpp + ../diagnostics.cpp + ../dynamic_heap_count.cpp + ../card_table.cpp + ../relocate_compact.cpp + ../mark_phase.cpp + ../background.cpp + ../interface.cpp + ../allocation.cpp + ../plan_phase.cpp + ../regions_segments.cpp ../gcload.cpp ../handletable.cpp ../handletablecache.cpp diff --git a/src/coreclr/gc/sample/GCSample.vcxproj b/src/coreclr/gc/sample/GCSample.vcxproj index 0b7e657b35f807..198358729ea7b8 100644 --- a/src/coreclr/gc/sample/GCSample.vcxproj +++ b/src/coreclr/gc/sample/GCSample.vcxproj @@ -94,9 +94,28 @@ - + - + + + + + + + + + + + + + + + + + + + + diff --git a/src/coreclr/gc/sample/GCSample.vcxproj.filters b/src/coreclr/gc/sample/GCSample.vcxproj.filters index 9fac162f4ac83e..6477fad46f3265 100644 --- a/src/coreclr/gc/sample/GCSample.vcxproj.filters +++ b/src/coreclr/gc/sample/GCSample.vcxproj.filters @@ -41,13 +41,70 @@ Source Files - + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + 
Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + Source Files Source Files - + Source Files @@ -75,4 +132,4 @@ Source Files - \ No newline at end of file + diff --git a/src/coreclr/gc/sweep.cpp b/src/coreclr/gc/sweep.cpp index 25a1825639eb2d..65d752156b9e40 100644 --- a/src/coreclr/gc/sweep.cpp +++ b/src/coreclr/gc/sweep.cpp @@ -1,6 +1,16 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "gcinternal.h" + +#ifdef SERVER_GC +namespace SVR +{ +#else // SERVER_GC +namespace WKS +{ +#endif // SERVER_GC + #ifdef FEATURE_BASICFREEZE inline @@ -602,3 +612,5 @@ void gc_heap::sweep_uoh_objects (int gen_num) _ASSERTE(generation_allocation_segment(gen) != NULL); } +} // namespace WKS/SVR + diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt index 9f3a80c702358e..4042296d016a0b 100644 --- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/CMakeLists.txt @@ -1,5 +1,29 @@ set(GC_DIR ../../gc) +set(GC_WKS_SVR_SOURCES + ${GC_DIR}/gc.cpp + ${GC_DIR}/init.cpp + ${GC_DIR}/no_gc.cpp + ${GC_DIR}/finalization.cpp + ${GC_DIR}/dynamic_tuning.cpp + ${GC_DIR}/region_free_list.cpp + ${GC_DIR}/region_allocator.cpp + ${GC_DIR}/memory.cpp + ${GC_DIR}/sweep.cpp + ${GC_DIR}/collect.cpp + ${GC_DIR}/diagnostics.cpp + ${GC_DIR}/dynamic_heap_count.cpp + ${GC_DIR}/card_table.cpp + ${GC_DIR}/relocate_compact.cpp + ${GC_DIR}/mark_phase.cpp + ${GC_DIR}/background.cpp + ${GC_DIR}/interface.cpp + ${GC_DIR}/allocation.cpp + ${GC_DIR}/plan_phase.cpp + ${GC_DIR}/regions_segments.cpp + ${GC_DIR}/gcee.cpp +) + set(COMMON_RUNTIME_SOURCES allocheap.cpp rhassert.cpp @@ -44,8 +68,6 @@ set(COMMON_RUNTIME_SOURCES ${GC_DIR}/gcconfig.cpp ${GC_DIR}/gchandletable.cpp ${GC_DIR}/gccommon.cpp - ${GC_DIR}/gceewks.cpp - ${GC_DIR}/gcwks.cpp ${GC_DIR}/gcscan.cpp ${GC_DIR}/handletable.cpp 
${GC_DIR}/handletablecache.cpp @@ -62,11 +84,6 @@ if (CLR_CMAKE_TARGET_UNIX AND NOT CLR_CMAKE_TARGET_ARCH_WASM) ) endif() -set(SERVER_GC_SOURCES - ${GC_DIR}/gceesvr.cpp - ${GC_DIR}/gcsvr.cpp -) - set(STANDALONEGC_DISABLED_SOURCES clrgc.disabled.cpp ) @@ -330,7 +347,7 @@ list(APPEND COMMON_RUNTIME_SOURCES ${GC_HEADERS}) convert_to_absolute_path(COMMON_RUNTIME_SOURCES ${COMMON_RUNTIME_SOURCES}) convert_to_absolute_path(FULL_RUNTIME_SOURCES ${FULL_RUNTIME_SOURCES}) -convert_to_absolute_path(SERVER_GC_SOURCES ${SERVER_GC_SOURCES}) +convert_to_absolute_path(GC_WKS_SVR_SOURCES ${GC_WKS_SVR_SOURCES}) convert_to_absolute_path(STANDALONEGC_DISABLED_SOURCES ${STANDALONEGC_DISABLED_SOURCES}) convert_to_absolute_path(STANDALONEGC_ENABLED_SOURCES ${STANDALONEGC_ENABLED_SOURCES}) convert_to_absolute_path(RUNTIME_SOURCES_ARCH_ASM ${RUNTIME_SOURCES_ARCH_ASM}) diff --git a/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt index 74cdeca700a1ae..4dafc728c7970c 100644 --- a/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/Full/CMakeLists.txt @@ -22,11 +22,27 @@ if (CLR_CMAKE_TARGET_WIN32) endif() endif (CLR_CMAKE_TARGET_WIN32) -add_library(Runtime.WorkstationGC STATIC ${COMMON_RUNTIME_SOURCES} ${FULL_RUNTIME_SOURCES} ${RUNTIME_ARCH_ASM_OBJECTS}) +add_library(Runtime.GC.Workstation OBJECT ${GC_WKS_SVR_SOURCES}) +add_dependencies(Runtime.GC.Workstation aot_eventing_headers) + +add_library(Runtime.GC.Server OBJECT ${GC_WKS_SVR_SOURCES}) +add_dependencies(Runtime.GC.Server aot_eventing_headers) +target_compile_definitions(Runtime.GC.Server PRIVATE FEATURE_SVR_GC SERVER_GC) + +add_library(Runtime.WorkstationGC STATIC + ${COMMON_RUNTIME_SOURCES} + ${FULL_RUNTIME_SOURCES} + ${RUNTIME_ARCH_ASM_OBJECTS} + $<TARGET_OBJECTS:Runtime.GC.Workstation>) add_dependencies(Runtime.WorkstationGC aot_eventing_headers) target_link_libraries(Runtime.WorkstationGC PRIVATE aotminipal) -add_library(Runtime.ServerGC STATIC ${COMMON_RUNTIME_SOURCES}
${FULL_RUNTIME_SOURCES} ${SERVER_GC_SOURCES} ${RUNTIME_ARCH_ASM_OBJECTS}) +add_library(Runtime.ServerGC STATIC + ${COMMON_RUNTIME_SOURCES} + ${FULL_RUNTIME_SOURCES} + ${RUNTIME_ARCH_ASM_OBJECTS} + $<TARGET_OBJECTS:Runtime.GC.Workstation> + $<TARGET_OBJECTS:Runtime.GC.Server>) add_dependencies(Runtime.ServerGC aot_eventing_headers) target_link_libraries(Runtime.ServerGC PRIVATE aotminipal) @@ -44,7 +60,7 @@ if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) add_library(Runtime.VxsortDisabled STATIC ${DUMMY_VXSORT_SOURCES}) endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) -target_compile_definitions(Runtime.ServerGC PRIVATE -DFEATURE_SVR_GC) +target_compile_definitions(Runtime.ServerGC PRIVATE FEATURE_SVR_GC) if (CLR_CMAKE_TARGET_WIN32) set_target_properties(aotminipal PROPERTIES diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index 457cde2dc9e185..d9f280b3d2bf4b 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -507,11 +507,7 @@ set(GC_SOURCES_WKS ../gc/gcconfig.cpp ../gc/gccommon.cpp ../gc/gcscan.cpp - ../gc/gcsvr.cpp - ../gc/gcwks.cpp ../gc/gchandletable.cpp - ../gc/gceesvr.cpp - ../gc/gceewks.cpp ../gc/gcload.cpp ../gc/gcbridge.cpp ../gc/softwarewritewatch.cpp @@ -1038,6 +1034,42 @@ convert_to_absolute_path(VM_SOURCES_WKS_ARCH_ASM ${VM_SOURCES_WKS_ARCH_ASM}) convert_to_absolute_path(VM_SOURCES_DAC ${VM_SOURCES_DAC}) convert_to_absolute_path(VM_SOURCES_WKS_SPECIAL ${VM_SOURCES_WKS_SPECIAL}) +set(GC_WKS_SVR_SOURCES + ../gc/gcee.cpp + ../gc/gc.cpp + ../gc/init.cpp + ../gc/no_gc.cpp + ../gc/finalization.cpp + ../gc/dynamic_tuning.cpp + ../gc/region_free_list.cpp + ../gc/region_allocator.cpp + ../gc/memory.cpp + ../gc/sweep.cpp + ../gc/collect.cpp + ../gc/diagnostics.cpp + ../gc/dynamic_heap_count.cpp + ../gc/card_table.cpp + ../gc/relocate_compact.cpp + ../gc/mark_phase.cpp + ../gc/background.cpp + ../gc/interface.cpp + ../gc/allocation.cpp + ../gc/plan_phase.cpp + ../gc/regions_segments.cpp + ) + +add_library_clr(vm_gc_wks OBJECT
${GC_WKS_SVR_SOURCES}) +target_precompile_headers(vm_gc_wks PRIVATE [["common.h"]]) +target_compile_definitions(vm_gc_wks PRIVATE GC_DESCRIPTOR) +add_dependencies(vm_gc_wks eventing_headers) + +if(FEATURE_SVR_GC) + add_library_clr(vm_gc_svr OBJECT ${GC_WKS_SVR_SOURCES}) + target_precompile_headers(vm_gc_svr PRIVATE [["common.h"]]) + target_compile_definitions(vm_gc_svr PRIVATE GC_DESCRIPTOR SERVER_GC) + add_dependencies(vm_gc_svr eventing_headers) +endif() + if (NOT CLR_CMAKE_TARGET_ARCH_WASM) add_library_clr(cee_dac ${VM_SOURCES_DAC}) add_dependencies(cee_dac eventing_headers)