Skip to content

Commit 6a51b8f

Browse files
Maoni0mikelle-rogers
authored andcommitted
DATAS BGC thread synchronization fix (dotnet#109804)
this problem with BGC thread synchronization is similar to the SVR GC one I fixed but has its own complications due to the fact BGC threads are only created on demands. it can cause the following symptoms - + deadlock because a BGC thread is erroneously terminated so then the next BGC join cannot finish + AV because a BGC thread could be seeing settings.concurrent as true and actually running blocking GC code! I moved setting the idle event code to be inside a GC instead of outside because we don't necessarily trigger a BGC in-between HC changes. and if we don't, we'd need to compensate for the idle count that we deducted and this is difficult to track. moving this to the place where we already decided to do a BGC makes it much simpler. I am setting all the required idle events sequentially instead of per heap (there isn't a convenient way to set it per heap without introducing yet more sync cost) which does incur some perf cost - however this cost is small and we only need to pay for it when all the following conditions are true - 1) we actually changed HC; 2) we did do a BGC that observed this HC change; 3) we need to wake up threads that participated in the last BGC. also added some stress code to help with reproing the problem and stressing the new code; otherwise it's very difficult to repro this kind of problems.
1 parent 992aa0b commit 6a51b8f

File tree

3 files changed

+177
-14
lines changed

3 files changed

+177
-14
lines changed

src/coreclr/gc/gc.cpp

Lines changed: 159 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2927,10 +2927,14 @@ bool gc_heap::trigger_initial_gen2_p = false;
29272927

29282928
#ifdef BACKGROUND_GC
29292929
bool gc_heap::trigger_bgc_for_rethreading_p = false;
2930+
int gc_heap::total_bgc_threads = 0;
2931+
int gc_heap::last_bgc_n_heaps = 0;
2932+
int gc_heap::last_total_bgc_threads = 0;
29302933
#endif //BACKGROUND_GC
29312934

29322935
#ifdef STRESS_DYNAMIC_HEAP_COUNT
29332936
int gc_heap::heaps_in_this_gc = 0;
2937+
int gc_heap::bgc_to_ngc2_ratio = 0;
29342938
#endif //STRESS_DYNAMIC_HEAP_COUNT
29352939
#endif // DYNAMIC_HEAP_COUNT
29362940

@@ -14259,6 +14263,11 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size,
1425914263

1426014264
if ((dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) && (conserve_mem_setting == 0))
1426114265
conserve_mem_setting = 5;
14266+
14267+
#ifdef STRESS_DYNAMIC_HEAP_COUNT
14268+
bgc_to_ngc2_ratio = (int)GCConfig::GetGCDBGCRatio();
14269+
dprintf (1, ("bgc_to_ngc2_ratio is %d", bgc_to_ngc2_ratio));
14270+
#endif
1426214271
#endif //DYNAMIC_HEAP_COUNT
1426314272

1426414273
if (conserve_mem_setting < 0)
@@ -21148,6 +21157,18 @@ int gc_heap::joined_generation_to_condemn (BOOL should_evaluate_elevation,
2114821157
if (!((n == max_generation) && *blocking_collection_p))
2114921158
{
2115021159
n = max_generation;
21160+
21161+
#ifdef STRESS_DYNAMIC_HEAP_COUNT
21162+
if (bgc_to_ngc2_ratio)
21163+
{
21164+
int r = (int)gc_rand::get_rand ((bgc_to_ngc2_ratio + 1) * 10);
21165+
dprintf (6666, ("%d - making this full GC %s", r, ((r < 10) ? "NGC2" : "BGC")));
21166+
if (r < 10)
21167+
{
21168+
*blocking_collection_p = TRUE;
21169+
}
21170+
}
21171+
#endif //STRESS_DYNAMIC_HEAP_COUNT
2115121172
}
2115221173
}
2115321174
}
@@ -24409,12 +24430,37 @@ void gc_heap::garbage_collect (int n)
2440924430
size_t saved_bgc_th_count_creation_failed = bgc_th_count_creation_failed;
2441024431
#endif //DYNAMIC_HEAP_COUNT
2441124432

24433+
// This is the count of threads that GCToEEInterface::CreateThread reported successful for.
24434+
int total_bgc_threads_running = 0;
2441224435
for (int i = 0; i < n_heaps; i++)
2441324436
{
24414-
prepare_bgc_thread (g_heaps[i]);
24437+
gc_heap* hp = g_heaps[i];
24438+
if (prepare_bgc_thread (hp))
24439+
{
24440+
assert (hp->bgc_thread_running);
24441+
if (!hp->bgc_thread_running)
24442+
{
24443+
dprintf (6666, ("h%d prepare succeeded but running is still false!", i));
24444+
GCToOSInterface::DebugBreak();
24445+
}
24446+
total_bgc_threads_running++;
24447+
}
24448+
else
24449+
{
24450+
break;
24451+
}
2441524452
}
2441624453

2441724454
#ifdef DYNAMIC_HEAP_COUNT
24455+
// Even if we don't do a BGC, we need to record how many threads were successfully created because those will
24456+
// be running.
24457+
total_bgc_threads = max (total_bgc_threads, total_bgc_threads_running);
24458+
24459+
if (total_bgc_threads_running != n_heaps)
24460+
{
24461+
dprintf (6666, ("wanted to have %d BGC threads but only have %d", n_heaps, total_bgc_threads_running));
24462+
}
24463+
2441824464
add_to_bgc_th_creation_history (current_gc_index,
2441924465
(bgc_th_count_created - saved_bgc_th_count_created),
2442024466
(bgc_th_count_created_th_existed - saved_bgc_th_count_created_th_existed),
@@ -24446,7 +24492,15 @@ void gc_heap::garbage_collect (int n)
2444624492
for (int i = 0; i < n_heaps; i++)
2444724493
{
2444824494
gc_heap* hp = g_heaps[i];
24449-
if (!(hp->bgc_thread) || !hp->commit_mark_array_bgc_init())
24495+
24496+
if (!(hp->bgc_thread_running))
24497+
{
24498+
assert (!(hp->bgc_thread));
24499+
}
24500+
24501+
// In theory we could be in a situation where bgc_thread_running is false but bgc_thread is non NULL. We don't
24502+
// support this scenario so don't do a BGC.
24503+
if (!(hp->bgc_thread_running && hp->bgc_thread && hp->commit_mark_array_bgc_init()))
2445024504
{
2445124505
do_concurrent_p = FALSE;
2445224506
break;
@@ -24466,8 +24520,37 @@ void gc_heap::garbage_collect (int n)
2446624520
}
2446724521
#endif //MULTIPLE_HEAPS
2446824522

24523+
#ifdef DYNAMIC_HEAP_COUNT
24524+
dprintf (6666, ("last BGC saw %d heaps and %d total threads, currently %d heaps and %d total threads, %s BGC",
24525+
last_bgc_n_heaps, last_total_bgc_threads, n_heaps, total_bgc_threads, (do_concurrent_p ? "doing" : "not doing")));
24526+
#endif //DYNAMIC_HEAP_COUNT
24527+
2446924528
if (do_concurrent_p)
2447024529
{
24530+
#ifdef DYNAMIC_HEAP_COUNT
24531+
int diff = n_heaps - last_bgc_n_heaps;
24532+
if (diff > 0)
24533+
{
24534+
int saved_idle_bgc_thread_count = dynamic_heap_count_data.idle_bgc_thread_count;
24535+
int max_idle_event_count = min (n_heaps, last_total_bgc_threads);
24536+
int idle_events_to_set = max_idle_event_count - last_bgc_n_heaps;
24537+
if (idle_events_to_set > 0)
24538+
{
24539+
Interlocked::ExchangeAdd (&dynamic_heap_count_data.idle_bgc_thread_count, -idle_events_to_set);
24540+
dprintf (6666, ("%d BGC threads exist, setting %d idle events for h%d-h%d, total idle %d -> %d",
24541+
total_bgc_threads, idle_events_to_set, last_bgc_n_heaps, (last_bgc_n_heaps + idle_events_to_set - 1),
24542+
saved_idle_bgc_thread_count, VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_bgc_thread_count)));
24543+
for (int heap_idx = last_bgc_n_heaps; heap_idx < max_idle_event_count; heap_idx++)
24544+
{
24545+
g_heaps[heap_idx]->bgc_idle_thread_event.Set();
24546+
}
24547+
}
24548+
}
24549+
24550+
last_bgc_n_heaps = n_heaps;
24551+
last_total_bgc_threads = total_bgc_threads;
24552+
#endif //DYNAMIC_HEAP_COUNT
24553+
2447124554
#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
2447224555
SoftwareWriteWatch::EnableForGCHeap();
2447324556
#endif //FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
@@ -25995,9 +26078,6 @@ void gc_heap::check_heap_count ()
2599526078
for (int heap_idx = n_heaps; heap_idx < new_n_heaps; heap_idx++)
2599626079
{
2599726080
g_heaps[heap_idx]->gc_idle_thread_event.Set();
25998-
#ifdef BACKGROUND_GC
25999-
g_heaps[heap_idx]->bgc_idle_thread_event.Set();
26000-
#endif //BACKGROUND_GC
2600126081
}
2600226082
}
2600326083

@@ -37712,6 +37792,19 @@ void gc_heap::gc_thread_stub (void* arg)
3771237792
void gc_heap::bgc_thread_stub (void* arg)
3771337793
{
3771437794
gc_heap* heap = (gc_heap*)arg;
37795+
37796+
#ifdef STRESS_DYNAMIC_HEAP_COUNT
37797+
// We should only do this every so often; otherwise we'll never be able to do a BGC
37798+
int r = (int)gc_rand::get_rand (30);
37799+
bool wait_p = (r < 10);
37800+
37801+
if (wait_p)
37802+
{
37803+
GCToOSInterface::Sleep (100);
37804+
}
37805+
dprintf (6666, ("h%d %s", heap->heap_number, (wait_p ? "waited" : "did not wait")));
37806+
#endif
37807+
3771537808
heap->bgc_thread = GCToEEInterface::GetThread();
3771637809
assert(heap->bgc_thread != nullptr);
3771737810
heap->bgc_thread_function();
@@ -39520,6 +39613,8 @@ void gc_heap::add_to_bgc_th_creation_history (size_t gc_index, size_t count_crea
3952039613
}
3952139614
#endif //DYNAMIC_HEAP_COUNT
3952239615

39616+
// If this returns TRUE, we are saying we expect that thread to be there. However, when that thread is available to work is indeterministic.
39617+
// But when we actually start a BGC, naturally we'll need to wait till it gets to the point it can work.
3952339618
BOOL gc_heap::prepare_bgc_thread(gc_heap* gh)
3952439619
{
3952539620
BOOL success = FALSE;
@@ -39531,7 +39626,19 @@ BOOL gc_heap::prepare_bgc_thread(gc_heap* gh)
3953139626
dprintf (2, ("GC thread not running"));
3953239627
if (gh->bgc_thread == 0)
3953339628
{
39629+
#ifdef STRESS_DYNAMIC_HEAP_COUNT
39630+
// to stress, we just don't actually try to create the thread to simulate a failure
39631+
int r = (int)gc_rand::get_rand (100);
39632+
bool try_to_create_p = (r > 10);
39633+
BOOL thread_created_p = (try_to_create_p ? create_bgc_thread (gh) : FALSE);
39634+
if (!thread_created_p)
39635+
{
39636+
dprintf (6666, ("h%d we failed to create the thread, %s", gh->heap_number, (try_to_create_p ? "tried" : "didn't try")));
39637+
}
39638+
if (thread_created_p)
39639+
#else //STRESS_DYNAMIC_HEAP_COUNT
3953439640
if (create_bgc_thread(gh))
39641+
#endif //STRESS_DYNAMIC_HEAP_COUNT
3953539642
{
3953639643
success = TRUE;
3953739644
thread_created = TRUE;
@@ -39549,8 +39656,11 @@ BOOL gc_heap::prepare_bgc_thread(gc_heap* gh)
3954939656
else
3955039657
{
3955139658
#ifdef DYNAMIC_HEAP_COUNT
39659+
// This would be a very unusual scenario where GCToEEInterface::CreateThread told us it failed yet the thread was created.
3955239660
bgc_th_count_created_th_existed++;
39661+
dprintf (6666, ("h%d we cannot have a thread that runs yet CreateThread reported it failed to create it", gh->heap_number));
3955339662
#endif //DYNAMIC_HEAP_COUNT
39663+
assert (!"GCToEEInterface::CreateThread returned FALSE yet the thread was created!");
3955439664
}
3955539665
}
3955639666
else
@@ -39748,7 +39858,7 @@ void gc_heap::bgc_thread_function()
3974839858
while (1)
3974939859
{
3975039860
// Wait for work to do...
39751-
dprintf (3, ("bgc thread: waiting..."));
39861+
dprintf (6666, ("h%d bgc thread: waiting...", heap_number));
3975239862

3975339863
cooperative_mode = enable_preemptive ();
3975439864
//current_thread->m_fPreemptiveGCDisabled = 0;
@@ -39797,36 +39907,71 @@ void gc_heap::bgc_thread_function()
3979739907
continue;
3979839908
}
3979939909
}
39910+
39911+
#ifdef STRESS_DYNAMIC_HEAP_COUNT
39912+
if (n_heaps <= heap_number)
39913+
{
39914+
uint32_t delay_ms = (uint32_t)gc_rand::get_rand (200);
39915+
GCToOSInterface::Sleep (delay_ms);
39916+
}
39917+
#endif //STRESS_DYNAMIC_HEAP_COUNT
39918+
3980039919
// if we signal the thread with no concurrent work to do -> exit
3980139920
if (!settings.concurrent)
3980239921
{
39803-
dprintf (3, ("no concurrent GC needed, exiting"));
39922+
dprintf (6666, ("h%d no concurrent GC needed, exiting", heap_number));
39923+
39924+
#ifdef STRESS_DYNAMIC_HEAP_COUNT
39925+
flush_gc_log (true);
39926+
GCToOSInterface::DebugBreak();
39927+
#endif
3980439928
break;
3980539929
}
39806-
gc_background_running = TRUE;
39807-
dprintf (2, (ThreadStressLog::gcStartBgcThread(), heap_number,
39808-
generation_free_list_space (generation_of (max_generation)),
39809-
generation_free_obj_space (generation_of (max_generation)),
39810-
dd_fragmentation (dynamic_data_of (max_generation))));
3981139930

3981239931
#ifdef DYNAMIC_HEAP_COUNT
3981339932
if (n_heaps <= heap_number)
3981439933
{
39934+
Interlocked::Increment (&dynamic_heap_count_data.idle_bgc_thread_count);
3981539935
add_to_bgc_hc_history (hc_record_bgc_inactive);
3981639936

3981739937
// this is the case where we have more background GC threads than heaps
3981839938
// - wait until we're told to continue...
39819-
dprintf (9999, ("BGC thread %d idle (%d heaps) (gc%Id)", heap_number, n_heaps, VolatileLoadWithoutBarrier (&settings.gc_index)));
39939+
dprintf (6666, ("BGC%Id h%d going idle (%d heaps), idle count is now %d",
39940+
VolatileLoadWithoutBarrier (&settings.gc_index), heap_number, n_heaps, VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_bgc_thread_count)));
3982039941
bgc_idle_thread_event.Wait(INFINITE, FALSE);
39821-
dprintf (9999, ("BGC thread %d waking from idle (%d heaps) (gc%Id)", heap_number, n_heaps, VolatileLoadWithoutBarrier (&settings.gc_index)));
39942+
dprintf (6666, ("BGC%Id h%d woke from idle (%d heaps), idle count is now %d",
39943+
VolatileLoadWithoutBarrier (&settings.gc_index), heap_number, n_heaps, VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_bgc_thread_count)));
3982239944
continue;
3982339945
}
3982439946
else
3982539947
{
39948+
if (heap_number == 0)
39949+
{
39950+
const int spin_count = 1024;
39951+
int idle_bgc_thread_count = total_bgc_threads - n_heaps;
39952+
dprintf (6666, ("n_heaps %d, total %d bgc threads, bgc idle should be %d and is %d",
39953+
n_heaps, total_bgc_threads, idle_bgc_thread_count, VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_bgc_thread_count)));
39954+
if (idle_bgc_thread_count != dynamic_heap_count_data.idle_bgc_thread_count)
39955+
{
39956+
dprintf (6666, ("current idle is %d, trying to get to %d",
39957+
VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_bgc_thread_count), idle_bgc_thread_count));
39958+
spin_and_wait (spin_count, (idle_bgc_thread_count == dynamic_heap_count_data.idle_bgc_thread_count));
39959+
}
39960+
}
39961+
3982639962
add_to_bgc_hc_history (hc_record_bgc_active);
3982739963
}
3982839964
#endif //DYNAMIC_HEAP_COUNT
3982939965

39966+
if (heap_number == 0)
39967+
{
39968+
gc_background_running = TRUE;
39969+
dprintf (6666, (ThreadStressLog::gcStartBgcThread(), heap_number,
39970+
generation_free_list_space (generation_of (max_generation)),
39971+
generation_free_obj_space (generation_of (max_generation)),
39972+
dd_fragmentation (dynamic_data_of (max_generation))));
39973+
}
39974+
3983039975
gc1();
3983139976

3983239977
#ifndef DOUBLY_LINKED_FL

src/coreclr/gc/gcconfig.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ class GCConfigStringHolder
142142
INT_CONFIG (GCSpinCountUnit, "GCSpinCountUnit", NULL, 0, "Specifies the spin count unit used by the GC.") \
143143
INT_CONFIG (GCDynamicAdaptationMode, "GCDynamicAdaptationMode", "System.GC.DynamicAdaptationMode", 1, "Enable the GC to dynamically adapt to application sizes.") \
144144
INT_CONFIG (GCDTargetTCP, "GCDTargetTCP", "System.GC.DTargetTCP", 0, "Specifies the target tcp for DATAS") \
145+
INT_CONFIG (GCDBGCRatio, " GCDBGCRatio", NULL, 0, "Specifies the ratio of BGC to NGC2 for HC change") \
145146
BOOL_CONFIG (GCCacheSizeFromSysConf, "GCCacheSizeFromSysConf", NULL, false, "Specifies using sysconf to retrieve the last level cache size for Unix.")
146147

147148
// This class is responsible for retreiving configuration information

src/coreclr/gc/gcpriv.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5171,6 +5171,9 @@ class gc_heap
51715171
int last_n_heaps;
51725172
// don't start a GC till we see (n_max_heaps - new_n_heaps) number of threads idling
51735173
VOLATILE(int32_t) idle_thread_count;
5174+
#ifdef BACKGROUND_GC
5175+
VOLATILE(int32_t) idle_bgc_thread_count;
5176+
#endif
51745177
bool init_only_p;
51755178

51765179
bool should_change_heap_count;
@@ -5198,6 +5201,17 @@ class gc_heap
51985201
// This is set when change_heap_count wants the next GC to be a BGC for rethreading gen2 FL
51995202
// and reset during that BGC.
52005203
PER_HEAP_ISOLATED_FIELD_MAINTAINED bool trigger_bgc_for_rethreading_p;
5204+
// BGC threads are created on demand but we don't destroy the ones we created. This
5205+
// is to track how many we've created. They may or may not be active depending on
5206+
// if they are needed.
5207+
PER_HEAP_ISOLATED_FIELD_MAINTAINED int total_bgc_threads;
5208+
5209+
// HC last BGC observed.
5210+
PER_HEAP_ISOLATED_FIELD_MAINTAINED int last_bgc_n_heaps;
5211+
// Number of total BGC threads last BGC observed. This tells us how many new BGC threads have
5212+
// been created since. Note that just because a BGC thread is created doesn't mean it's used.
5213+
// We can fail at committing mark array and not proceed with the BGC.
5214+
PER_HEAP_ISOLATED_FIELD_MAINTAINED int last_total_bgc_threads;
52015215
#endif //BACKGROUND_GC
52025216
#endif //DYNAMIC_HEAP_COUNT
52035217

@@ -5348,6 +5362,9 @@ class gc_heap
53485362

53495363
#ifdef DYNAMIC_HEAP_COUNT
53505364
PER_HEAP_ISOLATED_FIELD_INIT_ONLY int dynamic_adaptation_mode;
5365+
#ifdef STRESS_DYNAMIC_HEAP_COUNT
5366+
PER_HEAP_ISOLATED_FIELD_INIT_ONLY int bgc_to_ngc2_ratio;
5367+
#endif //STRESS_DYNAMIC_HEAP_COUNT
53515368
#endif //DYNAMIC_HEAP_COUNT
53525369

53535370
/********************************************/

0 commit comments

Comments
 (0)