@@ -133,6 +133,7 @@ enum {
 
 struct global_cwq;
 struct worker_pool;
+struct idle_rebind;
 
 /*
  * The poor guys doing the actual heavy lifting.  All on-duty workers
@@ -154,7 +155,10 @@ struct worker {
 	unsigned long		last_active;	/* L: last active timestamp */
 	unsigned int		flags;		/* X: flags */
 	int			id;		/* I: worker id */
-	struct work_struct	rebind_work;	/* L: rebind worker to cpu */
+
+	/* for rebinding worker to CPU */
+	struct idle_rebind	*idle_rebind;	/* L: for idle worker */
+	struct work_struct	rebind_work;	/* L: for busy worker */
 };
 
 struct worker_pool {
@@ -190,6 +194,8 @@ struct global_cwq {
 
 	struct worker_pool	pools[2];	/* normal and highpri pools */
 
+	wait_queue_head_t	rebind_hold;	/* rebind hold wait */
+
 	struct task_struct	*trustee;	/* L: for gcwq shutdown */
 	unsigned int		trustee_state;	/* L: trustee state */
 	wait_queue_head_t	trustee_wait;	/* trustee wait */
@@ -1314,13 +1320,37 @@ __acquires(&gcwq->lock)
 	}
 }
 
+struct idle_rebind {
+	int			cnt;		/* # workers to be rebound */
+	struct completion	done;		/* all workers rebound */
+};
+
+/*
+ * Rebind an idle @worker to its CPU.  During CPU onlining, this has to
+ * happen synchronously for idle workers.  worker_thread() will test
+ * %WORKER_REBIND before leaving idle and call this function.
+ */
+static void idle_worker_rebind(struct worker *worker)
+{
+	struct global_cwq *gcwq = worker->pool->gcwq;
+
+	/* CPU must be online at this point */
+	WARN_ON(!worker_maybe_bind_and_lock(worker));
+	if (!--worker->idle_rebind->cnt)
+		complete(&worker->idle_rebind->done);
+	spin_unlock_irq(&worker->pool->gcwq->lock);
+
+	/* we did our part, wait for rebind_workers() to finish up */
+	wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND));
+}
+
 /*
- * Function for worker->rebind_work used to rebind unbound busy workers to
+ * Function for @worker->rebind_work used to rebind unbound busy workers to
  * the associated cpu which is coming back online.  This is scheduled by
  * cpu up but can race with other cpu hotplug operations and may be
  * executed twice without intervening cpu down.
  */
-static void worker_rebind_fn(struct work_struct *work)
+static void busy_worker_rebind_fn(struct work_struct *work)
 {
 	struct worker *worker = container_of(work, struct worker, rebind_work);
 	struct global_cwq *gcwq = worker->pool->gcwq;
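The interlock in idle_worker_rebind() runs in both directions: the worker first signals the manager through the counted completion, then parks on gcwq->rebind_hold until the manager (rebind_workers(), added in the next hunk) clears %WORKER_REBIND. Below is a minimal userspace sketch of that handshake, with pthreads standing in for the kernel's completion and wait-queue primitives. All names are invented for illustration, and the counter seeding and retry loop of rebind_workers() are omitted; this is not the kernel code.

```c
#include <pthread.h>
#include <stdio.h>

#define NWORKERS 4

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done_cv = PTHREAD_COND_INITIALIZER; /* ~ idle_rebind.done */
static pthread_cond_t hold_cv = PTHREAD_COND_INITIALIZER; /* ~ gcwq->rebind_hold */
static int cnt = NWORKERS;  /* ~ idle_rebind.cnt */
static int rebind_flag = 1; /* ~ WORKER_REBIND */

static void *idle_worker(void *arg)
{
	pthread_mutex_lock(&lock);
	/* "rebound": drop our count, the last worker completes the manager */
	if (!--cnt)
		pthread_cond_signal(&done_cv);
	/* held here until the manager clears the flag (~ rebind_hold) */
	while (rebind_flag)
		pthread_cond_wait(&hold_cv, &lock);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t tids[NWORKERS];
	int i;

	for (i = 0; i < NWORKERS; i++)
		pthread_create(&tids[i], NULL, idle_worker, NULL);

	pthread_mutex_lock(&lock);
	while (cnt)                       /* ~ wait_for_completion() */
		pthread_cond_wait(&done_cv, &lock);
	rebind_flag = 0;                  /* ~ clearing WORKER_REBIND */
	pthread_cond_broadcast(&hold_cv); /* ~ wake_up_all(&gcwq->rebind_hold) */
	pthread_mutex_unlock(&lock);

	for (i = 0; i < NWORKERS; i++)
		pthread_join(tids[i], NULL);
	puts("all idle workers rebound and released");
	return 0;
}
```

As in the patch, counter and flag are only touched under one lock, so plain int fields suffice; and the condition is re-tested in a loop after every wakeup, which is exactly the semantics wait_event() provides in the kernel version.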
@@ -1331,6 +1361,112 @@ static void worker_rebind_fn(struct work_struct *work)
 	spin_unlock_irq(&gcwq->lock);
 }
 
+/**
+ * rebind_workers - rebind all workers of a gcwq to the associated CPU
+ * @gcwq: gcwq of interest
+ *
+ * @gcwq->cpu is coming online.  Rebind all workers to the CPU.  Rebinding
+ * is different for idle and busy ones.
+ *
+ * The idle ones should be rebound synchronously and idle rebinding should
+ * be complete before any worker starts executing work items with
+ * concurrency management enabled; otherwise, scheduler may oops trying to
+ * wake up non-local idle worker from wq_worker_sleeping().
+ *
+ * This is achieved by repeatedly requesting rebinding until all idle
+ * workers are known to have been rebound under @gcwq->lock and holding all
+ * idle workers from becoming busy until idle rebinding is complete.
+ *
+ * Once idle workers are rebound, busy workers can be rebound as they
+ * finish executing their current work items.  Queueing the rebind work at
+ * the head of their scheduled lists is enough.  Note that nr_running will
+ * be properly bumped as busy workers rebind.
+ *
+ * On return, all workers are guaranteed to either be bound or have rebind
+ * work item scheduled.
+ */
+static void rebind_workers(struct global_cwq *gcwq)
+	__releases(&gcwq->lock) __acquires(&gcwq->lock)
+{
+	struct idle_rebind idle_rebind;
+	struct worker_pool *pool;
+	struct worker *worker;
+	struct hlist_node *pos;
+	int i;
+
+	lockdep_assert_held(&gcwq->lock);
+
+	for_each_worker_pool(pool, gcwq)
+		lockdep_assert_held(&pool->manager_mutex);
+
+	/*
+	 * Rebind idle workers.  Interlocked both ways.  We wait for
+	 * workers to rebind via @idle_rebind.done.  Workers will wait for
+	 * us to finish up by watching %WORKER_REBIND.
+	 */
+	init_completion(&idle_rebind.done);
+retry:
+	idle_rebind.cnt = 1;
+	INIT_COMPLETION(idle_rebind.done);
+
+	/* set REBIND and kick idle ones, we'll wait for these later */
+	for_each_worker_pool(pool, gcwq) {
+		list_for_each_entry(worker, &pool->idle_list, entry) {
+			if (worker->flags & WORKER_REBIND)
+				continue;
+
+			/* morph UNBOUND to REBIND */
+			worker->flags &= ~WORKER_UNBOUND;
+			worker->flags |= WORKER_REBIND;
+
+			idle_rebind.cnt++;
+			worker->idle_rebind = &idle_rebind;
+
+			/* worker_thread() will call idle_worker_rebind() */
+			wake_up_process(worker->task);
+		}
+	}
+
+	if (--idle_rebind.cnt) {
+		spin_unlock_irq(&gcwq->lock);
+		wait_for_completion(&idle_rebind.done);
+		spin_lock_irq(&gcwq->lock);
+		/* busy ones might have become idle while waiting, retry */
+		goto retry;
+	}
+
+	/*
+	 * All idle workers are rebound and waiting for %WORKER_REBIND to
+	 * be cleared inside idle_worker_rebind().  Clear and release.
+	 * Clearing %WORKER_REBIND from this foreign context is safe
+	 * because these workers are still guaranteed to be idle.
+	 */
+	for_each_worker_pool(pool, gcwq)
+		list_for_each_entry(worker, &pool->idle_list, entry)
+			worker->flags &= ~WORKER_REBIND;
+
+	wake_up_all(&gcwq->rebind_hold);
+
+	/* rebind busy workers */
+	for_each_busy_worker(worker, i, pos, gcwq) {
+		struct work_struct *rebind_work = &worker->rebind_work;
+
+		/* morph UNBOUND to REBIND */
+		worker->flags &= ~WORKER_UNBOUND;
+		worker->flags |= WORKER_REBIND;
+
+		if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
+				     work_data_bits(rebind_work)))
+			continue;
+
+		/* wq doesn't matter, use the default one */
+		debug_work_activate(rebind_work);
+		insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work,
+			    worker->scheduled.next,
+			    work_color_to_flags(WORK_NO_COLOR));
+	}
+}
+
 static struct worker *alloc_worker(void)
 {
 	struct worker *worker;
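One subtlety in rebind_workers() above is that idle_rebind.cnt is seeded with 1 rather than 0: the manager holds one reference of its own while it is still walking the idle lists, so the completion cannot fire early if the first woken worker finishes before the remaining ones have been counted. Only after registration does the manager drop its own reference with --idle_rebind.cnt, sleeping only if workers are still outstanding. A standalone sketch of just this seeding idiom, with pthreads standing in for the kernel completion and all names hypothetical:

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done = PTHREAD_COND_INITIALIZER;
static int cnt;

static void put_ref(void)        /* ~ if (!--idle_rebind->cnt) complete() */
{
	pthread_mutex_lock(&lock);
	if (!--cnt)
		pthread_cond_signal(&done);
	pthread_mutex_unlock(&lock);
}

static void *participant(void *arg)
{
	put_ref();               /* each participant drops one reference */
	return NULL;
}

int main(void)
{
	pthread_t tids[8];
	int i;

	pthread_mutex_lock(&lock);
	cnt = 1;                 /* ~ idle_rebind.cnt = 1: initiator's own ref */
	for (i = 0; i < 8; i++) {
		cnt++;           /* one reference per registered participant */
		pthread_create(&tids[i], NULL, participant, NULL);
	}
	pthread_mutex_unlock(&lock);

	pthread_mutex_lock(&lock);
	if (--cnt)               /* ~ if (--idle_rebind.cnt) wait */
		while (cnt)
			pthread_cond_wait(&done, &lock);
	pthread_mutex_unlock(&lock);

	for (i = 0; i < 8; i++)
		pthread_join(tids[i], NULL);
	puts("counter hit zero only after registration finished");
	return 0;
}
```

Because the initiator's reference keeps cnt above zero throughout registration, the completion can only be signalled after the initiator's own drop. The kernel version additionally loops back to its retry label, since busy workers may have become idle while @gcwq->lock was released; the sketch leaves that out.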
@@ -1339,7 +1475,7 @@ static struct worker *alloc_worker(void)
 	if (worker) {
 		INIT_LIST_HEAD(&worker->entry);
 		INIT_LIST_HEAD(&worker->scheduled);
-		INIT_WORK(&worker->rebind_work, worker_rebind_fn);
+		INIT_WORK(&worker->rebind_work, busy_worker_rebind_fn);
 		/* on creation a worker is in !idle && prep state */
 		worker->flags = WORKER_PREP;
 	}
@@ -1829,6 +1965,9 @@ __acquires(&gcwq->lock)
 
 	lockdep_copy_map(&lockdep_map, &work->lockdep_map);
 #endif
+	WARN_ON_ONCE(!(worker->flags & (WORKER_UNBOUND | WORKER_REBIND)) &&
+		     raw_smp_processor_id() != gcwq->cpu);
+
 	/*
 	 * A single work shouldn't be executed concurrently by
 	 * multiple workers on a single cpu.  Check whether anyone is
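The WARN_ON_ONCE added above encodes the invariant that a worker which is neither UNBOUND nor mid-REBIND must be executing on its home CPU. The same kind of affinity assertion can be written in userspace; below is a small illustrative sketch using the GNU/Linux affinity API (invented names, loosely analogous to the kernel check, not the kernel code itself):

```c
#define _GNU_SOURCE
#include <assert.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>

static void *bound_worker(void *arg)
{
	int cpu = *(int *)arg;
	cpu_set_t set;
	int err;

	CPU_ZERO(&set);
	CPU_SET(cpu, &set);
	/* pin this thread to one CPU, loosely like worker_maybe_bind_and_lock() */
	err = pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
	if (err) {
		fprintf(stderr, "setaffinity: %s\n", strerror(err));
		return NULL;
	}

	/* the invariant the WARN_ON_ONCE checks: we execute on the bound CPU */
	assert(sched_getcpu() == cpu);
	printf("running on CPU %d as expected\n", cpu);
	return NULL;
}

int main(void)
{
	pthread_t tid;
	int cpu = 0;

	pthread_create(&tid, NULL, bound_worker, &cpu);
	pthread_join(tid, NULL);
	return 0;
}
```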
@@ -1946,11 +2085,20 @@ static int worker_thread(void *__worker)
 woke_up:
 	spin_lock_irq(&gcwq->lock);
 
-	/* DIE can be set only while we're idle, checking here is enough */
-	if (worker->flags & WORKER_DIE) {
+	/*
+	 * DIE can be set only while idle and REBIND set while busy has
+	 * @worker->rebind_work scheduled.  Checking here is enough.
+	 */
+	if (unlikely(worker->flags & (WORKER_REBIND | WORKER_DIE))) {
 		spin_unlock_irq(&gcwq->lock);
-		worker->task->flags &= ~PF_WQ_WORKER;
-		return 0;
+
+		if (worker->flags & WORKER_DIE) {
+			worker->task->flags &= ~PF_WQ_WORKER;
+			return 0;
+		}
+
+		idle_worker_rebind(worker);
+		goto woke_up;
 	}
 
 	worker_leave_idle(worker);
@@ -3468,42 +3616,6 @@ static int __cpuinit trustee_thread(void *__gcwq)
 		}
 	} while (i && rc >= 0);
 
-	/*
-	 * At this point, either draining has completed and no worker
-	 * is left, or cpu down has been canceled or the cpu is being
-	 * brought back up.  There shouldn't be any idle one left.
-	 * Tell the remaining busy ones to rebind once it finishes the
-	 * currently scheduled works by scheduling the rebind_work.
-	 */
-	for_each_worker_pool(pool, gcwq)
-		WARN_ON(!list_empty(&pool->idle_list));
-
-	/* if we're reassociating, clear DISASSOCIATED */
-	if (gcwq->trustee_state == TRUSTEE_RELEASE)
-		gcwq->flags &= ~GCWQ_DISASSOCIATED;
-
-	for_each_busy_worker(worker, i, pos, gcwq) {
-		struct work_struct *rebind_work = &worker->rebind_work;
-
-		/*
-		 * Rebind_work may race with future cpu hotplug
-		 * operations.  Use a separate flag to mark that
-		 * rebinding is scheduled.
-		 */
-		worker->flags |= WORKER_REBIND;
-		worker->flags &= ~WORKER_UNBOUND;
-
-		/* queue rebind_work, wq doesn't matter, use the default one */
-		if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
-				     work_data_bits(rebind_work)))
-			continue;
-
-		debug_work_activate(rebind_work);
-		insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work,
-			    worker->scheduled.next,
-			    work_color_to_flags(WORK_NO_COLOR));
-	}
-
 	gcwq_release_management(gcwq);
 
 	/* notify completion */
@@ -3609,13 +3721,16 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 			wait_trustee_state(gcwq, TRUSTEE_DONE);
 		}
 
-		/*
-		 * Either DISASSOCIATED is already cleared or no worker is
-		 * left on the gcwq.  Safe to clear DISASSOCIATED without
-		 * claiming managers.
-		 */
+		spin_unlock_irq(&gcwq->lock);
+		gcwq_claim_management(gcwq);
+		spin_lock_irq(&gcwq->lock);
+
 		gcwq->flags &= ~GCWQ_DISASSOCIATED;
 
+		rebind_workers(gcwq);
+
+		gcwq_release_management(gcwq);
+
 		/*
 		 * Trustee is done and there might be no worker left.
 		 * Put the first_idle in and request a real manager to
@@ -3910,6 +4025,8 @@ static int __init init_workqueues(void)
 			ida_init(&pool->worker_ida);
 		}
 
+		init_waitqueue_head(&gcwq->rebind_hold);
+
 		gcwq->trustee_state = TRUSTEE_DONE;
 		init_waitqueue_head(&gcwq->trustee_wait);
 	}