@@ -1368,7 +1368,7 @@ static struct page *new_page(struct page *p, unsigned long private, int **x)
13681368 * that is not free, and 1 for any other page type.
13691369 * For 1 the page is returned with increased page count, otherwise not.
13701370 */
1371- static int get_any_page (struct page * p , unsigned long pfn , int flags )
1371+ static int __get_any_page (struct page * p , unsigned long pfn , int flags )
13721372{
13731373 int ret ;
13741374
@@ -1393,11 +1393,9 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
13931393 if (!get_page_unless_zero (compound_head (p ))) {
13941394 if (PageHuge (p )) {
13951395 pr_info ("%s: %#lx free huge page\n" , __func__ , pfn );
1396- ret = dequeue_hwpoisoned_huge_page ( compound_head ( p )) ;
1396+ ret = 0 ;
13971397 } else if (is_free_buddy_page (p )) {
13981398 pr_info ("%s: %#lx free buddy page\n" , __func__ , pfn );
1399- /* Set hwpoison bit while page is still isolated */
1400- SetPageHWPoison (p );
14011399 ret = 0 ;
14021400 } else {
14031401 pr_info ("%s: %#lx: unknown zero refcount page type %lx\n" ,
@@ -1413,23 +1411,48 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
14131411 return ret ;
14141412}
14151413
1414+ static int get_any_page (struct page * page , unsigned long pfn , int flags )
1415+ {
1416+ int ret = __get_any_page (page , pfn , flags );
1417+
1418+ if (ret == 1 && !PageHuge (page ) && !PageLRU (page )) {
1419+ /*
1420+ * Try to free it.
1421+ */
1422+ put_page (page );
1423+ shake_page (page , 1 );
1424+
1425+ /*
1426+ * Did it turn free?
1427+ */
1428+ ret = __get_any_page (page , pfn , 0 );
1429+ if (!PageLRU (page )) {
1430+ pr_info ("soft_offline: %#lx: unknown non LRU page type %lx\n" ,
1431+ pfn , page -> flags );
1432+ return - EIO ;
1433+ }
1434+ }
1435+ return ret ;
1436+ }
1437+
14161438static int soft_offline_huge_page (struct page * page , int flags )
14171439{
14181440 int ret ;
14191441 unsigned long pfn = page_to_pfn (page );
14201442 struct page * hpage = compound_head (page );
14211443
1444+ /*
1445+ * This double-check of PageHWPoison is to avoid the race with
1446+ * memory_failure(). See also comment in __soft_offline_page().
1447+ */
1448+ lock_page (hpage );
14221449 if (PageHWPoison (hpage )) {
1450+ unlock_page (hpage );
1451+ put_page (hpage );
14231452 pr_info ("soft offline: %#lx hugepage already poisoned\n" , pfn );
1424- ret = - EBUSY ;
1425- goto out ;
1453+ return - EBUSY ;
14261454 }
1427-
1428- ret = get_any_page (page , pfn , flags );
1429- if (ret < 0 )
1430- goto out ;
1431- if (ret == 0 )
1432- goto done ;
1455+ unlock_page (hpage );
14331456
14341457 /* Keep page count to indicate a given hugepage is isolated. */
14351458 ret = migrate_huge_page (hpage , new_page , MPOL_MF_MOVE_ALL , false,
@@ -1438,17 +1461,18 @@ static int soft_offline_huge_page(struct page *page, int flags)
14381461 if (ret ) {
14391462 pr_info ("soft offline: %#lx: migration failed %d, type %lx\n" ,
14401463 pfn , ret , page -> flags );
1441- goto out ;
1464+ } else {
1465+ set_page_hwpoison_huge_page (hpage );
1466+ dequeue_hwpoisoned_huge_page (hpage );
1467+ atomic_long_add (1 << compound_trans_order (hpage ),
1468+ & num_poisoned_pages );
14421469 }
1443- done :
14441470 /* keep elevated page count for bad page */
1445- atomic_long_add (1 << compound_trans_order (hpage ), & num_poisoned_pages );
1446- set_page_hwpoison_huge_page (hpage );
1447- dequeue_hwpoisoned_huge_page (hpage );
1448- out :
14491471 return ret ;
14501472}
14511473
1474+ static int __soft_offline_page (struct page * page , int flags );
1475+
14521476/**
14531477 * soft_offline_page - Soft offline a page.
14541478 * @page: page to offline
@@ -1477,62 +1501,60 @@ int soft_offline_page(struct page *page, int flags)
14771501 unsigned long pfn = page_to_pfn (page );
14781502 struct page * hpage = compound_trans_head (page );
14791503
1480- if (PageHuge (page )) {
1481- ret = soft_offline_huge_page ( page , flags );
1482- goto out ;
1504+ if (PageHWPoison (page )) {
1505+ pr_info ( "soft offline: %#lx page already poisoned\n" , pfn );
1506+ return - EBUSY ;
14831507 }
1484- if (PageTransHuge (hpage )) {
1508+ if (! PageHuge ( page ) && PageTransHuge (hpage )) {
14851509 if (PageAnon (hpage ) && unlikely (split_huge_page (hpage ))) {
14861510 pr_info ("soft offline: %#lx: failed to split THP\n" ,
14871511 pfn );
1488- ret = - EBUSY ;
1489- goto out ;
1512+ return - EBUSY ;
14901513 }
14911514 }
14921515
1493- if (PageHWPoison (page )) {
1494- pr_info ("soft offline: %#lx page already poisoned\n" , pfn );
1495- ret = - EBUSY ;
1496- goto out ;
1497- }
1498-
14991516 ret = get_any_page (page , pfn , flags );
15001517 if (ret < 0 )
1501- goto out ;
1502- if (ret == 0 )
1503- goto done ;
1504-
1505- /*
1506- * Page cache page we can handle?
1507- */
1508- if (!PageLRU (page )) {
1509- /*
1510- * Try to free it.
1511- */
1512- put_page (page );
1513- shake_page (page , 1 );
1514-
1515- /*
1516- * Did it turn free?
1517- */
1518- ret = get_any_page (page , pfn , 0 );
1519- if (ret < 0 )
1520- goto out ;
1521- if (ret == 0 )
1522- goto done ;
1523- }
1524- if (!PageLRU (page )) {
1525- pr_info ("soft_offline: %#lx: unknown non LRU page type %lx\n" ,
1526- pfn , page -> flags );
1527- ret = - EIO ;
1528- goto out ;
1518+ return ret ;
1519+ if (ret ) { /* for in-use pages */
1520+ if (PageHuge (page ))
1521+ ret = soft_offline_huge_page (page , flags );
1522+ else
1523+ ret = __soft_offline_page (page , flags );
1524+ } else { /* for free pages */
1525+ if (PageHuge (page )) {
1526+ set_page_hwpoison_huge_page (hpage );
1527+ dequeue_hwpoisoned_huge_page (hpage );
1528+ atomic_long_add (1 << compound_trans_order (hpage ),
1529+ & num_poisoned_pages );
1530+ } else {
1531+ SetPageHWPoison (page );
1532+ atomic_long_inc (& num_poisoned_pages );
1533+ }
15291534 }
1535+ /* keep elevated page count for bad page */
1536+ return ret ;
1537+ }
1538+
1539+ static int __soft_offline_page (struct page * page , int flags )
1540+ {
1541+ int ret ;
1542+ unsigned long pfn = page_to_pfn (page );
15301543
15311544 /*
1532- * Synchronized using the page lock with memory_failure()
1545+ * Check PageHWPoison again inside page lock because PageHWPoison
1546+ * is set by memory_failure() outside page lock. Note that
1547+ * memory_failure() also double-checks PageHWPoison inside page lock,
1548+ * so there's no race between soft_offline_page() and memory_failure().
15331549 */
15341550 lock_page (page );
15351551 wait_on_page_writeback (page );
1552+ if (PageHWPoison (page )) {
1553+ unlock_page (page );
1554+ put_page (page );
1555+ pr_info ("soft offline: %#lx page already poisoned\n" , pfn );
1556+ return - EBUSY ;
1557+ }
15361558 /*
15371559 * Try to invalidate first. This should work for
15381560 * non dirty unmapped page cache pages.
@@ -1545,9 +1567,10 @@ int soft_offline_page(struct page *page, int flags)
15451567 */
15461568 if (ret == 1 ) {
15471569 put_page (page );
1548- ret = 0 ;
15491570 pr_info ("soft_offline: %#lx: invalidated\n" , pfn );
1550- goto done ;
1571+ SetPageHWPoison (page );
1572+ atomic_long_inc (& num_poisoned_pages );
1573+ return 0 ;
15511574 }
15521575
15531576 /*
@@ -1575,18 +1598,13 @@ int soft_offline_page(struct page *page, int flags)
15751598 pfn , ret , page -> flags );
15761599 if (ret > 0 )
15771600 ret = - EIO ;
1601+ } else {
1602+ SetPageHWPoison (page );
1603+ atomic_long_inc (& num_poisoned_pages );
15781604 }
15791605 } else {
15801606 pr_info ("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n" ,
15811607 pfn , ret , page_count (page ), page -> flags );
15821608 }
1583- if (ret )
1584- goto out ;
1585-
1586- done :
1587- /* keep elevated page count for bad page */
1588- atomic_long_inc (& num_poisoned_pages );
1589- SetPageHWPoison (page );
1590- out :
15911609 return ret ;
15921610}
0 commit comments