Commit d4ae9916 authored by Naoya Horiguchi, committed by Linus Torvalds
Browse files

mm: soft-offline: close the race against page allocation

A process can be killed with SIGBUS(BUS_MCEERR_AR) when it tries to
allocate a page that was just freed in the course of soft-offline.  This is
undesirable because soft-offline (which handles corrected errors) is
meant to be less aggressive than hard-offline (which handles uncorrected
errors), and we can let soft-offline fail and keep using the page for a
good reason such as "system is busy."

Two main changes of this patch are:

- setting the migrate type of the target page to MIGRATE_ISOLATE. As done
  in free_unref_page_commit(), this makes the kernel bypass the pcplist when
  freeing the page, so we can assume that the page is on the free list just
  after put_page() returns,

- setting PG_hwpoison on the free page under zone->lock, which protects
  the free lists, so that we avoid setting PG_hwpoison on a page
  that is about to be allocated.

[akpm@linux-foundation.org: tweak set_hwpoison_free_buddy_page() comment]
Link: http://lkml.kernel.org/r/1531452366-11661-3-git-send-email-n-horiguchi@ah.jp.nec.com


Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reported-by: Xishi Qiu <xishi.qiuxishi@alibaba-inc.com>
Tested-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: <zy.zhengyi@alibaba-inc.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 6bc9b564
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -369,8 +369,13 @@ PAGEFLAG_FALSE(Uncached)
PAGEFLAG(HWPoison, hwpoison, PF_ANY)
TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
#define __PG_HWPOISON (1UL << PG_hwpoison)
extern bool set_hwpoison_free_buddy_page(struct page *page);
#else
PAGEFLAG_FALSE(HWPoison)
/*
 * Fallback used in the #else branch, where HWPoison is declared with
 * PAGEFLAG_FALSE: the page is left untouched and the caller is always told
 * that the flag was not set.
 */
static inline bool set_hwpoison_free_buddy_page(struct page *page)
{
	return false;
}
#define __PG_HWPOISON 0
#endif

+0 −10
Original line number Diff line number Diff line
@@ -340,11 +340,6 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
	return swp_type(entry) == SWP_HWPOISON;
}

/*
 * Thin wrapper: forwards to TestSetPageHWPoison() and hands its result back
 * to the caller as a bool.
 */
static inline bool test_set_page_hwpoison(struct page *page)
{
	bool was_set = TestSetPageHWPoison(page);

	return was_set;
}

static inline void num_poisoned_pages_inc(void)
{
	atomic_long_inc(&num_poisoned_pages);
@@ -367,11 +362,6 @@ static inline int is_hwpoison_entry(swp_entry_t swp)
	return 0;
}

/*
 * Stub variant: never touches the page and unconditionally reports false.
 */
static inline bool test_set_page_hwpoison(struct page *page)
{
	return false;
}

/*
 * Stub variant of the poisoned-page counter increment: deliberately empty.
 */
static inline void num_poisoned_pages_inc(void)
{
	/* nothing to count in this configuration */
}
+21 −5
Original line number Diff line number Diff line
@@ -57,6 +57,7 @@
#include <linux/mm_inline.h>
#include <linux/kfifo.h>
#include <linux/ratelimit.h>
#include <linux/page-isolation.h>
#include "internal.h"
#include "ras/ras_event.h"

@@ -1697,6 +1698,7 @@ static int __soft_offline_page(struct page *page, int flags)
static int soft_offline_in_use_page(struct page *page, int flags)
{
	int ret;
	int mt;
	struct page *hpage = compound_head(page);

	if (!PageHuge(page) && PageTransHuge(hpage)) {
@@ -1715,23 +1717,37 @@ static int soft_offline_in_use_page(struct page *page, int flags)
		put_hwpoison_page(hpage);
	}

	/*
	 * Setting MIGRATE_ISOLATE here ensures that the page will be linked
	 * to the free list immediately (not via the pcplist) when released
	 * after successful page migration. Otherwise we can't guarantee that
	 * the page is really free after put_page() returns, so
	 * set_hwpoison_free_buddy_page() would be highly likely to fail.
	 */
	mt = get_pageblock_migratetype(page);
	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
	if (PageHuge(page))
		ret = soft_offline_huge_page(page, flags);
	else
		ret = __soft_offline_page(page, flags);

	set_pageblock_migratetype(page, mt);
	return ret;
}

/*
 * Soft-offline a page that is currently free.
 *
 * NOTE(review): this hunk is rendered without +/- markers, so the replaced
 * lines (the "static void" signature and the TestSetPageHWPoison() test)
 * appear directly above their replacements (the "static int" signature and
 * the "if (!rc) { ... }" block) - confirm against the real diff.
 *
 * Behaviour of the new version as shown here: when PageHuge() is true for
 * the compound head, dissolve_free_huge_page() is called first and its
 * result is stored in rc. If rc is still 0, set_hwpoison_free_buddy_page()
 * is tried; on success num_poisoned_pages_inc() is called, otherwise rc
 * becomes -EBUSY.
 *
 * Returns rc: 0, the value reported by dissolve_free_huge_page(), or
 * -EBUSY when the hwpoison flag could not be set.
 */
static void soft_offline_free_page(struct page *page)
static int soft_offline_free_page(struct page *page)
{
	int rc = 0;
	struct page *head = compound_head(page);

	if (PageHuge(head))
		rc = dissolve_free_huge_page(page);
	if (!rc && !TestSetPageHWPoison(page))
	if (!rc) {
		/* Count the page only if the flag was actually set by this call. */
		if (set_hwpoison_free_buddy_page(page))
			num_poisoned_pages_inc();
		else
			rc = -EBUSY;
	}
	return rc;
}

/**
@@ -1775,7 +1791,7 @@ int soft_offline_page(struct page *page, int flags)
	if (ret > 0)
		ret = soft_offline_in_use_page(page, flags);
	else if (ret == 0)
		soft_offline_free_page(page);
		ret = soft_offline_free_page(page);

	return ret;
}
+1 −1
Original line number Diff line number Diff line
@@ -1212,7 +1212,7 @@ out:
			 * intentionally. Although it's rather weird,
			 * it's how HWPoison flag works at the moment.
			 */
			if (!test_set_page_hwpoison(page))
			if (set_hwpoison_free_buddy_page(page))
				num_poisoned_pages_inc();
		}
	} else {
+30 −0
Original line number Diff line number Diff line
@@ -8096,3 +8096,33 @@ bool is_free_buddy_page(struct page *page)

	return order < MAX_ORDER;
}

#ifdef CONFIG_MEMORY_FAILURE
/*
 * Set PG_hwpoison flag if a given page is confirmed to be a free page.  This
 * test is performed under the zone lock to prevent a race against page
 * allocation.
 */
/*
 * Mark @page with PG_hwpoison, but only if it is found inside a free buddy
 * block. The lookup and the flag update both happen with zone->lock held
 * (interrupts saved and restored around it) to prevent a race against page
 * allocation.
 *
 * Returns true only when the page was found in a buddy block and this call
 * is the one that set the flag; false if no covering buddy block was found
 * or the flag had already been set.
 */
bool set_hwpoison_free_buddy_page(struct page *page)
{
	struct zone *zone = page_zone(page);
	unsigned long pfn = page_to_pfn(page);
	unsigned long irq_flags;
	unsigned int order = 0;
	bool newly_poisoned = false;

	spin_lock_irqsave(&zone->lock, irq_flags);
	while (order < MAX_ORDER) {
		/* First page of the order-aligned range that contains @page. */
		unsigned long offset = pfn & ((1 << order) - 1);
		struct page *candidate = page - offset;

		/* Not a buddy head, or its block is too small to cover @page. */
		if (!PageBuddy(candidate) || page_order(candidate) < order) {
			order++;
			continue;
		}

		/* Covering buddy block found: stop at the first match either way. */
		newly_poisoned = !TestSetPageHWPoison(page);
		break;
	}
	spin_unlock_irqrestore(&zone->lock, irq_flags);

	return newly_poisoned;
}
#endif