Commit 006d3ff2 authored by Hugh Dickins's avatar Hugh Dickins Committed by Linus Torvalds
Browse files

mm/huge_memory: fix lockdep complaint on 32-bit i_size_read()

Huge tmpfs testing, on 32-bit kernel with lockdep enabled, showed that
__split_huge_page() was using i_size_read() while holding the irq-safe
lru_lock and page tree lock, but the 32-bit i_size_read() uses an
irq-unsafe seqlock which should not be nested inside them.

Instead, read the i_size earlier in split_huge_page_to_list(), and pass
the end offset down to __split_huge_page(): all while holding head page
lock, which is enough to prevent truncation of that extent before the
page tree lock has been taken.

Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261520070.2275@eggly.anvils


Fixes: baa355fd ("thp: file pages support for split_huge_page()")
Signed-off-by: default avatarHugh Dickins <hughd@google.com>
Acked-by: default avatarKirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: <stable@vger.kernel.org>	[4.8+]
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 173d9d9f
Loading
Loading
Loading
Loading
+13 −6
Original line number Diff line number Diff line
@@ -2439,12 +2439,11 @@ static void __split_huge_page_tail(struct page *head, int tail,
}

static void __split_huge_page(struct page *page, struct list_head *list,
		unsigned long flags)
		pgoff_t end, unsigned long flags)
{
	struct page *head = compound_head(page);
	struct zone *zone = page_zone(head);
	struct lruvec *lruvec;
	pgoff_t end = -1;
	int i;

	lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);
@@ -2452,9 +2451,6 @@ static void __split_huge_page(struct page *page, struct list_head *list,
	/* complete memcg works before add pages to LRU */
	mem_cgroup_split_huge_fixup(head);

	if (!PageAnon(page))
		end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE);

	for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
		__split_huge_page_tail(head, i, lruvec, list);
		/* Some pages can be beyond i_size: drop them from page cache */
@@ -2626,6 +2622,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
	int count, mapcount, extra_pins, ret;
	bool mlocked;
	unsigned long flags;
	pgoff_t end;

	VM_BUG_ON_PAGE(is_huge_zero_page(page), page);
	VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -2648,6 +2645,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
			ret = -EBUSY;
			goto out;
		}
		end = -1;
		mapping = NULL;
		anon_vma_lock_write(anon_vma);
	} else {
@@ -2661,6 +2659,15 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)

		anon_vma = NULL;
		i_mmap_lock_read(mapping);

		/*
		 *__split_huge_page() may need to trim off pages beyond EOF:
		 * but on 32-bit, i_size_read() takes an irq-unsafe seqlock,
		 * which cannot be nested inside the page tree lock. So note
		 * end now: i_size itself may be changed at any moment, but
		 * head page lock is good enough to serialize the trimming.
		 */
		end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
	}

	/*
@@ -2707,7 +2714,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
		if (mapping)
			__dec_node_page_state(page, NR_SHMEM_THPS);
		spin_unlock(&pgdata->split_queue_lock);
		__split_huge_page(page, list, flags);
		__split_huge_page(page, list, end, flags);
		if (PageSwapCache(head)) {
			swp_entry_t entry = { .val = page_private(head) };