Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (1c134b19) · Commits · 戴 / test

arch/x86/Kconfig

+1 −0

Original line number	Diff line number	Diff line
		@@ -1462,6 +1462,7 @@ config X86_PAE

		config X86_5LEVEL
		bool "Enable 5-level page tables support"
		default y
		select DYNAMIC_MEMORY_LAYOUT
		select SPARSEMEM_VMEMMAP
		depends on X86_64

arch/x86/include/asm/pgtable-3level.h

+24 −22

Original line number	Diff line number	Diff line
		@@ -36,39 +36,41 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)

		#define pmd_read_atomic pmd_read_atomic
		/*
		* pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with
		* a "*pmdp" dereference done by gcc. Problem is, in certain places
		* where pte_offset_map_lock is called, concurrent page faults are
		* pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with
		* a "*pmdp" dereference done by GCC. Problem is, in certain places
		* where pte_offset_map_lock() is called, concurrent page faults are
		* allowed, if the mmap_sem is held for reading. An example is mincore
		* vs page faults vs MADV_DONTNEED. On the page fault side
		* pmd_populate rightfully does a set_64bit, but if we're reading the
		* pmd_populate() rightfully does a set_64bit(), but if we're reading the
		* pmd_t with a "*pmdp" on the mincore side, an SMP race can happen
		* because gcc will not read the 64bit of the pmd atomically. To fix
		* this all places running pmd_offset_map_lock() while holding the
		* because GCC will not read the 64-bit value of the pmd atomically.
		*
		* To fix this all places running pte_offset_map_lock() while holding the
		* mmap_sem in read mode, shall read the pmdp pointer using this
		* function to know if the pmd is null nor not, and in turn to know if
		* they can run pmd_offset_map_lock or pmd_trans_huge or other pmd
		* function to know if the pmd is null or not, and in turn to know if
		* they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd
		* operations.
		*
		* Without THP if the mmap_sem is hold for reading, the pmd can only
		* transition from null to not null while pmd_read_atomic runs. So
		* Without THP if the mmap_sem is held for reading, the pmd can only
		* transition from null to not null while pmd_read_atomic() runs. So
		* we can always return atomic pmd values with this function.
		*
		* With THP if the mmap_sem is hold for reading, the pmd can become
		* With THP if the mmap_sem is held for reading, the pmd can become
		* trans_huge or none or point to a pte (and in turn become "stable")
		* at any time under pmd_read_atomic. We could read it really
		* atomically here with a atomic64_read for the THP enabled case (and
		* at any time under pmd_read_atomic(). We could read it truly
		* atomically here with an atomic64_read() for the THP enabled case (and
		* it would be a whole lot simpler), but to avoid using cmpxchg8b we
		* only return an atomic pmdval if the low part of the pmdval is later
		* found stable (i.e. pointing to a pte). And we're returning a none
		* pmdval if the low part of the pmd is none. In some cases the high
		* and low part of the pmdval returned may not be consistent if THP is
		* enabled (the low part may point to previously mapped hugepage,
		* while the high part may point to a more recently mapped hugepage),
		* but pmd_none_or_trans_huge_or_clear_bad() only needs the low part
		* of the pmd to be read atomically to decide if the pmd is unstable
		* or not, with the only exception of when the low part of the pmd is
		* zero in which case we return a none pmd.
		* found to be stable (i.e. pointing to a pte). We are also returning a
		* 'none' (zero) pmdval if the low part of the pmd is zero.
		*
		* In some cases the high and low part of the pmdval returned may not be
		* consistent if THP is enabled (the low part may point to previously
		* mapped hugepage, while the high part may point to a more recently
		* mapped hugepage), but pmd_none_or_trans_huge_or_clear_bad() only
		* needs the low part of the pmd to be read atomically to decide if the
		* pmd is unstable or not, with the only exception when the low part
		* of the pmd is zero, in which case we return a 'none' pmd.
		*/
		static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
		{

arch/x86/kernel/cpu/intel.c

+4 −4

Original line number	Diff line number	Diff line
		@@ -819,7 +819,7 @@ static const struct _tlb_table intel_tlb_table[] = {
		{ 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" },
		{ 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" },
		{ 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" },
		{ 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages */" },
		{ 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages" },
		{ 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
		{ 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
		{ 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
		@@ -847,7 +847,7 @@ static const struct _tlb_table intel_tlb_table[] = {
		{ 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" },
		{ 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
		{ 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" },
		{ 0xc2, TLB_DATA_2M_4M, 16, " DTLB 2 MByte/4MByte pages, 4-way associative" },
		{ 0xc2, TLB_DATA_2M_4M, 16, " TLB_DATA 2 MByte/4MByte pages, 4-way associative" },
		{ 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" },
		{ 0x00, 0, 0 }
		};
		@@ -859,7 +859,7 @@ static void intel_tlb_lookup(const unsigned char desc)
		return;

		/* look up this descriptor in the table */
		for (k = 0; intel_tlb_table[k].descriptor != desc && \
		for (k = 0; intel_tlb_table[k].descriptor != desc &&
		intel_tlb_table[k].descriptor != 0; k++)
		;

arch/x86/mm/Makefile

+1 −1

Original line number	Diff line number	Diff line
		@@ -23,7 +23,7 @@ CFLAGS_mem_encrypt_identity.o := $(nostackp)

		CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace

		obj-$(CONFIG_X86_PAT) += pat_rbtree.o
		obj-$(CONFIG_X86_PAT) += pat_interval.o

		obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o

arch/x86/mm/pat.c

+4 −4

Original line number	Diff line number	Diff line
		@@ -603,7 +603,7 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,

		spin_lock(&memtype_lock);

		err = rbt_memtype_check_insert(new, new_type);
		err = memtype_check_insert(new, new_type);
		if (err) {
		pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
		start, end - 1,
		@@ -650,7 +650,7 @@ int free_memtype(u64 start, u64 end)
		}

		spin_lock(&memtype_lock);
		entry = rbt_memtype_erase(start, end);
		entry = memtype_erase(start, end);
		spin_unlock(&memtype_lock);

		if (IS_ERR(entry)) {
		@@ -693,7 +693,7 @@ static enum page_cache_mode lookup_memtype(u64 paddr)

		spin_lock(&memtype_lock);

		entry = rbt_memtype_lookup(paddr);
		entry = memtype_lookup(paddr);
		if (entry != NULL)
		rettype = entry->type;
		else
		@@ -1109,7 +1109,7 @@ static struct memtype *memtype_get_idx(loff_t pos)
		return NULL;

		spin_lock(&memtype_lock);
		ret = rbt_memtype_copy_nth_element(print_entry, pos);
		ret = memtype_copy_nth_element(print_entry, pos);
		spin_unlock(&memtype_lock);

		if (!ret) {

Admin message