Commit 9ffc1d19 authored by Dan Williams
Browse files

mm/memremap_pages: Introduce memremap_compat_align()

The "sub-section memory hotplug" facility allows memremap_pages() users
like libnvdimm to compensate for hardware platforms like x86 that have a
section size larger than their hardware memory mapping granularity.  The
compensation that sub-section support affords is being tolerant of
physical memory resources shifting by units smaller (64MiB on x86) than
the memory-hotplug section size (128MiB), where the platform
physical-memory mapping granularity is limited by the number and
capability of address-decode-registers in the memory controller.

While the sub-section support allows memremap_pages() to operate on
sub-section (2MiB) granularity, the Power architecture may still
require 16MiB alignment on "!radix_enabled()" platforms.

In order for libnvdimm to be able to detect and manage this per-arch
limitation, introduce memremap_compat_align() as a common minimum
alignment across all driver-facing memory-mapping interfaces, and let
Power override it to 16MiB in the "!radix_enabled()" case.

The assumption / requirement for 16MiB to be a viable
memremap_compat_align() value is that Power does not have platforms
where its equivalent of address-decode-registers ever hardware remaps a
persistent memory resource on smaller than 16MiB boundaries. Note that I
tried my best to not add a new Kconfig symbol, but header include
entanglements defeated the #ifndef memremap_compat_align design pattern
and the need to export it defeats the __weak design pattern for arch
overrides.

Based on an initial patch by Aneesh.

Link: http://lore.kernel.org/r/CAPcyv4gBGNP95APYaBcsocEa50tQj9b5h__83vgngjq3ouGX_Q@mail.gmail.com


Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Reported-by: Jeff Moyer <jmoyer@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc)
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
parent 1d0827b7
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -122,6 +122,7 @@ config PPC
	select ARCH_HAS_GCOV_PROFILE_ALL
	select ARCH_HAS_KCOV
	select ARCH_HAS_HUGEPD			if HUGETLB_PAGE
	select ARCH_HAS_MEMREMAP_COMPAT_ALIGN
	select ARCH_HAS_MMIOWB			if PPC64
	select ARCH_HAS_PHYS_TO_DMA
	select ARCH_HAS_PMEM_API
+21 −0
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@

#include <linux/io.h>
#include <linux/slab.h>
#include <linux/mmzone.h>
#include <linux/vmalloc.h>
#include <asm/io-workarounds.h>

@@ -97,3 +98,23 @@ void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,

	return NULL;
}

#ifdef CONFIG_ZONE_DEVICE
/*
 * powerpc replacement for the generic memremap_compat_align() in
 * mm/memremap.c.
 *
 * Radix translation: SUBSECTION_SIZE is returned as-is.
 *
 * Hash translation: the direct-map range is mapped with a single page
 * size picked by htab_init_page_sizes(), so the result is the larger of
 * SUBSECTION_SIZE and that page size as recorded in mmu_psize_defs[].
 */
unsigned long memremap_compat_align(void)
{
	unsigned long linear_map_align;

	if (radix_enabled())
		return SUBSECTION_SIZE;

	/* Hash MMU: alignment may not be smaller than the linear-map page size. */
	linear_map_align = 1UL << mmu_psize_defs[mmu_linear_psize].shift;
	return max(SUBSECTION_SIZE, linear_map_align);
}
EXPORT_SYMBOL_GPL(memremap_compat_align);
#endif
+1 −1
Original line number Diff line number Diff line
@@ -750,7 +750,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
	start = nsio->res.start;
	size = resource_size(&nsio->res);
	npfns = PHYS_PFN(size - SZ_8K);
	align = max(nd_pfn->align, (1UL << SUBSECTION_SHIFT));
	align = max(nd_pfn->align, SUBSECTION_SIZE);
	end_trunc = start + size - ALIGN_DOWN(start + size, align);
	if (nd_pfn->mode == PFN_MODE_PMEM) {
		/*
+8 −0
Original line number Diff line number Diff line
@@ -132,6 +132,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,

unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
unsigned long memremap_compat_align(void);
#else
static inline void *devm_memremap_pages(struct device *dev,
		struct dev_pagemap *pgmap)
@@ -165,6 +166,12 @@ static inline void vmem_altmap_free(struct vmem_altmap *altmap,
		unsigned long nr_pfns)
{
}

/*
 * Fallback used when CONFIG_ZONE_DEVICE is off: with memremap_pages()
 * disabled, every architecture can remap a single page, so the
 * alignment reported is PAGE_SIZE.
 */
static inline unsigned long memremap_compat_align(void)
{
	return PAGE_SIZE;
}
#endif /* CONFIG_ZONE_DEVICE */

static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
@@ -172,4 +179,5 @@ static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
	if (pgmap)
		percpu_ref_put(pgmap->ref);
}

#endif /* _LINUX_MEMREMAP_H_ */
+1 −0
Original line number Diff line number Diff line
@@ -1170,6 +1170,7 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
#define SECTION_ALIGN_DOWN(pfn)	((pfn) & PAGE_SECTION_MASK)

#define SUBSECTION_SHIFT 21
#define SUBSECTION_SIZE (1UL << SUBSECTION_SHIFT)

#define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT)
#define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT)
Loading