Commit 0ac398b1 authored by Yunfeng Ye's avatar Yunfeng Ye Committed by Linus Torvalds
Browse files

mm: support memblock alloc on the exact node for sparse_buffer_init()

sparse_buffer_init() use memblock_alloc_try_nid_raw() to allocate memory
for page management structure, if memory allocation fails from specified
node, it will fall back to allocate from other nodes.

Normally, the page management structure will not exceed 2% of the total
memory, but a large continuous block of allocation is needed.  In most
cases, memory allocation from the specified node will succeed, but a
node memory become highly fragmented will fail.  we expect to allocate
memory base section rather than by allocating a large block of memory
from other NUMA nodes

Add memblock_alloc_exact_nid_raw() for this situation, which allocate
boot memory block on the exact node.  If a large contiguous block memory
allocate fail in sparse_buffer_init(), it will fall back to allocate
small block memory base section.

Link: http://lkml.kernel.org/r/66755ea7-ab10-8882-36fd-3e02b03775d5@huawei.com


Signed-off-by: default avatarYunfeng Ye <yeyunfeng@huawei.com>
Reviewed-by: default avatarMike Rapoport <rppt@linux.ibm.com>
Cc: Wei Yang <richardw.yang@linux.intel.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Qian Cai <cai@lca.pw>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 95830666
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -358,6 +358,9 @@ static inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
					 MEMBLOCK_ALLOC_ACCESSIBLE);
}

void *memblock_alloc_exact_nid_raw(phys_addr_t size, phys_addr_t align,
				 phys_addr_t min_addr, phys_addr_t max_addr,
				 int nid);
void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align,
				 phys_addr_t min_addr, phys_addr_t max_addr,
				 int nid);
+54 −11
Original line number Diff line number Diff line
@@ -1319,12 +1319,13 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
 * @start: the lower bound of the memory region to allocate (phys address)
 * @end: the upper bound of the memory region to allocate (phys address)
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @exact_nid: control the allocation fall back to other nodes
 *
 * The allocation is performed from memory region limited by
 * memblock.current_limit if @end == %MEMBLOCK_ALLOC_ACCESSIBLE.
 *
 * If the specified node can not hold the requested memory the
 * allocation falls back to any node in the system
 * If the specified node can not hold the requested memory and @exact_nid
 * is false, the allocation falls back to any node in the system.
 *
 * For systems with memory mirroring, the allocation is attempted first
 * from the regions with mirroring enabled and then retried from any
@@ -1338,7 +1339,8 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
 */
static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
					phys_addr_t align, phys_addr_t start,
					phys_addr_t end, int nid)
					phys_addr_t end, int nid,
					bool exact_nid)
{
	enum memblock_flags flags = choose_memblock_flags();
	phys_addr_t found;
@@ -1358,7 +1360,7 @@ again:
	if (found && !memblock_reserve(found, size))
		goto done;

	if (nid != NUMA_NO_NODE) {
	if (nid != NUMA_NO_NODE && !exact_nid) {
		found = memblock_find_in_range_node(size, align, start,
						    end, NUMA_NO_NODE,
						    flags);
@@ -1406,7 +1408,8 @@ phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
					     phys_addr_t start,
					     phys_addr_t end)
{
	return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE);
	return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
					false);
}

/**
@@ -1425,7 +1428,7 @@ phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
{
	return memblock_alloc_range_nid(size, align, 0,
					MEMBLOCK_ALLOC_ACCESSIBLE, nid);
					MEMBLOCK_ALLOC_ACCESSIBLE, nid, false);
}

/**
@@ -1435,6 +1438,7 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali
 * @min_addr: the lower bound of the memory region to allocate (phys address)
 * @max_addr: the upper bound of the memory region to allocate (phys address)
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @exact_nid: control the allocation fall back to other nodes
 *
 * Allocates memory block using memblock_alloc_range_nid() and
 * converts the returned physical address to virtual.
@@ -1450,7 +1454,7 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali
static void * __init memblock_alloc_internal(
				phys_addr_t size, phys_addr_t align,
				phys_addr_t min_addr, phys_addr_t max_addr,
				int nid)
				int nid, bool exact_nid)
{
	phys_addr_t alloc;

@@ -1465,11 +1469,13 @@ static void * __init memblock_alloc_internal(
	if (max_addr > memblock.current_limit)
		max_addr = memblock.current_limit;

	alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid);
	alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid,
					exact_nid);

	/* retry allocation without lower limit */
	if (!alloc && min_addr)
		alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid);
		alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid,
						exact_nid);

	if (!alloc)
		return NULL;
@@ -1477,6 +1483,43 @@ static void * __init memblock_alloc_internal(
	return phys_to_virt(alloc);
}

/**
 * memblock_alloc_exact_nid_raw - allocate boot memory block on the exact node
 * without zeroing memory
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @min_addr: the lower bound of the memory region from where the allocation
 *	  is preferred (phys address)
 * @max_addr: the upper bound of the memory region from where the allocation
 *	      is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
 *	      allocate only from memory limited by memblock.current_limit value
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Public function, provides additional debug information (including caller
 * info), if enabled. Does not zero allocated memory.
 *
 * Return:
 * Virtual address of allocated memory block on success, NULL on failure.
 */
void * __init memblock_alloc_exact_nid_raw(
			phys_addr_t size, phys_addr_t align,
			phys_addr_t min_addr, phys_addr_t max_addr,
			int nid)
{
	void *ptr;

	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
		     __func__, (u64)size, (u64)align, nid, &min_addr,
		     &max_addr, (void *)_RET_IP_);

	ptr = memblock_alloc_internal(size, align,
					   min_addr, max_addr, nid, true);
	if (ptr && size > 0)
		page_init_poison(ptr, size);

	return ptr;
}

/**
 * memblock_alloc_try_nid_raw - allocate boot memory block without zeroing
 * memory and without panicking
@@ -1508,7 +1551,7 @@ void * __init memblock_alloc_try_nid_raw(
		     &max_addr, (void *)_RET_IP_);

	ptr = memblock_alloc_internal(size, align,
					   min_addr, max_addr, nid);
					   min_addr, max_addr, nid, false);
	if (ptr && size > 0)
		page_init_poison(ptr, size);

@@ -1543,7 +1586,7 @@ void * __init memblock_alloc_try_nid(
		     __func__, (u64)size, (u64)align, nid, &min_addr,
		     &max_addr, (void *)_RET_IP_);
	ptr = memblock_alloc_internal(size, align,
					   min_addr, max_addr, nid);
					   min_addr, max_addr, nid, false);
	if (ptr)
		memset(ptr, 0, size);

+1 −1
Original line number Diff line number Diff line
@@ -486,7 +486,7 @@ static void __init sparse_buffer_init(unsigned long size, int nid)
	 * and we want it to be properly aligned to the section size - this is
	 * especially the case for VMEMMAP which maps memmap to PMDs
	 */
	sparsemap_buf = memblock_alloc_try_nid_raw(size, section_map_size(),
	sparsemap_buf = memblock_alloc_exact_nid_raw(size, section_map_size(),
					addr, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
	sparsemap_buf_end = sparsemap_buf + size;
}