Commit d3b88655 authored by Dan Williams's avatar Dan Williams
Browse files

Merge branch 'for-5.7/numa' into libnvdimm-for-next

- Promote numa_map_to_online_node() to a cross-kernel generic facility.

- Save x86 numa information to allow for node-id lookups for reserved
  memory ranges, and deploy that capability for the e820-pmem driver.

- Introduce phys_to_target_node() to facilitate drivers that want to
  know the resulting numa node if a given reserved address range was
  onlined.
parents 91bf79bc 7b27a862
Loading
Loading
Loading
Loading
+1 −20
Original line number Diff line number Diff line
@@ -285,25 +285,6 @@ int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
	return 0;
}

/*
 * Translate @node into a node id that is usable right now.
 *
 * NUMA_NO_NODE and nodes that are already online are handed back
 * unchanged.  For an offline node, every online node is compared by
 * node_distance() and the closest one is returned; on a tie the first
 * one encountered wins, and first_online_node is the starting choice.
 */
static inline int papr_scm_node(int node)
{
	int best_nid, best_dist, cand;

	if (node == NUMA_NO_NODE)
		return node;
	if (node_online(node))
		return node;

	best_nid = first_online_node;
	best_dist = INT_MAX;
	for_each_online_node(cand) {
		int cand_dist = node_distance(node, cand);

		/* strict '<' keeps the earliest node among equals */
		if (cand_dist < best_dist) {
			best_dist = cand_dist;
			best_nid = cand;
		}
	}

	return best_nid;
}

static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
{
	struct device *dev = &p->pdev->dev;
@@ -349,7 +330,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)

	memset(&ndr_desc, 0, sizeof(ndr_desc));
	target_nid = dev_to_node(&p->pdev->dev);
	online_nid = papr_scm_node(target_nid);
	online_nid = numa_map_to_online_node(target_nid);
	ndr_desc.numa_node = online_nid;
	ndr_desc.target_node = target_nid;
	ndr_desc.res = &p->res;
+1 −0
Original line number Diff line number Diff line
@@ -1664,6 +1664,7 @@ config X86_PMEM_LEGACY
	depends on PHYS_ADDR_T_64BIT
	depends on BLK_DEV
	select X86_PMEM_LEGACY_DEVICE
	select NUMA_KEEP_MEMINFO if NUMA
	select LIBNVDIMM
	help
	  Treat memory marked using the non-standard e820 type of 12 as used
+52 −15
Original line number Diff line number Diff line
@@ -25,11 +25,8 @@ nodemask_t numa_nodes_parsed __initdata;
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);

static struct numa_meminfo numa_meminfo
#ifndef CONFIG_MEMORY_HOTPLUG
__initdata
#endif
;
static struct numa_meminfo numa_meminfo __initdata_or_meminfo;
static struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo;

static int numa_distance_cnt;
static u8 *numa_distance;
@@ -168,6 +165,19 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
		(mi->nr_blks - idx) * sizeof(mi->blk[0]));
}

/**
 * numa_move_tail_memblk - Move a numa_memblk from one numa_meminfo to another
 * @dst: numa_meminfo that receives the block at its tail
 * @idx: Index, within @src, of the memblk to move
 * @src: numa_meminfo the memblk is taken out of
 *
 * The block is copied to the end of @dst (growing @dst->nr_blks by one)
 * and is then dropped from @src via numa_remove_memblk_from().
 */
static void __init numa_move_tail_memblk(struct numa_meminfo *dst, int idx,
					 struct numa_meminfo *src)
{
	int tail = dst->nr_blks;

	/* copy first: removal from @src invalidates what @idx refers to */
	dst->blk[tail] = src->blk[idx];
	dst->nr_blks = tail + 1;

	numa_remove_memblk_from(idx, src);
}

/**
 * numa_add_memblk - Add one numa_memblk to numa_meminfo
 * @nid: NUMA node ID of the new memblk
@@ -237,14 +247,19 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
	for (i = 0; i < mi->nr_blks; i++) {
		struct numa_memblk *bi = &mi->blk[i];

		/* make sure all blocks are inside the limits */
		/* move / save reserved memory ranges */
		if (!memblock_overlaps_region(&memblock.memory,
					bi->start, bi->end - bi->start)) {
			numa_move_tail_memblk(&numa_reserved_meminfo, i--, mi);
			continue;
		}

		/* make sure all non-reserved blocks are inside the limits */
		bi->start = max(bi->start, low);
		bi->end = min(bi->end, high);

		/* and there's no empty or non-exist block */
		if (bi->start >= bi->end ||
		    !memblock_overlaps_region(&memblock.memory,
			bi->start, bi->end - bi->start))
		/* and there's no empty block */
		if (bi->start >= bi->end)
			numa_remove_memblk_from(i--, mi);
	}

@@ -881,16 +896,38 @@ EXPORT_SYMBOL(cpumask_of_node);

#endif	/* !CONFIG_DEBUG_PER_CPU_MAPS */

#ifdef CONFIG_MEMORY_HOTPLUG
int memory_add_physaddr_to_nid(u64 start)
#ifdef CONFIG_NUMA_KEEP_MEMINFO
static int meminfo_to_nid(struct numa_meminfo *mi, u64 start)
{
	struct numa_meminfo *mi = &numa_meminfo;
	int nid = mi->blk[0].nid;
	int i;

	for (i = 0; i < mi->nr_blks; i++)
		if (mi->blk[i].start <= start && mi->blk[i].end > start)
			nid = mi->blk[i].nid;
			return mi->blk[i].nid;
	return NUMA_NO_NODE;
}

/*
 * Resolve @start to a node id.  numa_meminfo is consulted first; only
 * when it yields NUMA_NO_NODE is numa_reserved_meminfo searched, so that
 * reserved ranges which might be hot-added later can still be resolved.
 * The result of that second lookup is returned as-is.
 */
int phys_to_target_node(phys_addr_t start)
{
	int nid = meminfo_to_nid(&numa_meminfo, start);

	/* no hit in the primary table: fall back to the reserved ranges */
	if (nid == NUMA_NO_NODE)
		nid = meminfo_to_nid(&numa_reserved_meminfo, start);

	return nid;
}
EXPORT_SYMBOL_GPL(phys_to_target_node);

/*
 * Ask meminfo_to_nid() for the node id of @start in numa_meminfo.  When
 * that lookup reports NUMA_NO_NODE, the node id of the first block in
 * numa_meminfo is returned instead.
 */
int memory_add_physaddr_to_nid(u64 start)
{
	int found = meminfo_to_nid(&numa_meminfo, start);

	return found != NUMA_NO_NODE ? found : numa_meminfo.blk[0].nid;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+0 −41
Original line number Diff line number Diff line
@@ -72,47 +72,6 @@ int acpi_map_pxm_to_node(int pxm)
}
EXPORT_SYMBOL(acpi_map_pxm_to_node);

/**
 * acpi_map_pxm_to_online_node - Map proximity ID to online node
 * @pxm: ACPI proximity ID
 *
 * Works like acpi_map_pxm_to_node(), except that the result is always an
 * online node.  If the node mapped from @pxm is offline, the node
 * distance table is searched and the nearest online node is returned
 * instead.  A proximity ID that maps to NUMA_NO_NODE is treated as
 * node 0 before that check.
 *
 * ACPI device drivers, which are called after the NUMA initialization has
 * completed in the kernel, can call this interface to obtain their device
 * NUMA topology from ACPI tables.  Such drivers do not have to deal with
 * offline nodes.  A node may be offline when a device proximity ID is
 * unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
 * "numa=off" on x86.
 */
int acpi_map_pxm_to_online_node(int pxm)
{
	int node = acpi_map_pxm_to_node(pxm);
	int nearest, nearest_dist, n;

	if (node == NUMA_NO_NODE)
		node = 0;

	/* nothing to search for when the mapped node is already online */
	if (node_online(node))
		return node;

	nearest = node;
	nearest_dist = INT_MAX;
	for_each_online_node(n) {
		int dist = node_distance(node, n);

		/* strict '<' keeps the first of several equidistant nodes */
		if (dist < nearest_dist) {
			nearest_dist = dist;
			nearest = n;
		}
	}

	return nearest;
}
EXPORT_SYMBOL(acpi_map_pxm_to_online_node);

static void __init
acpi_table_print_srat_entry(struct acpi_subtable_header *header)
{
+4 −14
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@
#include <linux/memory_hotplug.h>
#include <linux/libnvdimm.h>
#include <linux/module.h>
#include <linux/numa.h>

static int e820_pmem_remove(struct platform_device *pdev)
{
@@ -16,27 +17,16 @@ static int e820_pmem_remove(struct platform_device *pdev)
	return 0;
}

/*
 * Node id for the range starting at @addr.  With CONFIG_MEMORY_HOTPLUG
 * the answer comes from memory_add_physaddr_to_nid(); without it there
 * is no lookup and NUMA_NO_NODE is reported.
 */
static int e820_range_to_nid(resource_size_t addr)
{
#ifdef CONFIG_MEMORY_HOTPLUG
	return memory_add_physaddr_to_nid(addr);
#else
	return NUMA_NO_NODE;
#endif
}

static int e820_register_one(struct resource *res, void *data)
{
	struct nd_region_desc ndr_desc;
	struct nvdimm_bus *nvdimm_bus = data;
	int nid = phys_to_target_node(res->start);

	memset(&ndr_desc, 0, sizeof(ndr_desc));
	ndr_desc.res = res;
	ndr_desc.numa_node = e820_range_to_nid(res->start);
	ndr_desc.target_node = ndr_desc.numa_node;
	ndr_desc.numa_node = numa_map_to_online_node(nid);
	ndr_desc.target_node = nid;
	set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
	if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
		return -ENXIO;
Loading