Commit 1e240e8d authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Jason Gunthorpe
Browse files

memremap: move dev_pagemap callbacks into a separate structure



The dev_pagemap is growing too many callbacks.  Move them into a
separate ops structure so that they are not duplicated for multiple
instances, and an attacker can't easily overwrite them.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent 3ed2dcdf
Loading
Loading
Loading
Loading
+7 −4
Original line number Diff line number Diff line
@@ -36,9 +36,8 @@ static void dev_dax_percpu_exit(struct percpu_ref *ref)
	percpu_ref_exit(ref);
}

static void dev_dax_percpu_kill(struct percpu_ref *data)
static void dev_dax_percpu_kill(struct percpu_ref *ref)
{
	struct percpu_ref *ref = data;
	struct dev_dax *dev_dax = ref_to_dev_dax(ref);

	dev_dbg(&dev_dax->dev, "%s\n", __func__);
@@ -442,6 +441,11 @@ static void dev_dax_kill(void *dev_dax)
	kill_dev_dax(dev_dax);
}

static const struct dev_pagemap_ops dev_dax_pagemap_ops = {
	.kill		= dev_dax_percpu_kill,
	.cleanup	= dev_dax_percpu_exit,
};

int dev_dax_probe(struct device *dev)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
@@ -466,9 +470,8 @@ int dev_dax_probe(struct device *dev)
		return rc;

	dev_dax->pgmap.ref = &dev_dax->ref;
	dev_dax->pgmap.kill = dev_dax_percpu_kill;
	dev_dax->pgmap.cleanup = dev_dax_percpu_exit;
	dev_dax->pgmap.type = MEMORY_DEVICE_DEVDAX;
	dev_dax->pgmap.ops = &dev_dax_pagemap_ops;
	addr = devm_memremap_pages(dev, &dev_dax->pgmap);
	if (IS_ERR(addr))
		return PTR_ERR(addr);
+1 −1
Original line number Diff line number Diff line
@@ -16,7 +16,7 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
	struct dev_dax *dev_dax;
	struct nd_namespace_io *nsio;
	struct dax_region *dax_region;
	struct dev_pagemap pgmap = { 0 };
	struct dev_pagemap pgmap = { };
	struct nd_namespace_common *ndns;
	struct nd_dax *nd_dax = to_nd_dax(dev);
	struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
+11 −8
Original line number Diff line number Diff line
@@ -303,7 +303,7 @@ static const struct attribute_group *pmem_attribute_groups[] = {
	NULL,
};

static void __pmem_release_queue(struct percpu_ref *ref)
static void pmem_pagemap_cleanup(struct percpu_ref *ref)
{
	struct request_queue *q;

@@ -313,10 +313,10 @@ static void __pmem_release_queue(struct percpu_ref *ref)

static void pmem_release_queue(void *ref)
{
	__pmem_release_queue(ref);
	pmem_pagemap_cleanup(ref);
}

static void pmem_freeze_queue(struct percpu_ref *ref)
static void pmem_pagemap_kill(struct percpu_ref *ref)
{
	struct request_queue *q;

@@ -339,19 +339,24 @@ static void pmem_release_pgmap_ops(void *__pgmap)
	dev_pagemap_put_ops();
}

static void fsdax_pagefree(struct page *page, void *data)
static void pmem_pagemap_page_free(struct page *page, void *data)
{
	wake_up_var(&page->_refcount);
}

static const struct dev_pagemap_ops fsdax_pagemap_ops = {
	.page_free		= pmem_pagemap_page_free,
	.kill			= pmem_pagemap_kill,
	.cleanup		= pmem_pagemap_cleanup,
};

static int setup_pagemap_fsdax(struct device *dev, struct dev_pagemap *pgmap)
{
	dev_pagemap_get_ops();
	if (devm_add_action_or_reset(dev, pmem_release_pgmap_ops, pgmap))
		return -ENOMEM;
	pgmap->type = MEMORY_DEVICE_FS_DAX;
	pgmap->page_free = fsdax_pagefree;

	pgmap->ops = &fsdax_pagemap_ops;
	return 0;
}

@@ -409,8 +414,6 @@ static int pmem_attach_disk(struct device *dev,

	pmem->pfn_flags = PFN_DEV;
	pmem->pgmap.ref = &q->q_usage_counter;
	pmem->pgmap.kill = pmem_freeze_queue;
	pmem->pgmap.cleanup = __pmem_release_queue;
	if (is_nd_pfn(dev)) {
		if (setup_pagemap_fsdax(dev, &pmem->pgmap))
			return -ENOMEM;
+6 −2
Original line number Diff line number Diff line
@@ -153,6 +153,11 @@ out:
	return error;
}

static const struct dev_pagemap_ops pci_p2pdma_pagemap_ops = {
	.kill		= pci_p2pdma_percpu_kill,
	.cleanup	= pci_p2pdma_percpu_cleanup,
};

/**
 * pci_p2pdma_add_resource - add memory for use as p2p memory
 * @pdev: the device to add the memory to
@@ -208,8 +213,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
	pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
	pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) -
		pci_resource_start(pdev, bar);
	pgmap->kill = pci_p2pdma_percpu_kill;
	pgmap->cleanup = pci_p2pdma_percpu_cleanup;
	pgmap->ops = &pci_p2pdma_pagemap_ops;

	addr = devm_memremap_pages(&pdev->dev, pgmap);
	if (IS_ERR(addr)) {
+20 −16
Original line number Diff line number Diff line
@@ -63,41 +63,45 @@ enum memory_type {
	MEMORY_DEVICE_PCI_P2PDMA,
};

struct dev_pagemap_ops {
	/*
 * Additional notes about MEMORY_DEVICE_PRIVATE may be found in
 * include/linux/hmm.h and Documentation/vm/hmm.rst. There is also a brief
 * explanation in include/linux/memory_hotplug.h.
 *
 * The page_free() callback is called once the page refcount reaches 1
 * (ZONE_DEVICE pages never reach 0 refcount unless there is a refcount bug.
 * This allows the device driver to implement its own memory management.)
	 * Called once the page refcount reaches 1.  (ZONE_DEVICE pages never
	 * reach 0 refcount unless there is a refcount bug. This allows the
	 * device driver to implement its own memory management.)
	 */
	void (*page_free)(struct page *page, void *data);

	/*
	 * Transition the refcount in struct dev_pagemap to the dead state.
	 */
	void (*kill)(struct percpu_ref *ref);

	/*
	 * Wait for refcount in struct dev_pagemap to be idle and reap it.
	 */
typedef void (*dev_page_free_t)(struct page *page, void *data);
	void (*cleanup)(struct percpu_ref *ref);
};

/**
 * struct dev_pagemap - metadata for ZONE_DEVICE mappings
 * @page_free: free page callback when page refcount reaches 1
 * @altmap: pre-allocated/reserved memory for vmemmap allocations
 * @res: physical address range covered by @ref
 * @ref: reference count that pins the devm_memremap_pages() mapping
 * @kill: callback to transition @ref to the dead state
 * @cleanup: callback to wait for @ref to be idle and reap it
 * @dev: host device of the mapping for debug
 * @data: private data pointer for page_free()
 * @type: memory type: see MEMORY_* in memory_hotplug.h
 * @ops: method table
 */
struct dev_pagemap {
	dev_page_free_t page_free;
	struct vmem_altmap altmap;
	bool altmap_valid;
	struct resource res;
	struct percpu_ref *ref;
	void (*kill)(struct percpu_ref *ref);
	void (*cleanup)(struct percpu_ref *ref);
	struct device *dev;
	void *data;
	enum memory_type type;
	u64 pci_p2pdma_bus_offset;
	const struct dev_pagemap_ops *ops;
};

#ifdef CONFIG_ZONE_DEVICE
Loading