Commit b5684579 authored by Dan Williams's avatar Dan Williams
Browse files

Merge branch 'for-4.18/dax' into libnvdimm-for-next

parents 808c340b cc4a90ac
Loading
Loading
Loading
Loading
+11 −3
Original line number Diff line number Diff line
@@ -86,6 +86,7 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
{
	struct block_device *bdev = sb->s_bdev;
	struct dax_device *dax_dev;
	bool dax_enabled = false;
	pgoff_t pgoff;
	int err, id;
	void *kaddr;
@@ -134,14 +135,21 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
		 * on being able to do (page_address(pfn_to_page())).
		 */
		WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
		dax_enabled = true;
	} else if (pfn_t_devmap(pfn)) {
		/* pass */;
	} else {
		struct dev_pagemap *pgmap;

		pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL);
		if (pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX)
			dax_enabled = true;
		put_dev_pagemap(pgmap);
	}

	if (!dax_enabled) {
		pr_debug("VFS (%s): error: dax support not enabled\n",
				sb->s_id);
		return -EOPNOTSUPP;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(__bdev_dax_supported);
+0 −2
Original line number Diff line number Diff line
@@ -561,8 +561,6 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
	res->start += start_pad;
	res->end -= end_trunc;

	pgmap->type = MEMORY_DEVICE_HOST;

	if (nd_pfn->mode == PFN_MODE_RAM) {
		if (offset < SZ_8K)
			return -EINVAL;
+25 −0
Original line number Diff line number Diff line
@@ -289,6 +289,27 @@ static void pmem_release_disk(void *__pmem)
	put_disk(pmem->disk);
}

static void pmem_release_pgmap_ops(void *__pgmap)
{
	dev_pagemap_put_ops();
}

static void fsdax_pagefree(struct page *page, void *data)
{
	wake_up_var(&page->_refcount);
}

static int setup_pagemap_fsdax(struct device *dev, struct dev_pagemap *pgmap)
{
	dev_pagemap_get_ops();
	if (devm_add_action_or_reset(dev, pmem_release_pgmap_ops, pgmap))
		return -ENOMEM;
	pgmap->type = MEMORY_DEVICE_FS_DAX;
	pgmap->page_free = fsdax_pagefree;

	return 0;
}

static int pmem_attach_disk(struct device *dev,
		struct nd_namespace_common *ndns)
{
@@ -347,6 +368,8 @@ static int pmem_attach_disk(struct device *dev,
	pmem->pfn_flags = PFN_DEV;
	pmem->pgmap.ref = &q->q_usage_counter;
	if (is_nd_pfn(dev)) {
		if (setup_pagemap_fsdax(dev, &pmem->pgmap))
			return -ENOMEM;
		addr = devm_memremap_pages(dev, &pmem->pgmap);
		pfn_sb = nd_pfn->pfn_sb;
		pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
@@ -358,6 +381,8 @@ static int pmem_attach_disk(struct device *dev,
	} else if (pmem_should_map_pages(dev)) {
		memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
		pmem->pgmap.altmap_valid = false;
		if (setup_pagemap_fsdax(dev, &pmem->pgmap))
			return -ENOMEM;
		addr = devm_memremap_pages(dev, &pmem->pgmap);
		pmem->pfn_flags |= PFN_MAP;
		memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
+1 −0
Original line number Diff line number Diff line
@@ -38,6 +38,7 @@ config FS_DAX
	bool "Direct Access (DAX) support"
	depends on MMU
	depends on !(ARM || MIPS || SPARC)
	select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED)
	select FS_IOMAP
	select DAX
	help
+99 −16
Original line number Diff line number Diff line
@@ -351,6 +351,19 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
	}
}

static struct page *dax_busy_page(void *entry)
{
	unsigned long pfn;

	for_each_mapped_pfn(entry, pfn) {
		struct page *page = pfn_to_page(pfn);

		if (page_ref_count(page) > 1)
			return page;
	}
	return NULL;
}

/*
 * Find radix tree entry at given index. If it points to an exceptional entry,
 * return it with the radix tree entry locked. If the radix tree doesn't
@@ -492,6 +505,90 @@ restart:
	return entry;
}

/**
 * dax_layout_busy_page - find first pinned page in @mapping
 * @mapping: address space to scan for a page with ref count > 1
 *
 * DAX requires ZONE_DEVICE mapped pages. These pages are never
 * 'onlined' to the page allocator so they are considered idle when
 * page->count == 1. A filesystem uses this interface to determine if
 * any page in the mapping is busy, i.e. for DMA, or other
 * get_user_pages() usages.
 *
 * It is expected that the filesystem is holding locks to block the
 * establishment of new mappings in this address_space. I.e. it expects
 * to be able to run unmap_mapping_range() and subsequently not race
 * mapping_mapped() becoming true.
 */
struct page *dax_layout_busy_page(struct address_space *mapping)
{
	pgoff_t	indices[PAGEVEC_SIZE];
	struct page *page = NULL;
	struct pagevec pvec;
	pgoff_t	index, end;
	unsigned i;

	/*
	 * In the 'limited' case get_user_pages() for dax is disabled.
	 */
	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
		return NULL;

	if (!dax_mapping(mapping) || !mapping_mapped(mapping))
		return NULL;

	pagevec_init(&pvec);
	index = 0;
	end = -1;

	/*
	 * If we race get_user_pages_fast() here either we'll see the
	 * elevated page count in the pagevec_lookup and wait, or
	 * get_user_pages_fast() will see that the page it took a reference
	 * against is no longer mapped in the page tables and bail to the
	 * get_user_pages() slow path.  The slow path is protected by
	 * pte_lock() and pmd_lock(). New references are not taken without
	 * holding those locks, and unmap_mapping_range() will not zero the
	 * pte or pmd without holding the respective lock, so we are
	 * guaranteed to either see new references or prevent new
	 * references from being established.
	 */
	unmap_mapping_range(mapping, 0, 0, 1);

	while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
				min(end - index, (pgoff_t)PAGEVEC_SIZE),
				indices)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *pvec_ent = pvec.pages[i];
			void *entry;

			index = indices[i];
			if (index >= end)
				break;

			if (!radix_tree_exceptional_entry(pvec_ent))
				continue;

			xa_lock_irq(&mapping->i_pages);
			entry = get_unlocked_mapping_entry(mapping, index, NULL);
			if (entry)
				page = dax_busy_page(entry);
			put_unlocked_mapping_entry(mapping, index, entry);
			xa_unlock_irq(&mapping->i_pages);
			if (page)
				break;
		}
		pagevec_remove_exceptionals(&pvec);
		pagevec_release(&pvec);
		index++;

		if (page)
			break;
	}
	return page;
}
EXPORT_SYMBOL_GPL(dax_layout_busy_page);

static int __dax_invalidate_mapping_entry(struct address_space *mapping,
					  pgoff_t index, bool trunc)
{
@@ -912,7 +1009,6 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
	unsigned long vaddr = vmf->address;
	int ret = VM_FAULT_NOPAGE;
	struct page *zero_page;
	void *entry2;
	pfn_t pfn;

	zero_page = ZERO_PAGE(0);
@@ -922,13 +1018,8 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
	}

	pfn = page_to_pfn_t(zero_page);
	entry2 = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
			RADIX_DAX_ZERO_PAGE, false);
	if (IS_ERR(entry2)) {
		ret = VM_FAULT_SIGBUS;
		goto out;
	}

	dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_ZERO_PAGE,
			false);
	vm_insert_mixed(vmf->vma, vaddr, pfn);
out:
	trace_dax_load_hole(inode, vmf, ret);
@@ -1240,10 +1331,6 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,

		entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
						 0, write && !sync);
		if (IS_ERR(entry)) {
			error = PTR_ERR(entry);
			goto error_finish_iomap;
		}

		/*
		 * If we are doing synchronous page fault and inode needs fsync,
@@ -1327,8 +1414,6 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
	pfn = page_to_pfn_t(zero_page);
	ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
			RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
	if (IS_ERR(ret))
		goto fallback;

	ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
	if (!pmd_none(*(vmf->pmd))) {
@@ -1450,8 +1535,6 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,

		entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
						RADIX_DAX_PMD, write && !sync);
		if (IS_ERR(entry))
			goto finish_iomap;

		/*
		 * If we are doing synchronous page fault and inode needs fsync,
Loading