Commit 7d3bf613 authored by Linus Torvalds
Browse files
Pull libnvdimm updates from Dan Williams:
 "This adds a user for the new 'bytes-remaining' updates to
  memcpy_mcsafe() that you already received through Ingo via the
  x86-dax-for-linus pull.

  Not included here, but still targeting this cycle, is support for
  handling memory media errors (poison) consumed via userspace dax
  mappings.

  Summary:

   - DAX broke a fundamental assumption of truncate of file mapped
     pages. The truncate path assumed that it is safe to disconnect a
     pinned page from a file and let the filesystem reclaim the physical
     block. With DAX the page is equivalent to the filesystem block.
     Introduce dax_layout_busy_page() to enable filesystems to wait for
     pinned DAX pages to be released. Without this wait a filesystem
     could allocate blocks under active device-DMA to a new file.

   - DAX arranges for the block layer to be bypassed and uses
     dax_direct_access() + copy_to_iter() to satisfy read(2) calls.
     However, the memcpy_mcsafe() facility is available through the pmem
     block driver. In order to safely handle media errors, via the DAX
     block-layer bypass, introduce copy_to_iter_mcsafe().

   - Fix cache management policy relative to the ACPI NFIT Platform
     Capabilities Structure to properly elide cache flushes when they
     are not necessary. The table indicates whether CPU caches are
     power-fail protected. Clarify that a deep flush is always performed
     on REQ_{FUA,PREFLUSH} requests"

* tag 'libnvdimm-for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (21 commits)
  dax: Use dax_write_cache* helpers
  libnvdimm, pmem: Do not flush power-fail protected CPU caches
  libnvdimm, pmem: Unconditionally deep flush on *sync
  libnvdimm, pmem: Complete REQ_FLUSH => REQ_PREFLUSH
  acpi, nfit: Remove ecc_unit_size
  dax: dax_insert_mapping_entry always succeeds
  libnvdimm, e820: Register all pmem resources
  libnvdimm: Debug probe times
  linvdimm, pmem: Preserve read-only setting for pmem devices
  x86, nfit_test: Add unit test for memcpy_mcsafe()
  pmem: Switch to copy_to_iter_mcsafe()
  dax: Report bytes remaining in dax_iomap_actor()
  dax: Introduce a ->copy_to_iter dax operation
  uio, lib: Fix CONFIG_ARCH_HAS_UACCESS_MCSAFE compilation
  xfs, dax: introduce xfs_break_dax_layouts()
  xfs: prepare xfs_break_layouts() for another layout type
  xfs: prepare xfs_break_layouts() to be called with XFS_MMAPLOCK_EXCL
  mm, fs, dax: handle layout changes to pinned dax mappings
  mm: fix __gup_device_huge vs unmap
  mm: introduce MEMORY_DEVICE_FS_DAX and CONFIG_DEV_PAGEMAP_OPS
  ...
parents a3818841 930218af
Loading
Loading
Loading
Loading
+17 −0
Original line number Diff line number Diff line
What:		/sys/bus/nd/devices/regionX/nfit/ecc_unit_size
Date:		Aug, 2017
KernelVersion:	v4.14 (Removed v4.18)
Contact:	linux-nvdimm@lists.01.org
Description:
		(RO) Size of a write request to a DIMM that will not incur a
		read-modify-write cycle at the memory controller.

		When the nfit driver initializes it runs an ARS (Address Range
		Scrub) operation across every pmem range. Part of that process
		involves determining the ARS capabilities of a given address
		range. One of the capabilities that is reported is the 'Clear
		Uncorrectable Error Range Length Unit Size' (see: ACPI 6.2
		section 9.20.7.4 Function Index 1 - Query ARS Capabilities).
		This property indicates the boundary at which the NVDIMM may
		need to perform read-modify-write cycles to maintain ECC (Error
		Correcting Code) blocks.
+0 −19
Original line number Diff line number Diff line
@@ -212,22 +212,3 @@ Description:
		range. Used by NVDIMM Region Mapping Structure to uniquely refer
		to this structure. Value of 0 is reserved and not used as an
		index.


What:		/sys/bus/nd/devices/regionX/nfit/ecc_unit_size
Date:		Aug, 2017
KernelVersion:	v4.14
Contact:	linux-nvdimm@lists.01.org
Description:
		(RO) Size of a write request to a DIMM that will not incur a
		read-modify-write cycle at the memory controller.

		When the nfit driver initializes it runs an ARS (Address Range
		Scrub) operation across every pmem range. Part of that process
		involves determining the ARS capabilities of a given address
		range. One of the capabilities that is reported is the 'Clear
		Uncorrectable Error Range Length Unit Size' (see: ACPI 6.2
		section 9.20.7.4 Function Index 1 - Query ARS Capabilities).
		This property indicates the boundary at which the NVDIMM may
		need to perform read-modify-write cycles to maintain ECC (Error
		Correcting Code) blocks.
+3 −0
Original line number Diff line number Diff line
@@ -72,6 +72,9 @@ config EARLY_PRINTK_USB_XDBC
	  You should normally say N here, unless you want to debug early
	  crashes or need a very simple printk logging facility.

# Hidden symbol (no prompt text), default off. When set, <asm/mcsafe_test.h>
# provides the mcsafe_test_src/mcsafe_test_dst controls and non-empty
# MCSAFE_TEST_* assembler macros; otherwise those collapse to no-ops.
# NOTE(review): presumably meant to be select-ed by test code -- confirm.
config MCSAFE_TEST
	def_bool n

config X86_PTDUMP_CORE
	def_bool n

+75 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _MCSAFE_TEST_H_
#define _MCSAFE_TEST_H_

#ifndef __ASSEMBLY__
#ifdef CONFIG_MCSAFE_TEST
extern unsigned long mcsafe_test_src;
extern unsigned long mcsafe_test_dst;

/*
 * Set the source-side test threshold, mcsafe_test_src.
 *
 * A non-NULL @addr is stored as an integer. NULL stores ~0UL, the largest
 * unsigned long, which no address computed by MCSAFE_TEST_SRC can exceed,
 * so the source check never branches.
 */
static inline void mcsafe_inject_src(void *addr)
{
	mcsafe_test_src = addr ? (unsigned long) addr : ~0UL;
}

/*
 * Set the destination-side test threshold, mcsafe_test_dst.
 *
 * A non-NULL @addr is stored as an integer. NULL stores ~0UL, the largest
 * unsigned long, which no address computed by MCSAFE_TEST_DST can exceed,
 * so the destination check never branches.
 */
static inline void mcsafe_inject_dst(void *addr)
{
	mcsafe_test_dst = addr ? (unsigned long) addr : ~0UL;
}
#else /* CONFIG_MCSAFE_TEST */
/*
 * CONFIG_MCSAFE_TEST is off: the injection helpers keep the same signatures
 * but do nothing, so callers need no #ifdef of their own.
 */
static inline void mcsafe_inject_src(void *addr)
{
}

/* No-op counterpart for the destination side; @addr is ignored. */
static inline void mcsafe_inject_dst(void *addr)
{
}
#endif /* CONFIG_MCSAFE_TEST */

#else /* __ASSEMBLY__ */
#include <asm/export.h>

#ifdef CONFIG_MCSAFE_TEST
/*
 * MCSAFE_TEST_CTL - emit the storage for the two test control variables.
 *
 * Places mcsafe_test_src and mcsafe_test_dst in .data as 8-byte aligned
 * global quadwords, both initialised to 0, and exports them (GPL-only).
 * The C side of this header declares them as 'unsigned long'.
 *
 * NOTE(review): with the initial value of 0, MCSAFE_TEST_SRC/MCSAFE_TEST_DST
 * branch for any nonzero computed address until the variables are changed;
 * mcsafe_inject_src(NULL)/mcsafe_inject_dst(NULL) store ~0UL, which stops
 * the branch from being taken. Confirm users reset them before relying on
 * the copy.
 */
.macro MCSAFE_TEST_CTL
	.pushsection .data
	.align 8
	.globl mcsafe_test_src
	mcsafe_test_src:
		.quad 0
	EXPORT_SYMBOL_GPL(mcsafe_test_src)
	.globl mcsafe_test_dst
	mcsafe_test_dst:
		.quad 0
	EXPORT_SYMBOL_GPL(mcsafe_test_dst)
	.popsection
.endm

/*
 * MCSAFE_TEST_SRC reg count target
 *
 * Loads (reg + count) into %r9 and jumps to 'target' when that value is
 * above mcsafe_test_src (unsigned compare). Overwrites %r9 and the flags.
 */
.macro MCSAFE_TEST_SRC reg count target
	leaq \count(\reg), %r9
	cmp mcsafe_test_src, %r9
	ja \target
.endm

/*
 * MCSAFE_TEST_DST reg count target
 *
 * Loads (reg + count) into %r9 and jumps to 'target' when that value is
 * above mcsafe_test_dst (unsigned compare). Overwrites %r9 and the flags.
 */
.macro MCSAFE_TEST_DST reg count target
	leaq \count(\reg), %r9
	cmp mcsafe_test_dst, %r9
	ja \target
.endm
#else
/*
 * CONFIG_MCSAFE_TEST is off: the three macros keep their names and
 * parameter lists but expand to nothing, so call sites assemble unchanged.
 */
.macro MCSAFE_TEST_CTL
.endm

.macro MCSAFE_TEST_SRC reg count target
.endm

.macro MCSAFE_TEST_DST reg count target
.endm
#endif /* CONFIG_MCSAFE_TEST */
#endif /* __ASSEMBLY__ */
#endif /* _MCSAFE_TEST_H_ */
+10 −0
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>

@@ -183,6 +184,9 @@ ENTRY(memcpy_orig)
ENDPROC(memcpy_orig)

#ifndef CONFIG_UML

MCSAFE_TEST_CTL

/*
 * __memcpy_mcsafe - memory copy with machine check exception handling
 * Note that we only catch machine checks when reading the source addresses.
@@ -206,6 +210,8 @@ ENTRY(__memcpy_mcsafe)
	subl %ecx, %edx
.L_read_leading_bytes:
	movb (%rsi), %al
	MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
	MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
.L_write_leading_bytes:
	movb %al, (%rdi)
	incq %rsi
@@ -221,6 +227,8 @@ ENTRY(__memcpy_mcsafe)

.L_read_words:
	movq (%rsi), %r8
	MCSAFE_TEST_SRC %rsi 8 .E_read_words
	MCSAFE_TEST_DST %rdi 8 .E_write_words
.L_write_words:
	movq %r8, (%rdi)
	addq $8, %rsi
@@ -237,6 +245,8 @@ ENTRY(__memcpy_mcsafe)
	movl %edx, %ecx
.L_read_trailing_bytes:
	movb (%rsi), %al
	MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
	MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
.L_write_trailing_bytes:
	movb %al, (%rdi)
	incq %rsi
Loading