Commit 72981e0e authored by Andrea Arcangeli, committed by Linus Torvalds
Browse files

userfaultfd: wp: add UFFDIO_COPY_MODE_WP



This allows UFFDIO_COPY to map pages write-protected.

[peterx@redhat.com: switch to VM_WARN_ON_ONCE in mfill_atomic_pte; add brackets
 around "dst_vma->vm_flags & VM_WRITE"; fix wordings in comments and
 commit messages]
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
Reviewed-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Bobby Powers <bobbypowers@gmail.com>
Cc: Brian Geffon <bgeffon@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Denis Plotnikov <dplotnikov@virtuozzo.com>
Cc: "Dr . David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Martin Cracauer <cracauer@cons.org>
Cc: Marty McFadden <mcfadden8@llnl.gov>
Cc: Maya Gokhale <gokhale2@llnl.gov>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@fb.com>
Link: http://lkml.kernel.org/r/20200220163112.11409-6-peterx@redhat.com


Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 55adf4de
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -1724,11 +1724,12 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
	ret = -EINVAL;
	if (uffdio_copy.src + uffdio_copy.len <= uffdio_copy.src)
		goto out;
	if (uffdio_copy.mode & ~UFFDIO_COPY_MODE_DONTWAKE)
	if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|UFFDIO_COPY_MODE_WP))
		goto out;
	if (mmget_not_zero(ctx->mm)) {
		ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
				   uffdio_copy.len, &ctx->mmap_changing);
				   uffdio_copy.len, &ctx->mmap_changing,
				   uffdio_copy.mode);
		mmput(ctx->mm);
	} else {
		return -ESRCH;
+1 −1
Original line number Diff line number Diff line
@@ -36,7 +36,7 @@ extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);

extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
			    unsigned long src_start, unsigned long len,
			    bool *mmap_changing);
			    bool *mmap_changing, __u64 mode);
extern ssize_t mfill_zeropage(struct mm_struct *dst_mm,
			      unsigned long dst_start,
			      unsigned long len,
+6 −5
Original line number Diff line number Diff line
@@ -203,13 +203,14 @@ struct uffdio_copy {
	__u64 dst;
	__u64 src;
	__u64 len;
#define UFFDIO_COPY_MODE_DONTWAKE		((__u64)1<<0)
	/*
	 * There will be a wrprotection flag later that allows to map
	 * pages wrprotected on the fly. And such a flag will be
	 * available if the wrprotection ioctl are implemented for the
	 * range according to the uffdio_register.ioctls.
	 * UFFDIO_COPY_MODE_WP will map the page write protected on
	 * the fly.  UFFDIO_COPY_MODE_WP is available only if the
	 * write protected ioctl is implemented for the range
	 * according to the uffdio_register.ioctls.
	 */
#define UFFDIO_COPY_MODE_DONTWAKE		((__u64)1<<0)
#define UFFDIO_COPY_MODE_WP			((__u64)1<<1)
	__u64 mode;

	/*
+25 −11
Original line number Diff line number Diff line
@@ -53,7 +53,8 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
			    struct vm_area_struct *dst_vma,
			    unsigned long dst_addr,
			    unsigned long src_addr,
			    struct page **pagep)
			    struct page **pagep,
			    bool wp_copy)
{
	struct mem_cgroup *memcg;
	pte_t _dst_pte, *dst_pte;
@@ -99,9 +100,9 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
	if (mem_cgroup_try_charge(page, dst_mm, GFP_KERNEL, &memcg, false))
		goto out_release;

	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
	if (dst_vma->vm_flags & VM_WRITE)
		_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
	_dst_pte = pte_mkdirty(mk_pte(page, dst_vma->vm_page_prot));
	if ((dst_vma->vm_flags & VM_WRITE) && !wp_copy)
		_dst_pte = pte_mkwrite(_dst_pte);

	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
	if (dst_vma->vm_file) {
@@ -415,7 +416,8 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
						unsigned long dst_addr,
						unsigned long src_addr,
						struct page **page,
						bool zeropage)
						bool zeropage,
						bool wp_copy)
{
	ssize_t err;

@@ -432,11 +434,13 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
	if (!(dst_vma->vm_flags & VM_SHARED)) {
		if (!zeropage)
			err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
					       dst_addr, src_addr, page);
					       dst_addr, src_addr, page,
					       wp_copy);
		else
			err = mfill_zeropage_pte(dst_mm, dst_pmd,
						 dst_vma, dst_addr);
	} else {
		VM_WARN_ON_ONCE(wp_copy);
		if (!zeropage)
			err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd,
						     dst_vma, dst_addr,
@@ -454,7 +458,8 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
					      unsigned long src_start,
					      unsigned long len,
					      bool zeropage,
					      bool *mmap_changing)
					      bool *mmap_changing,
					      __u64 mode)
{
	struct vm_area_struct *dst_vma;
	ssize_t err;
@@ -462,6 +467,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
	unsigned long src_addr, dst_addr;
	long copied;
	struct page *page;
	bool wp_copy;

	/*
	 * Sanitize the command parameters:
@@ -507,6 +513,14 @@ retry:
	    dst_vma->vm_flags & VM_SHARED))
		goto out_unlock;

	/*
	 * validate 'mode' now that we know the dst_vma: don't allow
	 * a wrprotect copy if the userfaultfd didn't register as WP.
	 */
	wp_copy = mode & UFFDIO_COPY_MODE_WP;
	if (wp_copy && !(dst_vma->vm_flags & VM_UFFD_WP))
		goto out_unlock;

	/*
	 * If this is a HUGETLB vma, pass off to appropriate routine
	 */
@@ -562,7 +576,7 @@ retry:
		BUG_ON(pmd_trans_huge(*dst_pmd));

		err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
				       src_addr, &page, zeropage);
				       src_addr, &page, zeropage, wp_copy);
		cond_resched();

		if (unlikely(err == -ENOENT)) {
@@ -609,14 +623,14 @@ out:

ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
		     unsigned long src_start, unsigned long len,
		     bool *mmap_changing)
		     bool *mmap_changing, __u64 mode)
{
	return __mcopy_atomic(dst_mm, dst_start, src_start, len, false,
			      mmap_changing);
			      mmap_changing, mode);
}

ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
		       unsigned long len, bool *mmap_changing)
{
	return __mcopy_atomic(dst_mm, start, 0, len, true, mmap_changing);
	return __mcopy_atomic(dst_mm, start, 0, len, true, mmap_changing, 0);
}