Commit 50a5de89 authored by Linus Torvalds
Browse files
Pull hmm updates from Jason Gunthorpe:
 "This series focuses on corner case bug fixes and general clarity
  improvements to hmm_range_fault(). It arose from a review of
  hmm_range_fault() by Christoph, Ralph and myself.

  hmm_range_fault() is being used by these 'SVM' style drivers to
  non-destructively read the page tables. It is very similar to
  get_user_pages() except that the output is an array of PFNs and
  per-pfn flags, and it has various modes of reading.

  This is necessary before RDMA ODP can be converted (which is still a
  forward-looking item), as we don't want to have weird corner case
  regressions. Ralph has a nice tester for this routine, but it is
  waiting for feedback from the selftests maintainers.

  Summary:

   - 9 bug fixes

   - Allow pgmap to track the 'owner' of a DEVICE_PRIVATE - in this case
     the owner tells the driver if it can understand the DEVICE_PRIVATE
     page or not. Use this to resolve a bug in nouveau where it could
     touch DEVICE_PRIVATE pages from other drivers.

   - Remove a bunch of dead, redundant or unused code and flags

   - Clarity improvements to hmm_range_fault()"

* tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (25 commits)
  mm/hmm: return error for non-vma snapshots
  mm/hmm: do not set pfns when returning an error code
  mm/hmm: do not unconditionally set pfns when returning EBUSY
  mm/hmm: use device_private_entry_to_pfn()
  mm/hmm: remove HMM_FAULT_SNAPSHOT
  mm/hmm: remove unused code and tidy comments
  mm/hmm: return the fault type from hmm_pte_need_fault()
  mm/hmm: remove pgmap checking for devmap pages
  mm/hmm: check the device private page owner in hmm_range_fault()
  mm: simplify device private page handling in hmm_range_fault
  mm: handle multiple owners of device private pages in migrate_vma
  memremap: add an owner field to struct dev_pagemap
  mm: merge hmm_vma_do_fault into hmm_vma_walk_hole_
  mm/hmm: don't handle the non-fault case in hmm_vma_walk_hole_()
  mm/hmm: simplify hmm_vma_walk_hugetlb_entry()
  mm/hmm: remove the unused HMM_FAULT_ALLOW_RETRY flag
  mm/hmm: don't provide a stub for hmm_range_fault()
  mm/hmm: do not check pmd_protnone twice in hmm_vma_handle_pmd()
  mm/hmm: add missing call to hmm_pte_need_fault in HMM_PFN_SPECIAL handling
  mm/hmm: return -EFAULT when setting HMM_PFN_ERROR on requested valid pages
  ...
parents 193bc55b bd5d3587
Loading
Loading
Loading
Loading
+5 −7
Original line number Diff line number Diff line
@@ -161,13 +161,11 @@ device must complete the update before the driver callback returns.
When the device driver wants to populate a range of virtual addresses, it can
use::

  long hmm_range_fault(struct hmm_range *range, unsigned int flags);
  long hmm_range_fault(struct hmm_range *range);

With the HMM_RANGE_SNAPSHOT flag, it will only fetch present CPU page table
entries and will not trigger a page fault on missing or non-present entries.
Without that flag, it does trigger a page fault on missing or read-only entries
if write access is requested (see below). Page faults use the generic mm page
fault code path just like a CPU page fault.
It will trigger a page fault on missing or read-only entries if write access is
requested (see below). Page faults use the generic mm page fault code path just
like a CPU page fault.

hmm_range_fault() copies CPU page table entries into its pfns array argument. Each
entry in that array corresponds to an address in the virtual range. HMM
@@ -197,7 +195,7 @@ The usage pattern is::
 again:
      range.notifier_seq = mmu_interval_read_begin(&interval_sub);
      down_read(&mm->mmap_sem);
      ret = hmm_range_fault(&range, HMM_RANGE_SNAPSHOT);
      ret = hmm_range_fault(&range);
      if (ret) {
          up_read(&mm->mmap_sem);
          if (ret == -EBUSY)
+3 −0
Original line number Diff line number Diff line
@@ -563,6 +563,7 @@ kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start,
	mig.end = end;
	mig.src = &src_pfn;
	mig.dst = &dst_pfn;
	mig.src_owner = &kvmppc_uvmem_pgmap;

	mutex_lock(&kvm->arch.uvmem_lock);
	/* The requested page is already paged-out, nothing to do */
@@ -779,6 +780,8 @@ int kvmppc_uvmem_init(void)
	kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE;
	kvmppc_uvmem_pgmap.res = *res;
	kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops;
	/* just one global instance: */
	kvmppc_uvmem_pgmap.owner = &kvmppc_uvmem_pgmap;
	addr = memremap_pages(&kvmppc_uvmem_pgmap, NUMA_NO_NODE);
	if (IS_ERR(addr)) {
		ret = PTR_ERR(addr);
+1 −2
Original line number Diff line number Diff line
@@ -770,7 +770,6 @@ struct amdgpu_ttm_tt {
static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
	(1 << 0), /* HMM_PFN_VALID */
	(1 << 1), /* HMM_PFN_WRITE */
	0 /* HMM_PFN_DEVICE_PRIVATE */
};

static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
@@ -851,7 +850,7 @@ retry:
	range->notifier_seq = mmu_interval_read_begin(&bo->notifier);

	down_read(&mm->mmap_sem);
	r = hmm_range_fault(range, 0);
	r = hmm_range_fault(range);
	up_read(&mm->mmap_sem);
	if (unlikely(r <= 0)) {
		/*
+5 −14
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@

#include <nvif/class.h>
#include <nvif/object.h>
#include <nvif/if000c.h>
#include <nvif/if500b.h>
#include <nvif/if900b.h>

@@ -176,6 +177,7 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
		.end		= vmf->address + PAGE_SIZE,
		.src		= &src,
		.dst		= &dst,
		.src_owner	= drm->dev,
	};

	/*
@@ -526,6 +528,7 @@ nouveau_dmem_init(struct nouveau_drm *drm)
	drm->dmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
	drm->dmem->pagemap.res = *res;
	drm->dmem->pagemap.ops = &nouveau_dmem_pagemap_ops;
	drm->dmem->pagemap.owner = drm->dev;
	if (IS_ERR(devm_memremap_pages(device, &drm->dmem->pagemap)))
		goto out_free;

@@ -669,12 +672,6 @@ out:
	return ret;
}

static inline bool
nouveau_dmem_page(struct nouveau_drm *drm, struct page *page)
{
	return is_device_private_page(page) && drm->dmem == page_to_dmem(page);
}

void
nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
			 struct hmm_range *range)
@@ -690,18 +687,12 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
		if (page == NULL)
			continue;

		if (!(range->pfns[i] & range->flags[HMM_PFN_DEVICE_PRIVATE])) {
		if (!is_device_private_page(page))
			continue;
		}

		if (!nouveau_dmem_page(drm, page)) {
			WARN(1, "Some unknown device memory !\n");
			range->pfns[i] = 0;
			continue;
		}

		addr = nouveau_dmem_page_addr(page);
		range->pfns[i] &= ((1UL << range->pfn_shift) - 1);
		range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift;
		range->pfns[i] |= NVIF_VMM_PFNMAP_V0_VRAM;
	}
}
+1 −2
Original line number Diff line number Diff line
@@ -367,7 +367,6 @@ static const u64
nouveau_svm_pfn_flags[HMM_PFN_FLAG_MAX] = {
	[HMM_PFN_VALID         ] = NVIF_VMM_PFNMAP_V0_V,
	[HMM_PFN_WRITE         ] = NVIF_VMM_PFNMAP_V0_W,
	[HMM_PFN_DEVICE_PRIVATE] = NVIF_VMM_PFNMAP_V0_VRAM,
};

static const u64
@@ -541,7 +540,7 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm,
		range.default_flags = 0;
		range.pfn_flags_mask = -1UL;
		down_read(&mm->mmap_sem);
		ret = hmm_range_fault(&range, 0);
		ret = hmm_range_fault(&range);
		up_read(&mm->mmap_sem);
		if (ret <= 0) {
			if (ret == 0 || ret == -EBUSY)
Loading