Commit cfa3b806 authored by Linus Torvalds
Pull hmm updates from Jason Gunthorpe:
 "This series adds a selftest for hmm_range_fault() and several of the
  DEVICE_PRIVATE migration related actions, and another simplification
  for hmm_range_fault()'s API.

   - Simplify hmm_range_fault() with a simpler return code, no
     HMM_PFN_SPECIAL, and no customizable output PFN format

   - Add a selftest for hmm_range_fault() and DEVICE_PRIVATE related
     functionality"

* tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  MAINTAINERS: add HMM selftests
  mm/hmm/test: add selftests for HMM
  mm/hmm/test: add selftest driver for HMM
  mm/hmm: remove the customizable pfn format from hmm_range_fault
  mm/hmm: remove HMM_PFN_SPECIAL
  drm/amdgpu: remove dead code after hmm_range_fault()
  mm/hmm: make hmm_range_fault return 0 or -1
parents 19409891 f07e2f6b
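
The core of the API simplification is the return code: hmm_range_fault() now returns 0 on success or a negative errno instead of a long count of populated pfns. A minimal caller sketch under the new convention (hypothetical driver code; the mm pointer and the range setup are assumed to follow Documentation/vm/hmm.rst)::

    int r;

retry:
    range.notifier_seq = mmu_interval_read_begin(range.notifier);
    down_read(&mm->mmap_sem);
    r = hmm_range_fault(&range);
    up_read(&mm->mmap_sem);
    if (r == -EBUSY)
        goto retry;        /* raced with an invalidation, fault again */
    if (r)
        return r;          /* hard failure, e.g. -EFAULT or -EPERM */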
Documentation/vm/hmm.rst  +11 −19
@@ -161,7 +161,7 @@ device must complete the update before the driver callback returns.
When the device driver wants to populate a range of virtual addresses, it can
use::

-  long hmm_range_fault(struct hmm_range *range);
+  int hmm_range_fault(struct hmm_range *range);

It will trigger a page fault on missing or read-only entries if write access is
requested (see below). Page faults use the generic mm page fault code path just
@@ -184,10 +184,7 @@ The usage pattern is::
      range.notifier = &interval_sub;
      range.start = ...;
      range.end = ...;
-      range.pfns = ...;
-      range.flags = ...;
-      range.values = ...;
-      range.pfn_shift = ...;
+      range.hmm_pfns = ...;

      if (!mmget_not_zero(interval_sub->notifier.mm))
          return -EFAULT;
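
The remaining steps of the usage pattern (mmu_interval_read_begin(), the fault call, the locked mmu_interval_read_retry() check) are unchanged by this hunk. With the customizable format gone, the range setup collapses to a single output array; a sketch, where NPAGES and the on-stack buffer are illustrative assumptions::

    unsigned long hmm_pfns[NPAGES];       /* one output entry per page */
    struct hmm_range range = {
        .notifier       = &interval_sub,
        .start          = start,
        .end            = start + (NPAGES << PAGE_SHIFT),
        .hmm_pfns       = hmm_pfns,  /* replaces pfns/flags/values/pfn_shift */
        .default_flags  = HMM_PFN_REQ_FAULT,
        .pfn_flags_mask = 0,
    };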
@@ -229,15 +226,10 @@ The hmm_range struct has 2 fields, default_flags and pfn_flags_mask, that specify
fault or snapshot policy for the whole range instead of having to set them
for each entry in the pfns array.

-For instance, if the device flags for range.flags are::
+For instance if the device driver wants pages for a range with at least read
+permission, it sets::

-    range.flags[HMM_PFN_VALID] = (1 << 63);
-    range.flags[HMM_PFN_WRITE] = (1 << 62);
-
-and the device driver wants pages for a range with at least read permission,
-it sets::
-
-    range->default_flags = (1 << 63);
+    range->default_flags = HMM_PFN_REQ_FAULT;
    range->pfn_flags_mask = 0;

and calls hmm_range_fault() as described above. This will fault all pages
@@ -246,18 +238,18 @@ in the range with at least read permission.
Now let's say the driver wants to do the same except for one page in the range for
which it wants to have write permission. The driver then sets::

-    range->default_flags = (1 << 63);
-    range->pfn_flags_mask = (1 << 62);
-    range->pfns[index_of_write] = (1 << 62);
+    range->default_flags = HMM_PFN_REQ_FAULT;
+    range->pfn_flags_mask = HMM_PFN_REQ_WRITE;
+    range->pfns[index_of_write] = HMM_PFN_REQ_WRITE;

With this, HMM will fault in all pages with at least read permission (i.e., valid),
and for the address == range->start + (index_of_write << PAGE_SHIFT) it will fault
with write permission, i.e., if the CPU pte does not have write permission set then
HMM will call handle_mm_fault().

-Note that HMM will populate the pfns array with write permission for any page
-that is mapped with CPU write permission no matter what values are set
-in default_flags or pfn_flags_mask.
+After hmm_range_fault completes, the flag bits are set to the current state of
+the page tables, i.e., HMM_PFN_VALID | HMM_PFN_WRITE will be set if the page is
+writable.


Represent and manage device memory from core kernel point of view
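
To make the new output contract concrete: on success, the snapshot in hmm_pfns mirrors the CPU page tables, so drivers test per-entry flags rather than trusting their request flags. A sketch, where dev_map_page() is a hypothetical device helper::

    unsigned long i, npages = (range.end - range.start) >> PAGE_SHIFT;

    for (i = 0; i < npages; ++i) {
        /* with HMM_PFN_REQ_FAULT, every entry is valid on success */
        bool writable = range.hmm_pfns[i] & HMM_PFN_WRITE;

        dev_map_page(dev, i, writable);
    }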
MAINTAINERS  +2 −0
@@ -7768,7 +7768,9 @@ L: linux-mm@kvack.org
S:	Maintained
F:	Documentation/vm/hmm.rst
F:	include/linux/hmm*
+F:	lib/test_hmm*
F:	mm/hmm*
+F:	tools/testing/selftests/vm/*hmm*
HOST AP DRIVER
M:	Jouni Malinen <j@w1.fi>
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  +18 −38
@@ -766,18 +766,6 @@ struct amdgpu_ttm_tt {
};

#ifdef CONFIG_DRM_AMDGPU_USERPTR
-/* flags used by HMM internal, not related to CPU/GPU PTE flags */
-static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
-	(1 << 0), /* HMM_PFN_VALID */
-	(1 << 1), /* HMM_PFN_WRITE */
-};
-
-static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
-	0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
-	0, /* HMM_PFN_NONE */
-	0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
-};

/**
 * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
 * memory and start HMM tracking CPU page table update
@@ -816,18 +804,15 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
		goto out;
	}
	range->notifier = &bo->notifier;
-	range->flags = hmm_range_flags;
-	range->values = hmm_range_values;
-	range->pfn_shift = PAGE_SHIFT;
	range->start = bo->notifier.interval_tree.start;
	range->end = bo->notifier.interval_tree.last + 1;
-	range->default_flags = hmm_range_flags[HMM_PFN_VALID];
+	range->default_flags = HMM_PFN_REQ_FAULT;
	if (!amdgpu_ttm_tt_is_readonly(ttm))
-		range->default_flags |= range->flags[HMM_PFN_WRITE];
+		range->default_flags |= HMM_PFN_REQ_WRITE;

-	range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns),
-				     GFP_KERNEL);
-	if (unlikely(!range->pfns)) {
+	range->hmm_pfns = kvmalloc_array(ttm->num_pages,
+					 sizeof(*range->hmm_pfns), GFP_KERNEL);
+	if (unlikely(!range->hmm_pfns)) {
		r = -ENOMEM;
		goto out_free_ranges;
	}
@@ -852,27 +837,23 @@ retry:
	down_read(&mm->mmap_sem);
	r = hmm_range_fault(range);
	up_read(&mm->mmap_sem);
-	if (unlikely(r <= 0)) {
+	if (unlikely(r)) {
		/*
		 * FIXME: This timeout should encompass the retry from
		 * mmu_interval_read_retry() as well.
		 */
-		if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout))
+		if (r == -EBUSY && !time_after(jiffies, timeout))
			goto retry;
		goto out_free_pfns;
	}

-	for (i = 0; i < ttm->num_pages; i++) {
-		/* FIXME: The pages cannot be touched outside the notifier_lock */
-		pages[i] = hmm_device_entry_to_page(range, range->pfns[i]);
-		if (unlikely(!pages[i])) {
-			pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
-			       i, range->pfns[i]);
-			r = -ENOMEM;
-
-			goto out_free_pfns;
-		}
-	}
+	/*
+	 * Due to default_flags, all pages are HMM_PFN_VALID or
+	 * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
+	 * the notifier_lock, and mmu_interval_read_retry() must be done first.
+	 */
+	for (i = 0; i < ttm->num_pages; i++)
+		pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);

	gtt->range = range;
	mmput(mm);
@@ -882,7 +863,7 @@ retry:
out_unlock:
	up_read(&mm->mmap_sem);
out_free_pfns:
-	kvfree(range->pfns);
+	kvfree(range->hmm_pfns);
out_free_ranges:
	kfree(range);
out:
@@ -907,7 +888,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
	DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n",
		gtt->userptr, ttm->num_pages);

-	WARN_ONCE(!gtt->range || !gtt->range->pfns,
+	WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns,
		"No user pages to check\n");

	if (gtt->range) {
@@ -917,7 +898,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
		 */
		r = mmu_interval_read_retry(gtt->range->notifier,
					 gtt->range->notifier_seq);
-		kvfree(gtt->range->pfns);
+		kvfree(gtt->range->hmm_pfns);
		kfree(gtt->range);
		gtt->range = NULL;
	}
@@ -1008,8 +989,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)

		for (i = 0; i < ttm->num_pages; i++) {
			if (ttm->pages[i] !=
-				hmm_device_entry_to_page(gtt->range,
-					      gtt->range->pfns[i]))
+			    hmm_pfn_to_page(gtt->range->hmm_pfns[i]))
				break;
		}
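
The FIXME above points at the collision-retry protocol from hmm.rst: the pfns only become safe to use after mmu_interval_read_retry() is checked under the driver's notifier lock. A sketch of that pairing, using the take_lock()/release_lock() placeholders from the hmm.rst example::

    take_lock(driver->update);
    if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) {
        release_lock(driver->update);
        goto retry;                 /* an invalidation ran concurrently */
    }
    /* range->hmm_pfns[] is stable here: program the device page tables */
    release_lock(driver->update);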

drivers/gpu/drm/nouveau/nouveau_dmem.c  +1 −26
@@ -85,7 +85,7 @@ static inline struct nouveau_dmem *page_to_dmem(struct page *page)
	return container_of(page->pgmap, struct nouveau_dmem, pagemap);
}

-static unsigned long nouveau_dmem_page_addr(struct page *page)
+unsigned long nouveau_dmem_page_addr(struct page *page)
{
	struct nouveau_dmem_chunk *chunk = page->zone_device_data;
	unsigned long idx = page_to_pfn(page) - chunk->pfn_first;
@@ -671,28 +671,3 @@ out_free_src:
out:
	return ret;
}

-void
-nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
-			 struct hmm_range *range)
-{
-	unsigned long i, npages;
-
-	npages = (range->end - range->start) >> PAGE_SHIFT;
-	for (i = 0; i < npages; ++i) {
-		struct page *page;
-		uint64_t addr;
-
-		page = hmm_device_entry_to_page(range, range->pfns[i]);
-		if (page == NULL)
-			continue;
-
-		if (!is_device_private_page(page))
-			continue;
-
-		addr = nouveau_dmem_page_addr(page);
-		range->pfns[i] &= ((1UL << range->pfn_shift) - 1);
-		range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift;
-		range->pfns[i] |= NVIF_VMM_PFNMAP_V0_VRAM;
-	}
-}
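
nouveau_dmem_convert_pfn() can be deleted because the per-driver pfn encoding (range->pfn_shift plus the flags/values tables) no longer exists; in this series the conversion moves into nouveau's SVM code. The same walk under the fixed format would look roughly like this sketch (the out_pfns destination array and its exact encoding are illustrative assumptions)::

    unsigned long i, npages = (range->end - range->start) >> PAGE_SHIFT;

    for (i = 0; i < npages; ++i) {
        struct page *page = hmm_pfn_to_page(range->hmm_pfns[i]);

        if (!page || !is_device_private_page(page))
            continue;
        /* substitute the VRAM address for the CPU pfn when encoding */
        out_pfns[i] = (nouveau_dmem_page_addr(page) >> PAGE_SHIFT) |
                      NVIF_VMM_PFNMAP_V0_VRAM;
    }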
drivers/gpu/drm/nouveau/nouveau_dmem.h  +1 −2
@@ -37,9 +37,8 @@ int nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
			     struct vm_area_struct *vma,
			     unsigned long start,
			     unsigned long end);
+unsigned long nouveau_dmem_page_addr(struct page *page);

-void nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
-			      struct hmm_range *range);
#else /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */
static inline void nouveau_dmem_init(struct nouveau_drm *drm) {}
static inline void nouveau_dmem_fini(struct nouveau_drm *drm) {}