Commit fffe3ae0 authored by Linus Torvalds
Pull hmm updates from Jason Gunthorpe:
 "Ralph has been working on nouveau's use of hmm_range_fault() and
  migrate_vma() which resulted in this small series. It adds reporting
  of the page table order from hmm_range_fault() and some optimization
  of migrate_vma():

   - Report the size of the page table mapping out of hmm_range_fault().

     This makes it easier to establish a large/huge/etc mapping in the
     device's page table.

   - Allow devices to ignore the invalidations during migration in cases
     where the migration is not going to change pages.

     For instance, migrating pages to a device does not require the
     device to invalidate pages already in the device.

   - Update nouveau and hmm_tests to use the above"
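For context on the first bullet: the new order reporting is exposed through the
hmm_pfn_to_map_order() helper, which returns the order of the CPU page table
entry that mapped the page. A minimal sketch of how a driver might consume it
after a successful hmm_range_fault(); my_device_map() is a hypothetical driver
helper, not a kernel API:

/* Sketch only: consume the map order hmm_range_fault() now reports.
 * my_device_map() is hypothetical.
 */
#include <linux/hmm.h>

void my_device_map(unsigned long addr, unsigned long size, bool writable);

static void sketch_map_one(struct hmm_range *range)
{
	unsigned long hmm_pfn = range->hmm_pfns[0];
	/* 0 for a 4KiB PTE, 9 for a 2MB PMD on x86-64, and so on. */
	unsigned int order = hmm_pfn_to_map_order(hmm_pfn);
	unsigned long size = PAGE_SIZE << order;
	/* Round down so one large device PTE covers the whole CPU mapping. */
	unsigned long addr = range->start & ~(size - 1);

	my_device_map(addr, size, hmm_pfn & HMM_PFN_WRITE);
}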

* tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  mm/hmm/test: use the new migration invalidation
  nouveau/svm: use the new migration invalidation
  mm/notifier: add migration invalidation type
  mm/migrate: add a flags parameter to migrate_vma
  nouveau: fix storing invalid ptes
  nouveau/hmm: support mapping large sysmem pages
  nouveau: fix mapping 2MB sysmem pages
  nouveau/hmm: fault one page at a time
  mm/hmm: add tests for hmm_pfn_to_map_order()
  mm/hmm: provide the page mapping order in hmm_range_fault()
parents 8f7be629 7d17e83a
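The second bullet works by tagging migration invalidations with
MMU_NOTIFY_MIGRATE and the pgmap owner, so a driver's notifier callback can
skip invalidations it caused itself; the nouveau_svm.c hunk below does exactly
this. A generic sketch under the same assumptions, where my_dev stands in for
the cookie the driver put in migrate_vma.pgmap_owner:

/* Sketch only: an interval-notifier callback that ignores invalidations
 * raised by the driver's own migrate_vma() calls.
 */
#include <linux/mmu_notifier.h>

extern void *my_dev;	/* hypothetical pgmap owner cookie */

static bool my_invalidate(struct mmu_interval_notifier *mni,
			  const struct mmu_notifier_range *range,
			  unsigned long cur_seq)
{
	/* Migration to the device does not change the pages the device
	 * already holds, so there is nothing to invalidate.
	 */
	if (range->event == MMU_NOTIFY_MIGRATE &&
	    range->migrate_pgmap_owner == my_dev)
		return true;

	mmu_interval_set_seq(mni, cur_seq);
	/* ... tear down device PTEs covering [range->start, range->end) ... */
	return true;
}

static const struct mmu_interval_notifier_ops my_mni_ops = {
	.invalidate = my_invalidate,
};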
+3 −1
@@ -400,6 +400,7 @@ kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
 	mig.end = end;
 	mig.src = &src_pfn;
 	mig.dst = &dst_pfn;
+	mig.flags = MIGRATE_VMA_SELECT_SYSTEM;
 
 	/*
 	 * We come here with mmap_lock write lock held just for
@@ -577,7 +578,8 @@ kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start,
 	mig.end = end;
 	mig.src = &src_pfn;
 	mig.dst = &dst_pfn;
-	mig.src_owner = &kvmppc_uvmem_pgmap;
+	mig.pgmap_owner = &kvmppc_uvmem_pgmap;
+	mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
 
 	mutex_lock(&kvm->arch.uvmem_lock);
 	/* The requested page is already paged-out, nothing to do */
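As the hunks above show, migrate_vma callers now state explicitly which pages
they are willing to migrate (MIGRATE_VMA_SELECT_SYSTEM for ordinary system
memory, MIGRATE_VMA_SELECT_DEVICE_PRIVATE for device private pages), replacing
the old implicit src_owner test. A condensed sketch of the calling convention
for a single page, with the data copy elided; my_owner is assumed to be the
same cookie used when registering the device-private pagemap:

/* Sketch only: migrate one device-private page back to system memory. */
#include <linux/migrate.h>

static int sketch_migrate_one(struct vm_area_struct *vma, unsigned long addr,
			      void *my_owner)
{
	unsigned long src_pfn = 0, dst_pfn = 0;
	struct migrate_vma args = {
		.vma		= vma,
		.start		= addr,
		.end		= addr + PAGE_SIZE,
		.src		= &src_pfn,
		.dst		= &dst_pfn,
		.pgmap_owner	= my_owner,
		.flags		= MIGRATE_VMA_SELECT_DEVICE_PRIVATE,
	};
	int ret = migrate_vma_setup(&args);

	if (ret)
		return ret;
	if (src_pfn & MIGRATE_PFN_MIGRATE) {
		/* ... allocate a destination page, copy the data, and set
		 * dst_pfn = migrate_pfn(page_to_pfn(dpage)) ...
		 */
		migrate_vma_pages(&args);
		migrate_vma_finalize(&args);
	}
	return 0;
}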
+15 −4
@@ -140,6 +140,7 @@ static vm_fault_t nouveau_dmem_fault_copy_one(struct nouveau_drm *drm,
 {
 	struct device *dev = drm->dev->dev;
 	struct page *dpage, *spage;
+	struct nouveau_svmm *svmm;
 
 	spage = migrate_pfn_to_page(args->src[0]);
 	if (!spage || !(args->src[0] & MIGRATE_PFN_MIGRATE))
@@ -154,14 +155,19 @@ static vm_fault_t nouveau_dmem_fault_copy_one(struct nouveau_drm *drm,
 	if (dma_mapping_error(dev, *dma_addr))
 		goto error_free_page;
 
+	svmm = spage->zone_device_data;
+	mutex_lock(&svmm->mutex);
+	nouveau_svmm_invalidate(svmm, args->start, args->end);
 	if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_HOST, *dma_addr,
 			NOUVEAU_APER_VRAM, nouveau_dmem_page_addr(spage)))
 		goto error_dma_unmap;
+	mutex_unlock(&svmm->mutex);
 
 	args->dst[0] = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
 	return 0;
 
 error_dma_unmap:
+	mutex_unlock(&svmm->mutex);
 	dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
 error_free_page:
 	__free_page(dpage);
@@ -182,7 +188,8 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
 		.end		= vmf->address + PAGE_SIZE,
 		.src		= &src,
 		.dst		= &dst,
-		.src_owner	= drm->dev,
+		.pgmap_owner	= drm->dev,
+		.flags		= MIGRATE_VMA_SELECT_DEVICE_PRIVATE,
 	};
 
 	/*
@@ -530,7 +537,8 @@ nouveau_dmem_init(struct nouveau_drm *drm)
 }
 
 static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
-		unsigned long src, dma_addr_t *dma_addr, u64 *pfn)
+		struct nouveau_svmm *svmm, unsigned long src,
+		dma_addr_t *dma_addr, u64 *pfn)
 {
 	struct device *dev = drm->dev->dev;
 	struct page *dpage, *spage;
@@ -560,6 +568,7 @@ static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
 			goto out_free_page;
 	}
 
+	dpage->zone_device_data = svmm;
 	*pfn = NVIF_VMM_PFNMAP_V0_V | NVIF_VMM_PFNMAP_V0_VRAM |
 		((paddr >> PAGE_SHIFT) << NVIF_VMM_PFNMAP_V0_ADDR_SHIFT);
 	if (src & MIGRATE_PFN_WRITE)
@@ -583,8 +592,8 @@ static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm,
 	unsigned long addr = args->start, nr_dma = 0, i;
 
 	for (i = 0; addr < args->end; i++) {
-		args->dst[i] = nouveau_dmem_migrate_copy_one(drm, args->src[i],
-				dma_addrs + nr_dma, pfns + i);
+		args->dst[i] = nouveau_dmem_migrate_copy_one(drm, svmm,
+				args->src[i], dma_addrs + nr_dma, pfns + i);
 		if (!dma_mapping_error(drm->dev->dev, dma_addrs[nr_dma]))
 			nr_dma++;
 		addr += PAGE_SIZE;
@@ -615,6 +624,8 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
 	struct migrate_vma args = {
 		.vma		= vma,
 		.start		= start,
+		.pgmap_owner	= drm->dev,
+		.flags		= MIGRATE_VMA_SELECT_SYSTEM,
 	};
 	unsigned long i;
 	u64 *pfns;
+107 −150
@@ -93,17 +93,6 @@ nouveau_ivmm_find(struct nouveau_svm *svm, u64 inst)
 	return NULL;
 }
 
-struct nouveau_svmm {
-	struct mmu_notifier notifier;
-	struct nouveau_vmm *vmm;
-	struct {
-		unsigned long start;
-		unsigned long limit;
-	} unmanaged;
-
-	struct mutex mutex;
-};
-
 #define SVMM_DBG(s,f,a...)                                                     \
 	NV_DEBUG((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a)
 #define SVMM_ERR(s,f,a...)                                                     \
@@ -246,7 +235,7 @@ nouveau_svmm_join(struct nouveau_svmm *svmm, u64 inst)
 }
 
 /* Invalidate SVMM address-range on GPU. */
-static void
+void
 nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit)
 {
 	if (limit > start) {
@@ -279,6 +268,14 @@ nouveau_svmm_invalidate_range_start(struct mmu_notifier *mn,
 	if (unlikely(!svmm->vmm))
 		goto out;
 
+	/*
+	 * Ignore invalidation callbacks for device private pages since
+	 * the invalidation is handled as part of the migration process.
+	 */
+	if (update->event == MMU_NOTIFY_MIGRATE &&
+	    update->migrate_pgmap_owner == svmm->vmm->cli->drm->dev)
+		goto out;
+
 	if (limit > svmm->unmanaged.start && start < svmm->unmanaged.limit) {
 		if (start < svmm->unmanaged.start) {
 			nouveau_svmm_invalidate(svmm, start,
@@ -514,53 +511,68 @@ static const struct mmu_interval_notifier_ops nouveau_svm_mni_ops = {
 };
 
 static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm,
-				    struct hmm_range *range, u64 *ioctl_addr)
+				    struct hmm_range *range,
+				    struct nouveau_pfnmap_args *args)
 {
-	unsigned long i, npages;
+	struct page *page;
 
 	/*
-	 * The ioctl_addr prepared here is passed through nvif_object_ioctl()
+	 * The address prepared here is passed through nvif_object_ioctl()
 	 * to an eventual DMA map in something like gp100_vmm_pgt_pfn()
 	 *
 	 * This is all just encoding the internal hmm representation into a
 	 * different nouveau internal representation.
 	 */
-	npages = (range->end - range->start) >> PAGE_SHIFT;
-	for (i = 0; i < npages; ++i) {
-		struct page *page;
-
-		if (!(range->hmm_pfns[i] & HMM_PFN_VALID)) {
-			ioctl_addr[i] = 0;
-			continue;
+	if (!(range->hmm_pfns[0] & HMM_PFN_VALID)) {
+		args->p.phys[0] = 0;
+		return;
+	}
 
-		page = hmm_pfn_to_page(range->hmm_pfns[i]);
+	page = hmm_pfn_to_page(range->hmm_pfns[0]);
+	/*
+	 * Only map compound pages to the GPU if the CPU is also mapping the
+	 * page as a compound page. Otherwise, the PTE protections might not be
+	 * consistent (e.g., CPU only maps part of a compound page).
+	 * Note that the underlying page might still be larger than the
+	 * CPU mapping (e.g., a PUD sized compound page partially mapped with
+	 * a PMD sized page table entry).
+	 */
+	if (hmm_pfn_to_map_order(range->hmm_pfns[0])) {
+		unsigned long addr = args->p.addr;
+
+		args->p.page = hmm_pfn_to_map_order(range->hmm_pfns[0]) +
+				PAGE_SHIFT;
+		args->p.size = 1UL << args->p.page;
+		args->p.addr &= ~(args->p.size - 1);
+		page -= (addr - args->p.addr) >> PAGE_SHIFT;
+	}
 	if (is_device_private_page(page))
-			ioctl_addr[i] = nouveau_dmem_page_addr(page) |
+		args->p.phys[0] = nouveau_dmem_page_addr(page) |
 				NVIF_VMM_PFNMAP_V0_V |
 				NVIF_VMM_PFNMAP_V0_VRAM;
 	else
-			ioctl_addr[i] = page_to_phys(page) |
+		args->p.phys[0] = page_to_phys(page) |
 				NVIF_VMM_PFNMAP_V0_V |
 				NVIF_VMM_PFNMAP_V0_HOST;
-		if (range->hmm_pfns[i] & HMM_PFN_WRITE)
-			ioctl_addr[i] |= NVIF_VMM_PFNMAP_V0_W;
-	}
+	if (range->hmm_pfns[0] & HMM_PFN_WRITE)
+		args->p.phys[0] |= NVIF_VMM_PFNMAP_V0_W;
 }
 
 static int nouveau_range_fault(struct nouveau_svmm *svmm,
-			       struct nouveau_drm *drm, void *data, u32 size,
-			       unsigned long hmm_pfns[], u64 *ioctl_addr,
+			       struct nouveau_drm *drm,
+			       struct nouveau_pfnmap_args *args, u32 size,
+			       unsigned long hmm_flags,
 			       struct svm_notifier *notifier)
 {
 	unsigned long timeout =
 		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
 	/* Have HMM fault pages within the fault window to the GPU. */
+	unsigned long hmm_pfns[1];
 	struct hmm_range range = {
 		.notifier = &notifier->notifier,
 		.start = notifier->notifier.interval_tree.start,
 		.end = notifier->notifier.interval_tree.last + 1,
-		.pfn_flags_mask = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE,
+		.default_flags = hmm_flags,
 		.hmm_pfns = hmm_pfns,
 		.dev_private_owner = drm->dev,
 	};
@@ -576,11 +588,6 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm,
 		ret = hmm_range_fault(&range);
 		mmap_read_unlock(mm);
 		if (ret) {
-			/*
-			 * FIXME: the input PFN_REQ flags are destroyed on
-			 * -EBUSY, we need to regenerate them, also for the
-			 * other continue below
-			 */
 			if (ret == -EBUSY)
 				continue;
 			return ret;
@@ -595,10 +602,10 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm,
 		break;
 	}
 
-	nouveau_hmm_convert_pfn(drm, &range, ioctl_addr);
+	nouveau_hmm_convert_pfn(drm, &range, args);
 
 	svmm->vmm->vmm.object.client->super = true;
-	ret = nvif_object_ioctl(&svmm->vmm->vmm.object, data, size, NULL);
+	ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL);
 	svmm->vmm->vmm.object.client->super = false;
 	mutex_unlock(&svmm->mutex);
 
@@ -615,17 +622,12 @@ nouveau_svm_fault(struct nvif_notify *notify)
 	struct nvif_object *device = &svm->drm->client.device.object;
 	struct nouveau_svmm *svmm;
 	struct {
-		struct {
-			struct nvif_ioctl_v0 i;
-			struct nvif_ioctl_mthd_v0 m;
-			struct nvif_vmm_pfnmap_v0 p;
-		} i;
-		u64 phys[16];
+		struct nouveau_pfnmap_args i;
+		u64 phys[1];
 	} args;
-	unsigned long hmm_pfns[ARRAY_SIZE(args.phys)];
-	struct vm_area_struct *vma;
+	unsigned long hmm_flags;
 	u64 inst, start, limit;
-	int fi, fn, pi, fill;
+	int fi, fn;
 	int replay = 0, ret;
 
 	/* Parse available fault buffer entries into a cache, and update
@@ -692,129 +694,84 @@ nouveau_svm_fault(struct nvif_notify *notify)
 		 * window into a single update.
 		 */
 		start = buffer->fault[fi]->addr;
-		limit = start + (ARRAY_SIZE(args.phys) << PAGE_SHIFT);
+		limit = start + PAGE_SIZE;
 		if (start < svmm->unmanaged.limit)
 			limit = min_t(u64, limit, svmm->unmanaged.start);
 		SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit);
 
-		mm = svmm->notifier.mm;
-		if (!mmget_not_zero(mm)) {
-			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
-			continue;
-		}
-
-		/* Intersect fault window with the CPU VMA, cancelling
-		 * the fault if the address is invalid.
-		 */
-		mmap_read_lock(mm);
-		vma = find_vma_intersection(mm, start, limit);
-		if (!vma) {
-			SVMM_ERR(svmm, "wndw %016llx-%016llx", start, limit);
-			mmap_read_unlock(mm);
-			mmput(mm);
-			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
-			continue;
-		}
-		start = max_t(u64, start, vma->vm_start);
-		limit = min_t(u64, limit, vma->vm_end);
-		mmap_read_unlock(mm);
-		SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit);
-
-		if (buffer->fault[fi]->addr != start) {
-			SVMM_ERR(svmm, "addr %016llx", buffer->fault[fi]->addr);
-			mmput(mm);
-			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
-			continue;
-		}
 
-		/* Prepare the GPU-side update of all pages within the
+		/*
+		 * Prepare the GPU-side update of all pages within the
 		 * fault window, determining required pages and access
 		 * permissions based on pending faults.
 		 */
-		args.i.p.page = PAGE_SHIFT;
 		args.i.p.addr = start;
-		for (fn = fi, pi = 0;;) {
-			/* Determine required permissions based on GPU fault
+		args.i.p.page = PAGE_SHIFT;
+		args.i.p.size = PAGE_SIZE;
+		/*
+		 * Determine required permissions based on GPU fault
 		 * access flags.
 		 * XXX: atomic?
 		 */
-			switch (buffer->fault[fn]->access) {
+		switch (buffer->fault[fi]->access) {
 		case 0: /* READ. */
-				hmm_pfns[pi++] = HMM_PFN_REQ_FAULT;
+			hmm_flags = HMM_PFN_REQ_FAULT;
 			break;
 		case 3: /* PREFETCH. */
-				hmm_pfns[pi++] = 0;
+			hmm_flags = 0;
 			break;
 		default:
-				hmm_pfns[pi++] = HMM_PFN_REQ_FAULT |
-						 HMM_PFN_REQ_WRITE;
+			hmm_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE;
 			break;
 		}
-			args.i.p.size = pi << PAGE_SHIFT;
 
-			/* It's okay to skip over duplicate addresses from the
-			 * same SVMM as faults are ordered by access type such
-			 * that only the first one needs to be handled.
-			 *
-			 * ie. WRITE faults appear first, thus any handling of
-			 * pending READ faults will already be satisfied.
-			 */
-			while (++fn < buffer->fault_nr &&
-			       buffer->fault[fn]->svmm == svmm &&
-			       buffer->fault[fn    ]->addr ==
-			       buffer->fault[fn - 1]->addr);
-
-			/* If the next fault is outside the window, or all GPU
-			 * faults have been dealt with, we're done here.
-			 */
-			if (fn >= buffer->fault_nr ||
-			    buffer->fault[fn]->svmm != svmm ||
-			    buffer->fault[fn]->addr >= limit)
-				break;
-
-			/* Fill in the gap between this fault and the next. */
-			fill = (buffer->fault[fn    ]->addr -
-				buffer->fault[fn - 1]->addr) >> PAGE_SHIFT;
-			while (--fill)
-				hmm_pfns[pi++] = 0;
+		mm = svmm->notifier.mm;
+		if (!mmget_not_zero(mm)) {
+			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
+			continue;
 		}
 
-		SVMM_DBG(svmm, "wndw %016llx-%016llx covering %d fault(s)",
-			 args.i.p.addr,
-			 args.i.p.addr + args.i.p.size, fn - fi);
-
 		notifier.svmm = svmm;
-		ret = mmu_interval_notifier_insert(&notifier.notifier,
-						   svmm->notifier.mm,
+		ret = mmu_interval_notifier_insert(&notifier.notifier, mm,
 						   args.i.p.addr, args.i.p.size,
 						   &nouveau_svm_mni_ops);
 		if (!ret) {
-			ret = nouveau_range_fault(
-				svmm, svm->drm, &args,
-				sizeof(args.i) + pi * sizeof(args.phys[0]),
-				hmm_pfns, args.phys, &notifier);
+			ret = nouveau_range_fault(svmm, svm->drm, &args.i,
+				sizeof(args), hmm_flags, &notifier);
 			mmu_interval_notifier_remove(&notifier.notifier);
 		}
 		mmput(mm);
 
-		/* Cancel any faults in the window whose pages didn't manage
-		 * to keep their valid bit, or stay writeable when required.
+		limit = args.i.p.addr + args.i.p.size;
+		for (fn = fi; ++fn < buffer->fault_nr; ) {
+			/* It's okay to skip over duplicate addresses from the
+			 * same SVMM as faults are ordered by access type such
+			 * that only the first one needs to be handled.
+			 *
-		 * If handling failed completely, cancel all faults.
+			 * ie. WRITE faults appear first, thus any handling of
+			 * pending READ faults will already be satisfied.
+			 * But if a large page is mapped, make sure subsequent
+			 * fault addresses have sufficient access permission.
 			 */
+			if (buffer->fault[fn]->svmm != svmm ||
+			    buffer->fault[fn]->addr >= limit ||
+			    (buffer->fault[fi]->access == 0 /* READ. */ &&
+			     !(args.phys[0] & NVIF_VMM_PFNMAP_V0_V)) ||
+			    (buffer->fault[fi]->access != 0 /* READ. */ &&
+			     buffer->fault[fi]->access != 3 /* PREFETCH. */ &&
+			     !(args.phys[0] & NVIF_VMM_PFNMAP_V0_W)))
+				break;
+		}
+
+		/* If handling failed completely, cancel all faults. */
+		if (ret) {
 			while (fi < fn) {
-			struct nouveau_svm_fault *fault = buffer->fault[fi++];
-			pi = (fault->addr - args.i.p.addr) >> PAGE_SHIFT;
-			if (ret ||
-			     !(args.phys[pi] & NVIF_VMM_PFNMAP_V0_V) ||
-			    (!(args.phys[pi] & NVIF_VMM_PFNMAP_V0_W) &&
-			     fault->access != 0 && fault->access != 3)) {
+				struct nouveau_svm_fault *fault =
+					buffer->fault[fi++];
+
 				nouveau_svm_fault_cancel_fault(svm, fault);
-				continue;
 			}
-			replay++;
-		}
+		} else
+			replay++;
 	}
 
 	/* Issue fault replay to the GPU. */
 	if (replay)
+12 −1
 #ifndef __NOUVEAU_SVM_H__
 #define __NOUVEAU_SVM_H__
 #include <nvif/os.h>
+#include <linux/mmu_notifier.h>
 struct drm_device;
 struct drm_file;
 struct nouveau_drm;
 
-struct nouveau_svmm;
+struct nouveau_svmm {
+	struct mmu_notifier notifier;
+	struct nouveau_vmm *vmm;
+	struct {
+		unsigned long start;
+		unsigned long limit;
+	} unmanaged;
+
+	struct mutex mutex;
+};
 
 #if IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM)
 void nouveau_svm_init(struct nouveau_drm *);
@@ -19,6 +29,7 @@ int nouveau_svmm_join(struct nouveau_svmm *, u64 inst);
 void nouveau_svmm_part(struct nouveau_svmm *, u64 inst);
 int nouveau_svmm_bind(struct drm_device *, void *, struct drm_file *);
 
+void nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit);
 u64 *nouveau_pfns_alloc(unsigned long npages);
 void nouveau_pfns_free(u64 *pfns);
 void nouveau_pfns_map(struct nouveau_svmm *svmm, struct mm_struct *mm,
+2 −3
@@ -1204,7 +1204,6 @@ nvkm_vmm_pfn_unmap(struct nvkm_vmm *vmm, u64 addr, u64 size)
 /*TODO:
  * - Avoid PT readback (for dma_unmap etc), this might end up being dealt
  *   with inside HMM, which would be a lot nicer for us to deal with.
- * - Multiple page sizes (particularly for huge page support).
  * - Support for systems without a 4KiB page size.
  */
 int
@@ -1220,8 +1219,8 @@ nvkm_vmm_pfn_map(struct nvkm_vmm *vmm, u8 shift, u64 addr, u64 size, u64 *pfn)
 	/* Only support mapping where the page size of the incoming page
 	 * array matches a page size available for direct mapping.
 	 */
-	while (page->shift && page->shift != shift &&
-	       page->desc->func->pfn == NULL)
+	while (page->shift && (page->shift != shift ||
+	       page->desc->func->pfn == NULL))
 		page++;
 
 	if (!page->shift || !IS_ALIGNED(addr, 1ULL << shift) ||