Commit 0e7e6198 authored by Dave Airlie's avatar Dave Airlie
Browse files

Merge branch 'ttm-transhuge' of git://people.freedesktop.org/~thomash/linux into drm-next



Huge page-table entries for TTM

In order to reduce CPU usage [1] and in theory TLB misses this patchset enables
huge- and giant page-table entries for TTM and TTM-enabled graphics drivers.

Signed-off-by: default avatarDave Airlie <airlied@redhat.com>
From: Thomas Hellstrom (VMware) <thomas_os@shipmail.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20200325073102.6129-1-thomas_os@shipmail.org
parents 59e7a8cc 9431042d
Loading
Loading
Loading
Loading
+141 −0
Original line number Diff line number Diff line
@@ -48,6 +48,11 @@
#include "drm_internal.h"
#include "drm_legacy.h"

#if defined(CONFIG_MMU) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
#include <uapi/asm/mman.h>
#include <drm/drm_vma_manager.h>
#endif

/* from BKL pushdown */
DEFINE_MUTEX(drm_global_mutex);

@@ -872,3 +877,139 @@ struct file *mock_drm_getfile(struct drm_minor *minor, unsigned int flags)
	return file;
}
EXPORT_SYMBOL_FOR_TESTS_ONLY(mock_drm_getfile);

#ifdef CONFIG_MMU
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * drm_addr_inflate() attempts to construct an aligned area by inflating
 * the area size and skipping the unaligned start of the area.
 * adapted from shmem_get_unmapped_area()
 */
static unsigned long drm_addr_inflate(unsigned long addr,
				      unsigned long len,
				      unsigned long pgoff,
				      unsigned long flags,
				      unsigned long huge_size)
{
	unsigned long offset, inflated_len;
	unsigned long inflated_addr;
	unsigned long inflated_offset;

	offset = (pgoff << PAGE_SHIFT) & (huge_size - 1);
	if (offset && offset + len < 2 * huge_size)
		return addr;
	if ((addr & (huge_size - 1)) == offset)
		return addr;

	inflated_len = len + huge_size - PAGE_SIZE;
	if (inflated_len > TASK_SIZE)
		return addr;
	if (inflated_len < len)
		return addr;

	inflated_addr = current->mm->get_unmapped_area(NULL, 0, inflated_len,
						       0, flags);
	if (IS_ERR_VALUE(inflated_addr))
		return addr;
	if (inflated_addr & ~PAGE_MASK)
		return addr;

	inflated_offset = inflated_addr & (huge_size - 1);
	inflated_addr += offset - inflated_offset;
	if (inflated_offset > offset)
		inflated_addr += huge_size;

	if (inflated_addr > TASK_SIZE - len)
		return addr;

	return inflated_addr;
}

/**
 * drm_get_unmapped_area() - Get an unused user-space virtual memory area
 * suitable for huge page table entries.
 * @file: The struct file representing the address space being mmap()'d.
 * @uaddr: Start address suggested by user-space.
 * @len: Length of the area.
 * @pgoff: The page offset into the address space.
 * @flags: mmap flags
 * @mgr: The address space manager used by the drm driver. This argument can
 * probably be removed at some point when all drivers use the same
 * address space manager.
 *
 * This function attempts to find an unused user-space virtual memory area
 * that can accommodate the size we want to map, and that is properly
 * aligned to facilitate huge page table entries matching actual
 * huge pages or huge page aligned memory in buffer objects. Buffer objects
 * are assumed to start at huge page boundary pfns (io memory) or be
 * populated by huge pages aligned to the start of the buffer object
 * (system- or coherent memory). Adapted from shmem_get_unmapped_area.
 *
 * Return: aligned user-space address.
 */
unsigned long drm_get_unmapped_area(struct file *file,
				    unsigned long uaddr, unsigned long len,
				    unsigned long pgoff, unsigned long flags,
				    struct drm_vma_offset_manager *mgr)
{
	unsigned long addr;
	unsigned long inflated_addr;
	struct drm_vma_offset_node *node;

	if (len > TASK_SIZE)
		return -ENOMEM;

	/*
	 * @pgoff is the file page-offset the huge page boundaries of
	 * which typically aligns to physical address huge page boundaries.
	 * That's not true for DRM, however, where physical address huge
	 * page boundaries instead are aligned with the offset from
	 * buffer object start. So adjust @pgoff to be the offset from
	 * buffer object start.
	 */
	drm_vma_offset_lock_lookup(mgr);
	node = drm_vma_offset_lookup_locked(mgr, pgoff, 1);
	if (node)
		pgoff -= node->vm_node.start;
	drm_vma_offset_unlock_lookup(mgr);

	addr = current->mm->get_unmapped_area(file, uaddr, len, pgoff, flags);
	if (IS_ERR_VALUE(addr))
		return addr;
	if (addr & ~PAGE_MASK)
		return addr;
	if (addr > TASK_SIZE - len)
		return addr;

	if (len < HPAGE_PMD_SIZE)
		return addr;
	if (flags & MAP_FIXED)
		return addr;
	/*
	 * Our priority is to support MAP_SHARED mapped hugely;
	 * and support MAP_PRIVATE mapped hugely too, until it is COWed.
	 * But if caller specified an address hint, respect that as before.
	 */
	if (uaddr)
		return addr;

	inflated_addr = drm_addr_inflate(addr, len, pgoff, flags,
					 HPAGE_PMD_SIZE);

	if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
	    len >= HPAGE_PUD_SIZE)
		inflated_addr = drm_addr_inflate(inflated_addr, len, pgoff,
						 flags, HPAGE_PUD_SIZE);
	return inflated_addr;
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
unsigned long drm_get_unmapped_area(struct file *file,
				    unsigned long uaddr, unsigned long len,
				    unsigned long pgoff, unsigned long flags,
				    struct drm_vma_offset_manager *mgr)
{
	return current->mm->get_unmapped_area(file, uaddr, len, pgoff, flags);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
EXPORT_SYMBOL_GPL(drm_get_unmapped_area);
#endif /* CONFIG_MMU */
+158 −3
Original line number Diff line number Diff line
@@ -156,6 +156,89 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
}
EXPORT_SYMBOL(ttm_bo_vm_reserve);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/**
 * ttm_bo_vm_insert_huge - Insert a pfn for PUD or PMD faults
 * @vmf: Fault data
 * @bo: The buffer object
 * @page_offset: Page offset from bo start
 * @fault_page_size: The size of the fault in pages.
 * @pgprot: The page protections.
 * Does additional checking whether it's possible to insert a PUD or PMD
 * pfn and performs the insertion.
 *
 * Return: VM_FAULT_NOPAGE on successful insertion, VM_FAULT_FALLBACK if
 * a huge fault was not possible, or on insertion error.
 */
static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
					struct ttm_buffer_object *bo,
					pgoff_t page_offset,
					pgoff_t fault_page_size,
					pgprot_t pgprot)
{
	pgoff_t i;
	vm_fault_t ret;
	unsigned long pfn;
	pfn_t pfnt;
	struct ttm_tt *ttm = bo->ttm;
	bool write = vmf->flags & FAULT_FLAG_WRITE;

	/* Fault should not cross bo boundary. */
	page_offset &= ~(fault_page_size - 1);
	if (page_offset + fault_page_size > bo->num_pages)
		goto out_fallback;

	if (bo->mem.bus.is_iomem)
		pfn = ttm_bo_io_mem_pfn(bo, page_offset);
	else
		pfn = page_to_pfn(ttm->pages[page_offset]);

	/* pfn must be fault_page_size aligned. */
	if ((pfn & (fault_page_size - 1)) != 0)
		goto out_fallback;

	/* Check that memory is contiguous. */
	if (!bo->mem.bus.is_iomem) {
		for (i = 1; i < fault_page_size; ++i) {
			if (page_to_pfn(ttm->pages[page_offset + i]) != pfn + i)
				goto out_fallback;
		}
	} else if (bo->bdev->driver->io_mem_pfn) {
		for (i = 1; i < fault_page_size; ++i) {
			if (ttm_bo_io_mem_pfn(bo, page_offset + i) != pfn + i)
				goto out_fallback;
		}
	}

	pfnt = __pfn_to_pfn_t(pfn, PFN_DEV);
	if (fault_page_size == (HPAGE_PMD_SIZE >> PAGE_SHIFT))
		ret = vmf_insert_pfn_pmd_prot(vmf, pfnt, pgprot, write);
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
	else if (fault_page_size == (HPAGE_PUD_SIZE >> PAGE_SHIFT))
		ret = vmf_insert_pfn_pud_prot(vmf, pfnt, pgprot, write);
#endif
	else
		WARN_ON_ONCE(ret = VM_FAULT_FALLBACK);

	if (ret != VM_FAULT_NOPAGE)
		goto out_fallback;

	return VM_FAULT_NOPAGE;
out_fallback:
	count_vm_event(THP_FAULT_FALLBACK);
	return VM_FAULT_FALLBACK;
}
#else
static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
					struct ttm_buffer_object *bo,
					pgoff_t page_offset,
					pgoff_t fault_page_size,
					pgprot_t pgprot)
{
	return VM_FAULT_FALLBACK;
}
#endif

/**
 * ttm_bo_vm_fault_reserved - TTM fault helper
 * @vmf: The struct vm_fault given as argument to the fault callback
@@ -163,6 +246,7 @@ EXPORT_SYMBOL(ttm_bo_vm_reserve);
 * @num_prefault: Maximum number of prefault pages. The caller may want to
 * specify this based on madvice settings and the size of the GPU object
 * backed by the memory.
 * @fault_page_size: The size of the fault in pages.
 *
 * This function inserts one or more page table entries pointing to the
 * memory backing the buffer object, and then returns a return code
@@ -176,7 +260,8 @@ EXPORT_SYMBOL(ttm_bo_vm_reserve);
 */
vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
				    pgprot_t prot,
				    pgoff_t num_prefault)
				    pgoff_t num_prefault,
				    pgoff_t fault_page_size)
{
	struct vm_area_struct *vma = vmf->vma;
	struct ttm_buffer_object *bo = vma->vm_private_data;
@@ -268,6 +353,13 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
		prot = pgprot_decrypted(prot);
	}

	/* We don't prefault on huge faults. Yet. */
	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && fault_page_size != 1) {
		ret = ttm_bo_vm_insert_huge(vmf, bo, page_offset,
					    fault_page_size, prot);
		goto out_io_unlock;
	}

	/*
	 * Speculatively prefault a number of pages. Only error on
	 * first page.
@@ -334,7 +426,7 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
		return ret;

	prot = vma->vm_page_prot;
	ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
	ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		return ret;

@@ -344,6 +436,66 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
}
EXPORT_SYMBOL(ttm_bo_vm_fault);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/**
 * ttm_pgprot_is_wrprotecting - Is a page protection value write-protecting?
 * @prot: The page protection value
 *
 * Return: true if @prot is write-protecting. false otherwise.
 */
static bool ttm_pgprot_is_wrprotecting(pgprot_t prot)
{
	/*
	 * This is meant to say "pgprot_wrprotect(prot) == prot" in a generic
	 * way. Unfortunately there is no generic pgprot_wrprotect.
	 */
	return pte_val(pte_wrprotect(__pte(pgprot_val(prot)))) ==
		pgprot_val(prot);
}

static vm_fault_t ttm_bo_vm_huge_fault(struct vm_fault *vmf,
				       enum page_entry_size pe_size)
{
	struct vm_area_struct *vma = vmf->vma;
	pgprot_t prot;
	struct ttm_buffer_object *bo = vma->vm_private_data;
	vm_fault_t ret;
	pgoff_t fault_page_size = 0;
	bool write = vmf->flags & FAULT_FLAG_WRITE;

	switch (pe_size) {
	case PE_SIZE_PMD:
		fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT;
		break;
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
	case PE_SIZE_PUD:
		fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT;
		break;
#endif
	default:
		WARN_ON_ONCE(1);
		return VM_FAULT_FALLBACK;
	}

	/* Fallback on write dirty-tracking or COW */
	if (write && ttm_pgprot_is_wrprotecting(vma->vm_page_prot))
		return VM_FAULT_FALLBACK;

	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;

	prot = vm_get_page_prot(vma->vm_flags);
	ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size);
	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		return ret;

	dma_resv_unlock(bo->base.resv);

	return ret;
}
#endif

void ttm_bo_vm_open(struct vm_area_struct *vma)
{
	struct ttm_buffer_object *bo = vma->vm_private_data;
@@ -445,7 +597,10 @@ static const struct vm_operations_struct ttm_bo_vm_ops = {
	.fault = ttm_bo_vm_fault,
	.open = ttm_bo_vm_open,
	.close = ttm_bo_vm_close,
	.access = ttm_bo_vm_access
	.access = ttm_bo_vm_access,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	.huge_fault = ttm_bo_vm_huge_fault,
#endif
};

static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev,
+1 −0
Original line number Diff line number Diff line
@@ -11,4 +11,5 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
	    vmwgfx_validation.o vmwgfx_page_dirty.o vmwgfx_streamoutput.o \
	    ttm_object.o ttm_lock.o

vmwgfx-$(CONFIG_TRANSPARENT_HUGEPAGE) += vmwgfx_thp.o
obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
+13 −0
Original line number Diff line number Diff line
@@ -1247,6 +1247,18 @@ static void vmw_remove(struct pci_dev *pdev)
	pci_disable_device(pdev);
}

static unsigned long
vmw_get_unmapped_area(struct file *file, unsigned long uaddr,
		      unsigned long len, unsigned long pgoff,
		      unsigned long flags)
{
	struct drm_file *file_priv = file->private_data;
	struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);

	return drm_get_unmapped_area(file, uaddr, len, pgoff, flags,
				     &dev_priv->vma_manager);
}

static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
			      void *ptr)
{
@@ -1418,6 +1430,7 @@ static const struct file_operations vmwgfx_driver_fops = {
	.compat_ioctl = vmw_compat_ioctl,
#endif
	.llseek = noop_llseek,
	.get_unmapped_area = vmw_get_unmapped_area,
};

static struct drm_driver driver = {
+12 −0
Original line number Diff line number Diff line
@@ -1000,6 +1000,7 @@ extern int vmw_mmap(struct file *filp, struct vm_area_struct *vma);

extern void vmw_validation_mem_init_ttm(struct vmw_private *dev_priv,
					size_t gran);

/**
 * TTM buffer object driver - vmwgfx_ttm_buffer.c
 */
@@ -1510,6 +1511,17 @@ void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
			pgoff_t start, pgoff_t end);
vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
				enum page_entry_size pe_size);
#endif

/* Transparent hugepage support - vmwgfx_thp.c */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern const struct ttm_mem_type_manager_func vmw_thp_func;
#else
#define vmw_thp_func ttm_bo_manager_func
#endif

/**
 * VMW_DEBUG_KMS - Debug output for kernel mode-setting
Loading