Commit 6501aa4e authored by Matthew Auld's avatar Matthew Auld Committed by Chris Wilson
Browse files

drm/i915: add in-kernel blitter client



The plan is to use the blitter engine for async object clearing when
using local memory, but before we can move the worker to get_pages() we
have to first tame some more of our struct_mutex usage. With this in
mind we should be able to upstream the object clearing as some
selftests, which should serve as a guinea pig for the ongoing locking
rework and upcoming async get_pages() framework.

Signed-off-by: default avatarMatthew Auld <matthew.auld@intel.com>
Cc: CQ Tang <cq.tang@intel.com>
Reviewed-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20190529123108.24422-2-matthew.auld@intel.com
parent 0a4a6e74
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -90,6 +90,7 @@ obj-y += gem/
gem-y += \
	gem/i915_gem_busy.o \
	gem/i915_gem_clflush.o \
	gem/i915_gem_client_blt.o \
	gem/i915_gem_context.o \
	gem/i915_gem_dmabuf.o \
	gem/i915_gem_domain.o \
@@ -97,6 +98,7 @@ gem-y += \
	gem/i915_gem_fence.o \
	gem/i915_gem_internal.o \
	gem/i915_gem_object.o \
	gem/i915_gem_object_blt.o \
	gem/i915_gem_mman.o \
	gem/i915_gem_pages.o \
	gem/i915_gem_phys.o \
+306 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */
#include "i915_gem_client_blt.h"

#include "i915_gem_object_blt.h"
#include "intel_drv.h"

struct i915_sleeve {
	struct i915_vma *vma;
	struct drm_i915_gem_object *obj;
	struct sg_table *pages;
	struct i915_page_sizes page_sizes;
};

static int vma_set_pages(struct i915_vma *vma)
{
	struct i915_sleeve *sleeve = vma->private;

	vma->pages = sleeve->pages;
	vma->page_sizes = sleeve->page_sizes;

	return 0;
}

static void vma_clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);
	vma->pages = NULL;
}

static int vma_bind(struct i915_vma *vma,
		    enum i915_cache_level cache_level,
		    u32 flags)
{
	return vma->vm->vma_ops.bind_vma(vma, cache_level, flags);
}

static void vma_unbind(struct i915_vma *vma)
{
	vma->vm->vma_ops.unbind_vma(vma);
}

static const struct i915_vma_ops proxy_vma_ops = {
	.set_pages = vma_set_pages,
	.clear_pages = vma_clear_pages,
	.bind_vma = vma_bind,
	.unbind_vma = vma_unbind,
};

static struct i915_sleeve *create_sleeve(struct i915_address_space *vm,
					 struct drm_i915_gem_object *obj,
					 struct sg_table *pages,
					 struct i915_page_sizes *page_sizes)
{
	struct i915_sleeve *sleeve;
	struct i915_vma *vma;
	int err;

	sleeve = kzalloc(sizeof(*sleeve), GFP_KERNEL);
	if (!sleeve)
		return ERR_PTR(-ENOMEM);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_free;
	}

	vma->private = sleeve;
	vma->ops = &proxy_vma_ops;

	sleeve->vma = vma;
	sleeve->obj = i915_gem_object_get(obj);
	sleeve->pages = pages;
	sleeve->page_sizes = *page_sizes;

	return sleeve;

err_free:
	kfree(sleeve);
	return ERR_PTR(err);
}

static void destroy_sleeve(struct i915_sleeve *sleeve)
{
	i915_gem_object_put(sleeve->obj);
	kfree(sleeve);
}

struct clear_pages_work {
	struct dma_fence dma;
	struct dma_fence_cb cb;
	struct i915_sw_fence wait;
	struct work_struct work;
	struct irq_work irq_work;
	struct i915_sleeve *sleeve;
	struct intel_context *ce;
	u32 value;
};

static const char *clear_pages_work_driver_name(struct dma_fence *fence)
{
	return DRIVER_NAME;
}

static const char *clear_pages_work_timeline_name(struct dma_fence *fence)
{
	return "clear";
}

static void clear_pages_work_release(struct dma_fence *fence)
{
	struct clear_pages_work *w = container_of(fence, typeof(*w), dma);

	destroy_sleeve(w->sleeve);

	i915_sw_fence_fini(&w->wait);

	BUILD_BUG_ON(offsetof(typeof(*w), dma));
	dma_fence_free(&w->dma);
}

static const struct dma_fence_ops clear_pages_work_ops = {
	.get_driver_name = clear_pages_work_driver_name,
	.get_timeline_name = clear_pages_work_timeline_name,
	.release = clear_pages_work_release,
};

static void clear_pages_signal_irq_worker(struct irq_work *work)
{
	struct clear_pages_work *w = container_of(work, typeof(*w), irq_work);

	dma_fence_signal(&w->dma);
	dma_fence_put(&w->dma);
}

static void clear_pages_dma_fence_cb(struct dma_fence *fence,
				     struct dma_fence_cb *cb)
{
	struct clear_pages_work *w = container_of(cb, typeof(*w), cb);

	if (fence->error)
		dma_fence_set_error(&w->dma, fence->error);

	/*
	 * Push the signalling of the fence into yet another worker to avoid
	 * the nightmare locking around the fence spinlock.
	 */
	irq_work_queue(&w->irq_work);
}

static void clear_pages_worker(struct work_struct *work)
{
	struct clear_pages_work *w = container_of(work, typeof(*w), work);
	struct drm_i915_private *i915 = w->ce->gem_context->i915;
	struct drm_i915_gem_object *obj = w->sleeve->obj;
	struct i915_vma *vma = w->sleeve->vma;
	struct i915_request *rq;
	int err = w->dma.error;

	if (unlikely(err))
		goto out_signal;

	if (obj->cache_dirty) {
		obj->write_domain = 0;
		if (i915_gem_object_has_struct_page(obj))
			drm_clflush_sg(w->sleeve->pages);
		obj->cache_dirty = false;
	}

	/* XXX: we need to kill this */
	mutex_lock(&i915->drm.struct_mutex);
	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_unlock;

	rq = i915_request_create(w->ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_unpin;
	}

	/* There's no way the fence has signalled */
	if (dma_fence_add_callback(&rq->fence, &w->cb,
				   clear_pages_dma_fence_cb))
		GEM_BUG_ON(1);

	if (w->ce->engine->emit_init_breadcrumb) {
		err = w->ce->engine->emit_init_breadcrumb(rq);
		if (unlikely(err))
			goto out_request;
	}

	/* XXX: more feverish nightmares await */
	i915_vma_lock(vma);
	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (err)
		goto out_request;

	err = intel_emit_vma_fill_blt(rq, vma, w->value);
out_request:
	if (unlikely(err)) {
		i915_request_skip(rq, err);
		err = 0;
	}

	i915_request_add(rq);
out_unpin:
	i915_vma_unpin(vma);
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
out_signal:
	if (unlikely(err)) {
		dma_fence_set_error(&w->dma, err);
		dma_fence_signal(&w->dma);
		dma_fence_put(&w->dma);
	}
}

static int __i915_sw_fence_call
clear_pages_work_notify(struct i915_sw_fence *fence,
			enum i915_sw_fence_notify state)
{
	struct clear_pages_work *w = container_of(fence, typeof(*w), wait);

	switch (state) {
	case FENCE_COMPLETE:
		schedule_work(&w->work);
		break;

	case FENCE_FREE:
		dma_fence_put(&w->dma);
		break;
	}

	return NOTIFY_DONE;
}

static DEFINE_SPINLOCK(fence_lock);

/* XXX: better name please */
int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
				     struct intel_context *ce,
				     struct sg_table *pages,
				     struct i915_page_sizes *page_sizes,
				     u32 value)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_gem_context *ctx = ce->gem_context;
	struct i915_address_space *vm;
	struct clear_pages_work *work;
	struct i915_sleeve *sleeve;
	int err;

	vm = ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;

	sleeve = create_sleeve(vm, obj, pages, page_sizes);
	if (IS_ERR(sleeve))
		return PTR_ERR(sleeve);

	work = kmalloc(sizeof(*work), GFP_KERNEL);
	if (!work) {
		destroy_sleeve(sleeve);
		return -ENOMEM;
	}

	work->value = value;
	work->sleeve = sleeve;
	work->ce = ce;

	INIT_WORK(&work->work, clear_pages_worker);

	init_irq_work(&work->irq_work, clear_pages_signal_irq_worker);

	dma_fence_init(&work->dma,
		       &clear_pages_work_ops,
		       &fence_lock,
		       i915->mm.unordered_timeline,
		       0);
	i915_sw_fence_init(&work->wait, clear_pages_work_notify);

	i915_gem_object_lock(obj);
	err = i915_sw_fence_await_reservation(&work->wait,
					      obj->resv, NULL,
					      true, I915_FENCE_TIMEOUT,
					      I915_FENCE_GFP);
	if (err < 0) {
		dma_fence_set_error(&work->dma, err);
	} else {
		reservation_object_add_excl_fence(obj->resv, &work->dma);
		err = 0;
	}
	i915_gem_object_unlock(obj);

	dma_fence_get(&work->dma);
	i915_sw_fence_commit(&work->wait);

	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_client_blt.c"
#endif
+21 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2019 Intel Corporation
 */
#ifndef __I915_GEM_CLIENT_BLT_H__
#define __I915_GEM_CLIENT_BLT_H__

#include <linux/types.h>

struct drm_i915_gem_object;
struct i915_page_sizes;
struct intel_context;
struct sg_table;

int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
				     struct intel_context *ce,
				     struct sg_table *pages,
				     struct i915_page_sizes *page_sizes,
				     u32 value);

#endif
+109 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_gem_object_blt.h"

#include "i915_gem_clflush.h"
#include "intel_drv.h"

int intel_emit_vma_fill_blt(struct i915_request *rq,
			    struct i915_vma *vma,
			    u32 value)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 8);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (INTEL_GEN(rq->i915) >= 8) {
		*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
		*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
		*cs++ = 0;
		*cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
		*cs++ = lower_32_bits(vma->node.start);
		*cs++ = upper_32_bits(vma->node.start);
		*cs++ = value;
		*cs++ = MI_NOOP;
	} else {
		*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
		*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
		*cs++ = 0;
		*cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
		*cs++ = vma->node.start;
		*cs++ = value;
		*cs++ = MI_NOOP;
		*cs++ = MI_NOOP;
	}

	intel_ring_advance(rq, cs);

	return 0;
}

int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
			     struct intel_context *ce,
			     u32 value)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_gem_context *ctx = ce->gem_context;
	struct i915_address_space *vm;
	struct i915_request *rq;
	struct i915_vma *vma;
	int err;

	/* XXX: ce->vm please */
	vm = ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (unlikely(err))
		return err;

	if (obj->cache_dirty & ~obj->cache_coherent) {
		i915_gem_object_lock(obj);
		i915_gem_clflush_object(obj, 0);
		i915_gem_object_unlock(obj);
	}

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_unpin;
	}

	err = i915_request_await_object(rq, obj, true);
	if (unlikely(err))
		goto out_request;

	if (ce->engine->emit_init_breadcrumb) {
		err = ce->engine->emit_init_breadcrumb(rq);
		if (unlikely(err))
			goto out_request;
	}

	i915_vma_lock(vma);
	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (unlikely(err))
		goto out_request;

	err = intel_emit_vma_fill_blt(rq, vma, value);
out_request:
	if (unlikely(err))
		i915_request_skip(rq, err);

	i915_request_add(rq);
out_unpin:
	i915_vma_unpin(vma);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_object_blt.c"
#endif
+24 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2019 Intel Corporation
 */

#ifndef __I915_GEM_OBJECT_BLT_H__
#define __I915_GEM_OBJECT_BLT_H__

#include <linux/types.h>

struct drm_i915_gem_object;
struct intel_context;
struct i915_request;
struct i915_vma;

int intel_emit_vma_fill_blt(struct i915_request *rq,
			    struct i915_vma *vma,
			    u32 value);

int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
			     struct intel_context *ce,
			     u32 value);

#endif
Loading