Commit 6951e589 authored by Chris Wilson

drm/i915: Move GEM object domain management from struct_mutex to local



Use the per-object local lock to control the cache domain of the
individual GEM objects, not struct_mutex. This is a huge leap forward
for us in terms of object-level synchronisation; execbuffers are
coordinated using the ww_mutex and pread/pwrite is finally fully
serialised again.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190528092956.14910-10-chris@chris-wilson.co.uk
parent 37d63f8f
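
The pattern of the change is the same at every call site: a domain transition that used to be serialised by the device-global struct_mutex is now serialised by the lock embedded in that object's reservation_object. A minimal before/after sketch (illustrative only, not a hunk from this patch):

	/* Before: every domain change funnels through one device-wide lock. */
	mutex_lock(&dev->struct_mutex);
	err = i915_gem_object_set_to_cpu_domain(obj, write);
	mutex_unlock(&dev->struct_mutex);

	/* After: only the object being touched is locked. The lock is the
	 * ww_mutex embedded in obj->resv, so it composes with the
	 * multi-object acquisition that execbuf performs below.
	 */
	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_cpu_domain(obj, write);
	i915_gem_object_unlock(obj);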
drivers/gpu/drm/i915/Makefile +1 −0
@@ -93,6 +93,7 @@ gem-y += \
	gem/i915_gem_dmabuf.o \
	gem/i915_gem_domain.o \
	gem/i915_gem_execbuffer.o \
+	gem/i915_gem_fence.o \
	gem/i915_gem_internal.o \
	gem/i915_gem_object.o \
	gem/i915_gem_mman.o \
drivers/gpu/drm/i915/gem/i915_gem_clflush.c +2 −2
@@ -95,6 +95,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
{
	struct clflush *clflush;

+	assert_object_held(obj);
+
	/*
	 * Stolen memory is always coherent with the GPU as it is explicitly
	 * marked as wc by the system, or the system is cache-coherent.
@@ -144,9 +146,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
						true, I915_FENCE_TIMEOUT,
						I915_FENCE_GFP);

-		reservation_object_lock(obj->resv, NULL);
		reservation_object_add_excl_fence(obj->resv, &clflush->dma);
-		reservation_object_unlock(obj->resv);

		i915_sw_fence_commit(&clflush->wait);
	} else if (obj->mm.pages) {
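
The explicit lock/unlock pair around the exclusive-fence install is dropped because i915_gem_clflush_object() now runs with the object lock already held by the caller, which the new assert_object_held() documents. A sketch of what that assert plausibly expands to, assuming it is a thin wrapper over the reservation object's lockdep annotation (the helper's definition is not part of these hunks):

	static inline void assert_object_held(struct drm_i915_gem_object *obj)
	{
		reservation_object_assert_held(obj->resv);
	}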
drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +4 −6
@@ -151,7 +151,6 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *
static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
{
	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-	struct drm_device *dev = obj->base.dev;
	bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE);
	int err;

@@ -159,12 +158,12 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
	if (err)
		return err;

-	err = i915_mutex_lock_interruptible(dev);
+	err = i915_gem_object_lock_interruptible(obj);
	if (err)
		goto out;

	err = i915_gem_object_set_to_cpu_domain(obj, write);
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_object_unlock(obj);

out:
	i915_gem_object_unpin_pages(obj);
@@ -174,19 +173,18 @@ out:
static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
{
	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-	struct drm_device *dev = obj->base.dev;
	int err;

	err = i915_gem_object_pin_pages(obj);
	if (err)
		return err;

-	err = i915_mutex_lock_interruptible(dev);
+	err = i915_gem_object_lock_interruptible(obj);
	if (err)
		goto out;

	err = i915_gem_object_set_to_gtt_domain(obj, false);
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_object_unlock(obj);

out:
	i915_gem_object_unpin_pages(obj);
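
Both dma-buf CPU-access paths end up with the same ordering: pin the backing pages first, then hold the object lock only around the domain transition itself. Condensed post-image of the begin_cpu_access path, reconstructed from the hunks above:

	static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf,
					     enum dma_data_direction direction)
	{
		struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
		bool write = (direction == DMA_BIDIRECTIONAL ||
			      direction == DMA_TO_DEVICE);
		int err;

		err = i915_gem_object_pin_pages(obj);
		if (err)
			return err;

		err = i915_gem_object_lock_interruptible(obj);
		if (err)
			goto out;

		err = i915_gem_object_set_to_cpu_domain(obj, write);
		i915_gem_object_unlock(obj);

	out:
		i915_gem_object_unpin_pages(obj);
		return err;
	}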
drivers/gpu/drm/i915/gem/i915_gem_domain.c +39 −31
@@ -29,9 +29,9 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
	if (!READ_ONCE(obj->pin_global))
		return;

-	mutex_lock(&obj->base.dev->struct_mutex);
+	i915_gem_object_lock(obj);
	__i915_gem_object_flush_for_display(obj);
-	mutex_unlock(&obj->base.dev->struct_mutex);
+	i915_gem_object_unlock(obj);
}

/**
@@ -47,11 +47,10 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
@@ -109,11 +108,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
@@ -179,7 +177,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
	struct i915_vma *vma;
	int ret;

-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);

	if (obj->cache_level == cache_level)
		return 0;
@@ -228,7 +226,6 @@ restart:
		 */
		ret = i915_gem_object_wait(obj,
					   I915_WAIT_INTERRUPTIBLE |
-					   I915_WAIT_LOCKED |
					   I915_WAIT_ALL,
					   MAX_SCHEDULE_TIMEOUT);
		if (ret)
@@ -372,12 +369,16 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
	if (ret)
		goto out;

-	ret = i915_mutex_lock_interruptible(dev);
+	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		goto out;

-	ret = i915_gem_object_set_cache_level(obj, level);
-	mutex_unlock(&dev->struct_mutex);
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret == 0) {
+		ret = i915_gem_object_set_cache_level(obj, level);
+		i915_gem_object_unlock(obj);
+	}
+	mutex_unlock(&i915->drm.struct_mutex);

out:
	i915_gem_object_put(obj);
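
Note the nesting this hunk establishes: struct_mutex is still taken, plausibly because changing the cache level can rebind VMAs, which at this point in the series still happens under the device lock, but the per-object lock now nests strictly inside it:

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);	/* outer */
	if (ret)
		goto out;

	ret = i915_gem_object_lock_interruptible(obj);	/* inner, per object */
	if (ret == 0) {
		ret = i915_gem_object_set_cache_level(obj, level);
		i915_gem_object_unlock(obj);
	}
	mutex_unlock(&i915->drm.struct_mutex);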
@@ -399,7 +400,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
	struct i915_vma *vma;
	int ret;

-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);

	/* Mark the global pin early so that we account for the
	 * display coherency whilst setting up the cache domains.
@@ -484,16 +485,18 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
void
i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
{
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	struct drm_i915_gem_object *obj = vma->obj;
+
+	assert_object_held(obj);

-	if (WARN_ON(vma->obj->pin_global == 0))
+	if (WARN_ON(obj->pin_global == 0))
		return;

-	if (--vma->obj->pin_global == 0)
+	if (--obj->pin_global == 0)
		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;

	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
-	i915_gem_object_bump_inactive_ggtt(vma->obj);
+	i915_gem_object_bump_inactive_ggtt(obj);

	i915_vma_unpin(vma);
}
@@ -511,11 +514,10 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
@@ -637,7 +639,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
	if (err)
		goto out;

-	err = i915_mutex_lock_interruptible(dev);
+	err = i915_gem_object_lock_interruptible(obj);
	if (err)
		goto out_unpin;

@@ -651,7 +653,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
	/* And bump the LRU for this access */
	i915_gem_object_bump_inactive_ggtt(obj);

-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_object_unlock(obj);

	if (write_domain != 0)
		intel_fb_obj_invalidate(obj,
@@ -674,22 +676,23 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
{
	int ret;

-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret)
+		return ret;
+
	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED,
+				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
-		return ret;
+		goto err_unlock;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
-		return ret;
+		goto err_unlock;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -717,6 +720,8 @@ out:

err_unpin:
	i915_gem_object_unpin_pages(obj);
+err_unlock:
+	i915_gem_object_unlock(obj);
	return ret;
}

@@ -725,23 +730,24 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
{
	int ret;

-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret)
+		return ret;
+
	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
-		return ret;
+		goto err_unlock;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
-		return ret;
+		goto err_unlock;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -778,5 +784,7 @@ out:

err_unpin:
	i915_gem_object_unpin_pages(obj);
+err_unlock:
+	i915_gem_object_unlock(obj);
	return ret;
}
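
One contract change is visible in both helpers: only the failure paths run err_unlock, so on success prepare_read/prepare_write now return with the object lock (and the page pin) still held, and the caller is expected to drop both once the copy is done. A hypothetical caller shape, with the unwind points assumed rather than taken from this patch:

	static int read_object_example(struct drm_i915_gem_object *obj)
	{
		unsigned int needs_clflush;
		int err;

		err = i915_gem_object_prepare_read(obj, &needs_clflush);
		if (err)
			return err;	/* already unlocked via err_unlock */

		/* ... copy from the pages, clflushing when needs_clflush asks ... */

		i915_gem_object_unpin_pages(obj);	/* undo prepare's pin */
		i915_gem_object_unlock(obj);		/* undo prepare's lock */
		return 0;
	}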
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +84 −39
@@ -1075,7 +1075,9 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
		if (use_cpu_reloc(cache, obj))
			return NULL;

+		i915_gem_object_lock(obj);
		err = i915_gem_object_set_to_gtt_domain(obj, true);
+		i915_gem_object_unlock(obj);
		if (err)
			return ERR_PTR(err);

@@ -1164,6 +1166,26 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
		*addr = value;
}

+static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	int err;
+
+	i915_vma_lock(vma);
+
+	if (obj->cache_dirty & ~obj->cache_coherent)
+		i915_gem_clflush_object(obj, 0);
+	obj->write_domain = 0;
+
+	err = i915_request_await_object(rq, vma->obj, true);
+	if (err == 0)
+		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+
+	i915_vma_unlock(vma);
+
+	return err;
+}
+
static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
			     struct i915_vma *vma,
			     unsigned int len)
@@ -1175,15 +1197,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
	u32 *cmd;
	int err;

-	if (DBG_FORCE_RELOC == FORCE_GPU_RELOC) {
-		obj = vma->obj;
-		if (obj->cache_dirty & ~obj->cache_coherent)
-			i915_gem_clflush_object(obj, 0);
-		obj->write_domain = 0;
-	}
-
-	GEM_BUG_ON(vma->obj->write_domain & I915_GEM_DOMAIN_CPU);
-
	obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);
@@ -1212,7 +1225,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
		goto err_unpin;
	}

-	err = i915_request_await_object(rq, vma->obj, true);
+	err = reloc_move_to_gpu(rq, vma);
	if (err)
		goto err_request;

@@ -1220,14 +1233,12 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
					batch->node.start, PAGE_SIZE,
					cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
	if (err)
-		goto err_request;
+		goto skip_request;

+	i915_vma_lock(batch);
	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
	err = i915_vma_move_to_active(batch, rq, 0);
-	if (err)
-		goto skip_request;
-
-	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(batch);
	if (err)
		goto skip_request;

@@ -1837,25 +1848,60 @@ slow:
static int eb_move_to_gpu(struct i915_execbuffer *eb)
{
	const unsigned int count = eb->buffer_count;
+	struct ww_acquire_ctx acquire;
	unsigned int i;
-	int err;
+	int err = 0;

+	ww_acquire_init(&acquire, &reservation_ww_class);
+
	for (i = 0; i < count; i++) {
+		struct i915_vma *vma = eb->vma[i];
+
+		err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
+		if (!err)
+			continue;
+
+		GEM_BUG_ON(err == -EALREADY); /* No duplicate vma */
+
+		if (err == -EDEADLK) {
+			GEM_BUG_ON(i == 0);
+			do {
+				int j = i - 1;
+
+				ww_mutex_unlock(&eb->vma[j]->resv->lock);
+
+				swap(eb->flags[i], eb->flags[j]);
+				swap(eb->vma[i],  eb->vma[j]);
+				eb->vma[i]->exec_flags = &eb->flags[i];
+			} while (--i);
+			GEM_BUG_ON(vma != eb->vma[0]);
+			vma->exec_flags = &eb->flags[0];
+
+			err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
+							       &acquire);
+		}
+		if (err)
+			break;
+	}
+	ww_acquire_done(&acquire);
+
+	while (i--) {
		unsigned int flags = eb->flags[i];
		struct i915_vma *vma = eb->vma[i];
		struct drm_i915_gem_object *obj = vma->obj;

+		assert_vma_held(vma);
+
		if (flags & EXEC_OBJECT_CAPTURE) {
			struct i915_capture_list *capture;

			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
-			if (unlikely(!capture))
-				return -ENOMEM;
-
-			capture->next = eb->request->capture_list;
-			capture->vma = eb->vma[i];
-			eb->request->capture_list = capture;
+			if (capture) {
+				capture->next = eb->request->capture_list;
+				capture->vma = vma;
+				eb->request->capture_list = capture;
+			}
		}

		/*
		 * If the GPU is not _reading_ through the CPU cache, we need
@@ -1874,24 +1920,15 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
				flags &= ~EXEC_OBJECT_ASYNC;
		}

-		if (flags & EXEC_OBJECT_ASYNC)
-			continue;
-
-		err = i915_request_await_object
-			(eb->request, obj, flags & EXEC_OBJECT_WRITE);
-		if (err)
-			return err;
-	}
-
-	for (i = 0; i < count; i++) {
-		unsigned int flags = eb->flags[i];
-		struct i915_vma *vma = eb->vma[i];
-
-		err = i915_vma_move_to_active(vma, eb->request, flags);
-		if (unlikely(err)) {
-			i915_request_skip(eb->request, err);
-			return err;
-		}
+		if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) {
+			err = i915_request_await_object
+				(eb->request, obj, flags & EXEC_OBJECT_WRITE);
+		}
+
+		if (err == 0)
+			err = i915_vma_move_to_active(vma, eb->request, flags);
+
+		i915_vma_unlock(vma);

		__eb_unreserve_vma(vma, flags);
		vma->exec_flags = NULL;
@@ -1899,12 +1936,20 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
			i915_vma_put(vma);
	}
+	ww_acquire_fini(&acquire);
+
+	if (unlikely(err))
+		goto err_skip;

	eb->exec = NULL;

	/* Unconditionally flush any chipset caches (for streaming writes). */
	i915_gem_chipset_flush(eb->i915);

	return 0;

+err_skip:
+	i915_request_skip(eb->request, err);
+	return err;
}
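
The acquisition loop above is the kernel's standard wound/wait protocol: ww_mutex_lock_interruptible() returns -EDEADLK when this context loses a conflict, at which point every lock already held must be released and the contended lock reacquired with the _slow variant before retrying, while ww_acquire_done() marks the point after which no further locks will be taken. A generic, self-contained sketch of the same protocol (example code, not from i915):

	#include <linux/ww_mutex.h>

	static DEFINE_WW_CLASS(example_ww_class);

	/* Take @n ww_mutexes in arbitrary order, backing off on -EDEADLK. */
	static int lock_all(struct ww_mutex **locks, int n)
	{
		struct ww_acquire_ctx ctx;
		int i, j, err, contended = -1;

		ww_acquire_init(&ctx, &example_ww_class);
	retry:
		err = 0;
		for (i = 0; i < n; i++) {
			if (i == contended) {	/* already held via lock_slow below */
				contended = -1;
				continue;
			}

			err = ww_mutex_lock_interruptible(locks[i], &ctx);
			if (err)
				break;
		}
		if (!err) {
			ww_acquire_done(&ctx);	/* all held; no more acquires follow */
			return 0;
		}

		/* Back off: drop every lock this context holds. */
		for (j = 0; j < i; j++)
			ww_mutex_unlock(locks[j]);
		if (contended > i)
			ww_mutex_unlock(locks[contended]);

		if (err == -EDEADLK) {
			/* Wounded: sleep on the lock we lost, then start over. */
			contended = i;
			err = ww_mutex_lock_slow_interruptible(locks[i], &ctx);
			if (!err)
				goto retry;
		}

		ww_acquire_fini(&ctx);
		return err;
	}

eb_move_to_gpu() performs the same dance in place: on -EDEADLK it unlocks everything acquired so far while rotating the contended vma down to slot 0, takes the slow lock on it there, and resumes the acquisition loop from index 1.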

static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)