Commit 0ca88ba0 authored by Chris Wilson's avatar Chris Wilson
Browse files

drm/i915: Always allocate an object/vma for the HWSP



Currently we only allocate an object and vma if we are using a GGTT
virtual HWSP, and a plain struct page for a physical HWSP. For
convenience later on with global timelines, it will be useful to always
have the status page being tracked by a struct i915_vma. Make it so.

Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: default avatarMatthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190128102356.15037-4-chris@chris-wilson.co.uk
parent 528cbd17
Loading
Loading
Loading
Loading
+55 −54
Original line number Diff line number Diff line
@@ -506,27 +506,61 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)

static void cleanup_status_page(struct intel_engine_cs *engine)
{
	struct i915_vma *vma;

	/* Prevent writes into HWSP after returning the page to the system */
	intel_engine_set_hwsp_writemask(engine, ~0u);

	if (HWS_NEEDS_PHYSICAL(engine->i915)) {
		void *addr = fetch_and_zero(&engine->status_page.page_addr);
	vma = fetch_and_zero(&engine->status_page.vma);
	if (!vma)
		return;

	if (!HWS_NEEDS_PHYSICAL(engine->i915))
		i915_vma_unpin(vma);

		__free_page(virt_to_page(addr));
	i915_gem_object_unpin_map(vma->obj);
	__i915_gem_object_release_unless_active(vma->obj);
}

	i915_vma_unpin_and_release(&engine->status_page.vma,
				   I915_VMA_RELEASE_MAP);
static int pin_ggtt_status_page(struct intel_engine_cs *engine,
				struct i915_vma *vma)
{
	unsigned int flags;

	flags = PIN_GLOBAL;
	if (!HAS_LLC(engine->i915))
		/*
		 * On g33, we cannot place HWS above 256MiB, so
		 * restrict its pinning to the low mappable arena.
		 * Though this restriction is not documented for
		 * gen4, gen5, or byt, they also behave similarly
		 * and hang if the HWS is placed at the top of the
		 * GTT. To generalise, it appears that all !llc
		 * platforms have issues with us placing the HWS
		 * above the mappable region (even though we never
		 * actually map it).
		 */
		flags |= PIN_MAPPABLE;
	else
		flags |= PIN_HIGH;

	return i915_vma_pin(vma, 0, 0, flags);
}

static int init_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned int flags;
	void *vaddr;
	int ret;

	/*
	 * Though the HWS register does support 36bit addresses, historically
	 * we have had hangs and corruption reported due to wild writes if
	 * the HWS is placed above 4G. We only allow objects to be allocated
	 * in GFP_DMA32 for i965, and no earlier physical address users had
	 * access to more than 4G.
	 */
	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		DRM_ERROR("Failed to allocate status page\n");
@@ -543,61 +577,30 @@ static int init_status_page(struct intel_engine_cs *engine)
		goto err;
	}

	flags = PIN_GLOBAL;
	if (!HAS_LLC(engine->i915))
		/* On g33, we cannot place HWS above 256MiB, so
		 * restrict its pinning to the low mappable arena.
		 * Though this restriction is not documented for
		 * gen4, gen5, or byt, they also behave similarly
		 * and hang if the HWS is placed at the top of the
		 * GTT. To generalise, it appears that all !llc
		 * platforms have issues with us placing the HWS
		 * above the mappable region (even though we never
		 * actually map it).
		 */
		flags |= PIN_MAPPABLE;
	else
		flags |= PIN_HIGH;
	ret = i915_vma_pin(vma, 0, 0, flags);
	if (ret)
		goto err;

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		ret = PTR_ERR(vaddr);
		goto err_unpin;
		goto err;
	}

	engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
	engine->status_page.vma = vma;
	engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
	engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);

	if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
		ret = pin_ggtt_status_page(engine, vma);
		if (ret)
			goto err_unpin;
	}

	return 0;

err_unpin:
	i915_vma_unpin(vma);
	i915_gem_object_unpin_map(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}

static int init_phys_status_page(struct intel_engine_cs *engine)
{
	struct page *page;

	/*
	 * Though the HWS register does support 36bit addresses, historically
	 * we have had hangs and corruption reported due to wild writes if
	 * the HWS is placed above 4G.
	 */
	page = alloc_page(GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO);
	if (!page)
		return -ENOMEM;

	engine->status_page.page_addr = page_address(page);

	return 0;
}

static void __intel_context_unpin(struct i915_gem_context *ctx,
				  struct intel_engine_cs *engine)
{
@@ -690,9 +693,6 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
	if (ret)
		goto err_unpin_preempt;

	if (HWS_NEEDS_PHYSICAL(i915))
		ret = init_phys_status_page(engine);
	else
	ret = init_status_page(engine);
	if (ret)
		goto err_breadcrumbs;
@@ -1366,7 +1366,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
	}

	if (HAS_EXECLISTS(dev_priv)) {
		const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
		const u32 *hws =
			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
		unsigned int idx;
		u8 read, write;

@@ -1549,7 +1550,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
	spin_unlock_irqrestore(&b->rb_lock, flags);

	drm_printf(m, "HWSP:\n");
	hexdump(m, engine->status_page.page_addr, PAGE_SIZE);
	hexdump(m, engine->status_page.addr, PAGE_SIZE);

	drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));
}
+6 −0
Original line number Diff line number Diff line
@@ -81,6 +81,12 @@
 *
 */

static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
{
	return (i915_ggtt_offset(engine->status_page.vma) +
		I915_GEM_HWS_PREEMPT_ADDR);
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
+9 −3
Original line number Diff line number Diff line
@@ -172,6 +172,12 @@ static void execlists_init_reg_state(u32 *reg_state,
				     struct intel_engine_cs *engine,
				     struct intel_ring *ring);

static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
{
	return (i915_ggtt_offset(engine->status_page.vma) +
		I915_GEM_HWS_INDEX_ADDR);
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
@@ -1699,7 +1705,7 @@ static void enable_execlists(struct intel_engine_cs *engine)
		   _MASKED_BIT_DISABLE(STOP_RING));

	I915_WRITE(RING_HWS_PGA(engine->mmio_base),
		   engine->status_page.ggtt_offset);
		   i915_ggtt_offset(engine->status_page.vma));
	POSTING_READ(RING_HWS_PGA(engine->mmio_base));
}

@@ -2244,10 +2250,10 @@ static int logical_ring_init(struct intel_engine_cs *engine)
	}

	execlists->csb_status =
		&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];

	execlists->csb_write =
		&engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
		&engine->status_page.addr[intel_hws_csb_write_index(i915)];

	reset_csb_pointers(execlists);

+16 −5
Original line number Diff line number Diff line
@@ -43,6 +43,12 @@
 */
#define LEGACY_REQUEST_SIZE 200

static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
{
	return (i915_ggtt_offset(engine->status_page.vma) +
		I915_GEM_HWS_INDEX_ADDR);
}

static unsigned int __intel_ring_space(unsigned int head,
				       unsigned int tail,
				       unsigned int size)
@@ -503,12 +509,17 @@ static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
	I915_WRITE(HWS_PGA, addr);
}

static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
static struct page *status_page(struct intel_engine_cs *engine)
{
	struct page *page = virt_to_page(engine->status_page.page_addr);
	phys_addr_t phys = PFN_PHYS(page_to_pfn(page));
	struct drm_i915_gem_object *obj = engine->status_page.vma->obj;

	set_hws_pga(engine, phys);
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
{
	set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
	set_hwstam(engine, ~0u);
}

@@ -575,7 +586,7 @@ static void flush_cs_tlb(struct intel_engine_cs *engine)

static void ring_setup_status_page(struct intel_engine_cs *engine)
{
	set_hwsp(engine, engine->status_page.ggtt_offset);
	set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
	set_hwstam(engine, ~0u);

	flush_cs_tlb(engine);
+6 −17
Original line number Diff line number Diff line
@@ -32,8 +32,7 @@ struct i915_sched_attr;

struct intel_hw_status_page {
	struct i915_vma *vma;
	u32 *page_addr;
	u32 ggtt_offset;
	u32 *addr;
};

#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
@@ -671,7 +670,7 @@ static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
	/* Ensure that the compiler doesn't optimize away the load. */
	return READ_ONCE(engine->status_page.page_addr[reg]);
	return READ_ONCE(engine->status_page.addr[reg]);
}

static inline void
@@ -684,12 +683,12 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
	 */
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		mb();
		clflush(&engine->status_page.page_addr[reg]);
		engine->status_page.page_addr[reg] = value;
		clflush(&engine->status_page.page_addr[reg]);
		clflush(&engine->status_page.addr[reg]);
		engine->status_page.addr[reg] = value;
		clflush(&engine->status_page.addr[reg]);
		mb();
	} else {
		WRITE_ONCE(engine->status_page.page_addr[reg], value);
		WRITE_ONCE(engine->status_page.addr[reg], value);
	}
}

@@ -877,16 +876,6 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone);

static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
{
	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
}

static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
{
	return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
}

/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);

Loading