Commit 79b1894c authored by Ofir Bitton, committed by Oded Gabbay

habanalabs: use queue pi/ci in order to determine queue occupancy



Instead of using the number of free slots on the compute CQ to determine
whether we can submit work to the queues, use the queues' pi/ci.

This is needed in future ASICs where we don't have a CQ per queue.
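
For illustration, here is a minimal user-space sketch of the pi/ci arithmetic
this patch moves to. It mirrors queue_ci_get()/queue_free_slots() from the
patch but is not driver code: plain integers stand in for the kernel's
atomic_t, QUEUE_LEN for HL_QUEUE_LENGTH, and the demo main() is invented.
Both counters run over [0, 2 * queue_len) so that a full queue
(delta == queue_len) is distinguishable from an empty one (delta == 0):

#include <stdio.h>
#include <stdlib.h>

#define QUEUE_LEN 256	/* stand-in for HL_QUEUE_LENGTH */

/* mirrors queue_ci_get(): fold the free-running ci into [0, 2 * len) */
static int queue_ci_get(unsigned int ci, unsigned int queue_len)
{
	return ci & ((queue_len << 1) - 1);
}

/* mirrors queue_free_slots(): occupancy derived purely from pi and ci */
static int queue_free_slots(unsigned int pi, unsigned int ci,
				unsigned int queue_len)
{
	int delta = pi - queue_ci_get(ci, queue_len);

	if (delta >= 0)
		return queue_len - delta;
	else
		return abs(delta) - queue_len;	/* pi wrapped past 2*len before ci */
}

int main(void)
{
	printf("%d\n", queue_free_slots(0, 0, QUEUE_LEN));	/* empty: 256 */
	printf("%d\n", queue_free_slots(10, 0, QUEUE_LEN));	/* 10 in flight: 246 */
	printf("%d\n", queue_free_slots(256, 0, QUEUE_LEN));	/* full: 0 */
	return 0;
}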

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent 3abc99bb
+10 −7
@@ -1144,15 +1144,18 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
	 * because there the addresses of the completion queues are being
	 * passed as arguments to request_irq
	 */
-	hdev->completion_queue = kcalloc(cq_cnt,
-			sizeof(*hdev->completion_queue),
-			GFP_KERNEL);
+	if (cq_cnt) {
+		hdev->completion_queue = kcalloc(cq_cnt,
+				sizeof(*hdev->completion_queue),
+				GFP_KERNEL);

-	if (!hdev->completion_queue) {
-		dev_err(hdev->dev, "failed to allocate completion queues\n");
-		rc = -ENOMEM;
-		goto hw_queues_destroy;
+		if (!hdev->completion_queue) {
+			dev_err(hdev->dev,
+				"failed to allocate completion queues\n");
+			rc = -ENOMEM;
+			goto hw_queues_destroy;
+		}
	}

	for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
		rc = hl_cq_init(hdev, &hdev->completion_queue[i],
+1 −1
@@ -461,7 +461,7 @@ struct hl_hw_queue {
	u64			kernel_address;
	dma_addr_t		bus_address;
	u32			pi;
-	u32			ci;
+	atomic_t		ci;
	u32			hw_queue_id;
	u32			cq_id;
	u32			msi_vec;
+27 −55
@@ -23,10 +23,14 @@ inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val)
	ptr &= ((HL_QUEUE_LENGTH << 1) - 1);
	return ptr;
}
+static inline int queue_ci_get(atomic_t *ci, u32 queue_len)
+{
+	return atomic_read(ci) & ((queue_len << 1) - 1);
+}

static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
{
-	int delta = (q->pi - q->ci);
+	int delta = (q->pi - queue_ci_get(&q->ci, queue_len));

	if (delta >= 0)
		return (queue_len - delta);
@@ -40,21 +44,14 @@ void hl_int_hw_queue_update_ci(struct hl_cs *cs)
	struct hl_hw_queue *q;
	int i;

-	hdev->asic_funcs->hw_queues_lock(hdev);
-
	if (hdev->disabled)
-		goto out;
+		return;

	q = &hdev->kernel_queues[0];
	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
-		if (q->queue_type == QUEUE_TYPE_INT) {
-			q->ci += cs->jobs_in_queue_cnt[i];
-			q->ci &= ((q->int_queue_len << 1) - 1);
-		}
+		if (q->queue_type == QUEUE_TYPE_INT)
+			atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
	}
-
-out:
-	hdev->asic_funcs->hw_queues_unlock(hdev);
}

/*
@@ -174,38 +171,26 @@ static int int_queue_sanity_checks(struct hl_device *hdev,
}

/*
- * hw_queue_sanity_checks() - Perform some sanity checks on a H/W queue.
+ * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue
 * @hdev: Pointer to hl_device structure.
 * @q: Pointer to hl_hw_queue structure.
 * @num_of_entries: How many entries to check for space.
 *
- * Perform the following:
- * - Make sure we have enough space in the completion queue.
- *   This check also ensures that there is enough space in the h/w queue, as
- *   both queues are of the same size.
- * - Reserve space in the completion queue (needs to be reversed if there
- *   is a failure down the road before the actual submission of work).
+ * Notice: We do not reserve queue entries so this function mustn't be called
+ *         more than once per CS for the same queue
 *
- * Both operations are done using the "free_slots_cnt" field of the completion
- * queue. The CI counters of the queue and the completion queue are not
- * needed/used for the H/W queue type.
 */
static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
					int num_of_entries)
{
-	atomic_t *free_slots =
-			&hdev->completion_queue[q->cq_id].free_slots_cnt;
+	int free_slots_cnt;

-	/*
-	 * Check we have enough space in the completion queue.
-	 * Add -1 to counter (decrement) unless counter was already 0.
-	 * In that case, CQ is full so we can't submit a new CB.
-	 * atomic_add_unless will return 0 if counter was already 0.
-	 */
-	if (atomic_add_negative(num_of_entries * -1, free_slots)) {
-		dev_dbg(hdev->dev, "No space for %d entries on CQ %d\n",
-			num_of_entries, q->hw_queue_id);
-		atomic_add(num_of_entries, free_slots);
+	/* Check we have enough space in the queue */
+	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);
+
+	if (free_slots_cnt < num_of_entries) {
+		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
+			q->hw_queue_id, num_of_entries);
		return -EAGAIN;
	}

@@ -366,7 +351,6 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
-	struct hl_cq *cq;
	u64 ptr;
	u32 offset, ctl, len;

@@ -395,17 +379,6 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
	else
		ptr = (u64) (uintptr_t) job->user_cb;

-	/*
-	 * No need to protect pi_offset because scheduling to the
-	 * H/W queues is done under the scheduler mutex
-	 *
-	 * No need to check if CQ is full because it was already
-	 * checked in hw_queue_sanity_checks
-	 */
-	cq = &hdev->completion_queue[q->cq_id];
-
-	cq->pi = hl_cq_inc_ptr(cq->pi);
-
	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}

@@ -552,8 +525,7 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
				goto unroll_cq_resv;
			}

-			if (q->queue_type == QUEUE_TYPE_EXT ||
-					q->queue_type == QUEUE_TYPE_HW)
+			if (q->queue_type == QUEUE_TYPE_EXT)
				cq_cnt++;
		}
	}
@@ -605,9 +577,8 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
unroll_cq_resv:
	q = &hdev->kernel_queues[0];
	for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
-		if ((q->queue_type == QUEUE_TYPE_EXT ||
-				q->queue_type == QUEUE_TYPE_HW) &&
-				cs->jobs_in_queue_cnt[i]) {
+		if ((q->queue_type == QUEUE_TYPE_EXT) &&
+						(cs->jobs_in_queue_cnt[i])) {
			atomic_t *free_slots =
				&hdev->completion_queue[i].free_slots_cnt;
			atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
@@ -631,7 +602,7 @@ void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];

-	q->ci = hl_queue_inc_ptr(q->ci);
+	atomic_inc(&q->ci);
}

static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
@@ -666,7 +637,7 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
	}

	/* Make sure read/write pointers are initialized to start of queue */
-	q->ci = 0;
+	atomic_set(&q->ci, 0);
	q->pi = 0;

	return 0;
@@ -700,7 +671,7 @@ static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)

	q->kernel_address = (u64) (uintptr_t) p;
	q->pi = 0;
-	q->ci = 0;
+	atomic_set(&q->ci, 0);

	return 0;
}
@@ -729,7 +700,7 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
	q->kernel_address = (u64) (uintptr_t) p;

	/* Make sure read/write pointers are initialized to start of queue */
-	q->ci = 0;
+	atomic_set(&q->ci, 0);
	q->pi = 0;

	return 0;
@@ -931,7 +902,8 @@ void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
		if ((!q->valid) ||
			((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU)))
			continue;
-		q->pi = q->ci = 0;
+		q->pi = 0;
+		atomic_set(&q->ci, 0);

		if (q->supports_sync_stream)
			sync_stream_queue_reset(hdev, q->hw_queue_id);
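
Side note on the ci type change above: making ci an atomic_t is what lets the
completion path advance it without taking the hw_queues_lock, which this patch
removes from hl_int_hw_queue_update_ci(). A minimal user-space sketch of that
pattern follows, assuming C11 atomics in place of the kernel's atomic_t, with
the toy_* names invented for the example; it is not driver code:

#include <stdatomic.h>

struct toy_queue {
	unsigned int pi;	/* advanced only by the submission path */
	atomic_uint ci;		/* advanced lock-free, e.g. from IRQ context */
};

/* completion side: retire one entry (cf. atomic_inc(&queue->ci)) */
static void toy_complete_one(struct toy_queue *q)
{
	atomic_fetch_add(&q->ci, 1);
}

/* submission side: snapshot ci without any lock (cf. queue_ci_get()) */
static unsigned int toy_ci_get(struct toy_queue *q, unsigned int len)
{
	return atomic_load(&q->ci) & ((len << 1) - 1);
}

The counter is left free-running and is only folded into [0, 2 * len) at the
point of use; this is also why the irq.c hunk below can drop the comment about
the update being safe only because there is a different IRQ per queue.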
+1 −6
@@ -122,12 +122,7 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
			queue_work(hdev->cq_wq, &job->finish_work);
		}

-		/* Update ci of the context's queue. There is no
-		 * need to protect it with spinlock because this update is
-		 * done only inside IRQ and there is a different IRQ per
-		 * queue
-		 */
-		queue->ci = hl_queue_inc_ptr(queue->ci);
+		atomic_inc(&queue->ci);

		/* Clear CQ entry ready bit */
		cq_entry->data = cpu_to_le32(le32_to_cpu(cq_entry->data) &