Commit cb596aee authored by Tomer Tayar, committed by Oded Gabbay

habanalabs: Add a new H/W queue type

This patch adds support for a new H/W queue type.
This queue type serves jobs of the DMA and compute engines, for which
completion notifications are sent by the H/W.
A command buffer for this queue type can be created either through the
CB IOCTL, using the retrieved CB handle, or by preparing a buffer on
the host or in the device SRAM/DRAM and using the device address of
that buffer.
The patch handles both options, and also adds the initialization of
the H/W queue and the scheduling of its jobs.
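
For illustration, a minimal userspace sketch of the two options (resolve_cb()
and cb_table are hypothetical names; only the two-way dispatch mirrors the
driver logic in the diff below):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical model of a driver CB object and the CB manager table */
struct cb { uint64_t bus_address; };
static struct cb cb_table[] = { { 0x1000 }, { 0x2000 } };

static uint64_t resolve_cb(uint64_t cb_handle, int is_kernel_allocated_cb)
{
	if (is_kernel_allocated_cb)
		/* Option 1: handle retrieved from the CB IOCTL */
		return cb_table[cb_handle].bus_address;
	/* Option 2: raw host or device SRAM/DRAM address, used as-is */
	return cb_handle;
}

int main(void)
{
	printf("0x%llx\n", (unsigned long long)resolve_cb(1, 1));
	printf("0x%llx\n", (unsigned long long)resolve_cb(0x7c000000, 0));
	return 0;
}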

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent df762375
+82 −38
@@ -65,6 +65,18 @@ static void cs_put(struct hl_cs *cs)
	kref_put(&cs->refcount, cs_do_release);
}

static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
{
	/*
	 * Patched CB is created for external queues jobs, and for H/W queues
	 * jobs if the user CB was allocated by driver and MMU is disabled.
	 */
	return (job->queue_type == QUEUE_TYPE_EXT ||
			(job->queue_type == QUEUE_TYPE_HW &&
					job->is_kernel_allocated_cb &&
					!hdev->mmu_enable));
}

/*
 * cs_parser - parse the user command submission
 *
@@ -91,11 +103,13 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
	parser.patched_cb = NULL;
	parser.user_cb = job->user_cb;
	parser.user_cb_size = job->user_cb_size;
	parser.ext_queue = job->ext_queue;
	parser.queue_type = job->queue_type;
	parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
	job->patched_cb = NULL;

	rc = hdev->asic_funcs->cs_parser(hdev, &parser);
	if (job->ext_queue) {

	if (is_cb_patched(hdev, job)) {
		if (!rc) {
			job->patched_cb = parser.patched_cb;
			job->job_cb_size = parser.patched_cb_size;
@@ -124,7 +138,7 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct hl_cs *cs = job->cs;

	if (job->ext_queue) {
	if (is_cb_patched(hdev, job)) {
		hl_userptr_delete_list(hdev, &job->userptr_list);

		/*
@@ -140,6 +154,19 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
		}
	}

	/* For H/W queue jobs, if a user CB was allocated by driver and MMU is
	 * enabled, the user CB isn't released in cs_parser() and thus should be
	 * released here.
	 */
	if (job->queue_type == QUEUE_TYPE_HW &&
			job->is_kernel_allocated_cb && hdev->mmu_enable) {
		spin_lock(&job->user_cb->lock);
		job->user_cb->cs_cnt--;
		spin_unlock(&job->user_cb->lock);

		hl_cb_put(job->user_cb);
	}

	/*
	 * This is the only place where there can be multiple threads
	 * modifying the list at the same time
@@ -150,7 +177,8 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)

	hl_debugfs_remove_job(hdev, job);

	if (job->ext_queue)
	if (job->queue_type == QUEUE_TYPE_EXT ||
			job->queue_type == QUEUE_TYPE_HW)
		cs_put(cs);

	kfree(job);
@@ -387,18 +415,13 @@ static void job_wq_completion(struct work_struct *work)
	free_job(hdev, job);
}

static struct hl_cb *validate_queue_index(struct hl_device *hdev,
					struct hl_cb_mgr *cb_mgr,
static int validate_queue_index(struct hl_device *hdev,
				struct hl_cs_chunk *chunk,
					bool *ext_queue)
				enum hl_queue_type *queue_type,
				bool *is_kernel_allocated_cb)
{
	struct asic_fixed_properties *asic = &hdev->asic_prop;
	struct hw_queue_properties *hw_queue_prop;
	u32 cb_handle;
	struct hl_cb *cb;

	/* Assume external queue */
	*ext_queue = true;

	hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];

@@ -406,22 +429,29 @@ static struct hl_cb *validate_queue_index(struct hl_device *hdev,
			(hw_queue_prop->type == QUEUE_TYPE_NA)) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		return NULL;
		return -EINVAL;
	}

	if (hw_queue_prop->driver_only) {
		dev_err(hdev->dev,
			"Queue index %d is restricted for the kernel driver\n",
			chunk->queue_index);
		return NULL;
		return -EINVAL;
	}

	if (!hw_queue_prop->requires_kernel_cb) {
		*ext_queue = false;
		return (struct hl_cb *) (uintptr_t) chunk->cb_handle;
	*queue_type = hw_queue_prop->type;
	*is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb;

	return 0;
}

	/* Retrieve CB object */
static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
					struct hl_cb_mgr *cb_mgr,
					struct hl_cs_chunk *chunk)
{
	struct hl_cb *cb;
	u32 cb_handle;

	cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);

	cb = hl_cb_get(hdev, cb_mgr, cb_handle);
@@ -446,7 +476,8 @@ release_cb:
	return NULL;
}

struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue)
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
		enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
{
	struct hl_cs_job *job;

@@ -454,12 +485,14 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue)
	if (!job)
		return NULL;

	job->ext_queue = ext_queue;
	job->queue_type = queue_type;
	job->is_kernel_allocated_cb = is_kernel_allocated_cb;

	if (job->ext_queue) {
	if (is_cb_patched(hdev, job))
		INIT_LIST_HEAD(&job->userptr_list);

	if (job->queue_type == QUEUE_TYPE_EXT)
		INIT_WORK(&job->finish_work, job_wq_completion);
	}

	return job;
}
@@ -472,7 +505,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
	struct hl_cs_job *job;
	struct hl_cs *cs;
	struct hl_cb *cb;
	bool ext_queue_present = false;
	bool int_queues_only = true;
	u32 size_to_copy;
	int rc, i, parse_cnt;

@@ -516,23 +549,33 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
	/* Validate ALL the CS chunks before submitting the CS */
	for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) {
		struct hl_cs_chunk *chunk = &cs_chunk_array[i];
		bool ext_queue;
		enum hl_queue_type queue_type;
		bool is_kernel_allocated_cb;

		cb = validate_queue_index(hdev, &hpriv->cb_mgr, chunk,
					&ext_queue);
		if (ext_queue) {
			ext_queue_present = true;
		rc = validate_queue_index(hdev, chunk, &queue_type,
						&is_kernel_allocated_cb);
		if (rc)
			goto free_cs_object;

		if (is_kernel_allocated_cb) {
			cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
			if (!cb) {
				rc = -EINVAL;
				goto free_cs_object;
			}
		} else {
			cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
		}

		job = hl_cs_allocate_job(hdev, ext_queue);
		if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
			int_queues_only = false;

		job = hl_cs_allocate_job(hdev, queue_type,
						is_kernel_allocated_cb);
		if (!job) {
			dev_err(hdev->dev, "Failed to allocate a new job\n");
			rc = -ENOMEM;
			if (ext_queue)
			if (is_kernel_allocated_cb)
				goto release_cb;
			else
				goto free_cs_object;
@@ -542,7 +585,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
		job->cs = cs;
		job->user_cb = cb;
		job->user_cb_size = chunk->cb_size;
		if (job->ext_queue)
		if (is_kernel_allocated_cb)
			job->job_cb_size = cb->size;
		else
			job->job_cb_size = chunk->cb_size;
@@ -555,10 +598,11 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
		/*
		 * Increment CS reference. When CS reference is 0, CS is
		 * done and can be signaled to user and free all its resources
		 * Only increment for JOB on external queues, because only
		 * for those JOBs we get completion
		 * Only increment for JOB on external or H/W queues, because
		 * only for those JOBs we get completion
		 */
		if (job->ext_queue)
		if (job->queue_type == QUEUE_TYPE_EXT ||
				job->queue_type == QUEUE_TYPE_HW)
			cs_get(cs);

		hl_debugfs_add_job(hdev, job);
@@ -572,9 +616,9 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
		}
	}

	if (!ext_queue_present) {
	if (int_queues_only) {
		dev_err(hdev->dev,
			"Reject CS %d.%llu because no external queues jobs\n",
			"Reject CS %d.%llu because only internal queues jobs are present\n",
			cs->ctx->asid, cs->sequence);
		rc = -EINVAL;
		goto free_cs_object;
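
A side note on the kernel-allocated-CB path above: get_cb_from_cs_chunk()
recovers the CB manager id by shifting the user-visible handle right by
PAGE_SHIFT. A minimal sketch of that encoding (the PAGE_SHIFT value of 12,
i.e. 4 KB pages, is an assumption for illustration):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* assumption: 4 KB pages */

int main(void)
{
	/* The CB IOCTL hands the user a handle with the CB id in the
	 * upper bits; get_cb_from_cs_chunk() reverses that encoding.
	 */
	uint64_t cb_handle = (uint64_t)5 << PAGE_SHIFT;
	uint32_t cb_id = (uint32_t)(cb_handle >> PAGE_SHIFT);

	printf("cb_handle=0x%llx -> cb_id=%u\n",
		(unsigned long long)cb_handle, cb_id);
	return 0;
}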
+2 −2
@@ -3943,7 +3943,7 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!parser->ext_queue)
	if (parser->queue_type == QUEUE_TYPE_INT)
		return goya_parse_cb_no_ext_queue(hdev, parser);

	if (goya->hw_cap_initialized & HW_CAP_MMU)
@@ -4614,7 +4614,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
		lin_dma_pkt++;
	} while (--lin_dma_pkts_cnt);

	job = hl_cs_allocate_job(hdev, true);
	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
+18 −6
@@ -85,12 +85,15 @@ struct hl_fpriv;
 * @QUEUE_TYPE_INT: internal queue that performs DMA inside the device's
 *			memories and/or operates the compute engines.
 * @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU.
 * @QUEUE_TYPE_HW: queue of DMA and compute engines jobs, for which completion
 *                 notifications are sent by H/W.
 */
enum hl_queue_type {
	QUEUE_TYPE_NA,
	QUEUE_TYPE_EXT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_CPU
	QUEUE_TYPE_CPU,
	QUEUE_TYPE_HW
};

/**
@@ -755,11 +758,14 @@ struct hl_cs {
 * @userptr_list: linked-list of userptr mappings that belong to this job and
 *			wait for completion.
 * @debugfs_list: node in debugfs list of command submission jobs.
 * @queue_type: the type of the H/W queue this job is submitted to.
 * @id: the id of this job inside a CS.
 * @hw_queue_id: the id of the H/W queue this job is submitted to.
 * @user_cb_size: the actual size of the CB we got from the user.
 * @job_cb_size: the actual size of the CB that we put on the queue.
 * @ext_queue: whether the job is for external queue or internal queue.
 * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
 *                          handle to a kernel-allocated CB object, false
 *                          otherwise (SRAM/DRAM/host address).
 */
struct hl_cs_job {
	struct list_head	cs_node;
@@ -769,11 +775,12 @@ struct hl_cs_job {
	struct work_struct	finish_work;
	struct list_head	userptr_list;
	struct list_head	debugfs_list;
	enum hl_queue_type	queue_type;
	u32			id;
	u32			hw_queue_id;
	u32			user_cb_size;
	u32			job_cb_size;
	u8			ext_queue;
	u8			is_kernel_allocated_cb;
};

/**
@@ -784,24 +791,28 @@ struct hl_cs_job {
 * @job_userptr_list: linked-list of userptr mappings that belong to the related
 *			job and wait for completion.
 * @cs_sequence: the sequence number of the related CS.
 * @queue_type: the type of the H/W queue this job is submitted to.
 * @ctx_id: the ID of the context the related CS belongs to.
 * @hw_queue_id: the id of the H/W queue this job is submitted to.
 * @user_cb_size: the actual size of the CB we got from the user.
 * @patched_cb_size: the size of the CB after parsing.
 * @ext_queue: whether the job is for external queue or internal queue.
 * @job_id: the id of the related job inside the related CS.
 * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
 *                          handle to a kernel-allocated CB object, false
 *                          otherwise (SRAM/DRAM/host address).
 */
struct hl_cs_parser {
	struct hl_cb		*user_cb;
	struct hl_cb		*patched_cb;
	struct list_head	*job_userptr_list;
	u64			cs_sequence;
	enum hl_queue_type	queue_type;
	u32			ctx_id;
	u32			hw_queue_id;
	u32			user_cb_size;
	u32			patched_cb_size;
	u8			ext_queue;
	u8			job_id;
	u8			is_kernel_allocated_cb;
};


@@ -1504,7 +1515,8 @@ int hl_cb_pool_init(struct hl_device *hdev);
int hl_cb_pool_fini(struct hl_device *hdev);

void hl_cs_rollback_all(struct hl_device *hdev);
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue);
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
		enum hl_queue_type queue_type, bool is_kernel_allocated_cb);

void goya_set_asic_funcs(struct hl_device *hdev);

+194 −55
@@ -58,8 +58,8 @@ out:
}

/*
 * ext_queue_submit_bd - Submit a buffer descriptor to an external queue
 *
 * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
 *                                H/W queue.
 * @hdev: pointer to habanalabs device structure
 * @q: pointer to habanalabs queue structure
 * @ctl: BD's control word
@@ -73,8 +73,8 @@ out:
 * This function must be called when the scheduler mutex is taken
 *
 */
static void ext_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
				u32 ctl, u32 len, u64 ptr)
static void ext_and_hw_queue_submit_bd(struct hl_device *hdev,
			struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr)
{
	struct hl_bd *bd;

@@ -173,6 +173,45 @@ static int int_queue_sanity_checks(struct hl_device *hdev,
	return 0;
}

/*
 * hw_queue_sanity_checks() - Perform some sanity checks on a H/W queue.
 * @hdev: Pointer to hl_device structure.
 * @q: Pointer to hl_hw_queue structure.
 * @num_of_entries: How many entries to check for space.
 *
 * Perform the following:
 * - Make sure we have enough space in the completion queue.
 *   This check also ensures that there is enough space in the h/w queue, as
 *   both queues are of the same size.
 * - Reserve space in the completion queue (needs to be reversed if there
 *   is a failure down the road before the actual submission of work).
 *
 * Both operations are done using the "free_slots_cnt" field of the completion
 * queue. The CI counters of the queue and the completion queue are not
 * needed/used for the H/W queue type.
 */
static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
					int num_of_entries)
{
	atomic_t *free_slots =
			&hdev->completion_queue[q->hw_queue_id].free_slots_cnt;

	/*
	 * Check that we have enough space in the completion queue.
	 * atomic_add_negative() subtracts num_of_entries from the counter
	 * and returns true if the result is negative, i.e. the CQ doesn't
	 * have enough free slots for the new CB. In that case, restore the
	 * counter and fail with -EAGAIN.
	 */
	if (atomic_add_negative(num_of_entries * -1, free_slots)) {
		dev_dbg(hdev->dev, "No space for %d entries on CQ %d\n",
			num_of_entries, q->hw_queue_id);
		atomic_add(num_of_entries, free_slots);
		return -EAGAIN;
	}

	return 0;
}
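
The reserve-then-rollback pattern above can be exercised outside the kernel.
A minimal userspace analogue (an assumption for illustration: C11 atomics
stand in for the kernel's atomic_t, and cq_reserve()/cq_unreserve() are
hypothetical names, not driver functions):

#include <stdatomic.h>
#include <stdio.h>

/* Stand-in for completion_queue.free_slots_cnt */
static atomic_int free_slots = 4;

/* Mirrors hw_queue_sanity_checks(): subtract, roll back on underflow */
static int cq_reserve(int num_of_entries)
{
	/* atomic_fetch_sub() returns the OLD value, so the new value is
	 * old - num_of_entries; negative means not enough space.
	 */
	int old = atomic_fetch_sub(&free_slots, num_of_entries);
	if (old - num_of_entries < 0) {
		atomic_fetch_add(&free_slots, num_of_entries); /* rollback */
		return -1;	/* -EAGAIN in the driver */
	}
	return 0;
}

static void cq_unreserve(int num_of_entries)
{
	atomic_fetch_add(&free_slots, num_of_entries);
}

int main(void)
{
	printf("reserve 3: %d\n", cq_reserve(3));	/* ok, 1 slot left */
	printf("reserve 2: %d\n", cq_reserve(2));	/* fails, rolled back */
	cq_unreserve(3);				/* completions arrived */
	printf("reserve 2: %d\n", cq_reserve(2));	/* ok again */
	return 0;
}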

/*
 * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
 *
@@ -188,7 +227,7 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
				u32 cb_size, u64 cb_ptr)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
	int rc;
	int rc = 0;

	/*
	 * The CPU queue is a synchronous queue with an effective depth of
@@ -206,11 +245,18 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
		goto out;
	}

	/*
	 * hl_hw_queue_send_cb_no_cmpl() is called for queues of the H/W
	 * type only during the init phase, when the queues are empty and
	 * being tested, so there is no need for sanity checks.
	 */
	if (q->queue_type != QUEUE_TYPE_HW) {
		rc = ext_queue_sanity_checks(hdev, q, 1, false);
		if (rc)
			goto out;
	}

	ext_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);
	ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);

out:
	if (q->queue_type != QUEUE_TYPE_CPU)
@@ -220,14 +266,14 @@ out:
}

/*
 * ext_hw_queue_schedule_job - submit a JOB to an external queue
 * ext_queue_schedule_job - submit a JOB to an external queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken
 *
 */
static void ext_hw_queue_schedule_job(struct hl_cs_job *job)
static void ext_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
@@ -260,7 +306,7 @@ static void ext_hw_queue_schedule_job(struct hl_cs_job *job)
	 * H/W queues is done under the scheduler mutex
	 *
	 * No need to check if CQ is full because it was already
	 * checked in hl_queue_sanity_checks
	 * checked in ext_queue_sanity_checks
	 */
	cq = &hdev->completion_queue[q->hw_queue_id];
	cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);
@@ -274,18 +320,18 @@ static void ext_hw_queue_schedule_job(struct hl_cs_job *job)

	cq->pi = hl_cq_inc_ptr(cq->pi);

	ext_queue_submit_bd(hdev, q, ctl, len, ptr);
	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}

/*
 * int_hw_queue_schedule_job - submit a JOB to an internal queue
 * int_queue_schedule_job - submit a JOB to an internal queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken
 *
 */
static void int_hw_queue_schedule_job(struct hl_cs_job *job)
static void int_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
@@ -307,6 +353,60 @@ static void int_hw_queue_schedule_job(struct hl_cs_job *job)
	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
}

/*
 * hw_queue_schedule_job - submit a JOB to a H/W queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken
 *
 */
static void hw_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	struct hl_cq *cq;
	u64 ptr;
	u32 offset, ctl, len;

	/*
	 * Upon PQE completion, COMP_DATA is used as the write data to the
	 * completion queue (QMAN HBW message), and COMP_OFFSET is used as the
	 * write address offset in the SM block (QMAN LBW message).
	 * The write address offset is calculated as "COMP_OFFSET << 2".
	 */
	offset = job->cs->sequence & (HL_MAX_PENDING_CS - 1);
	ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) |
		((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK);

	len = job->job_cb_size;

	/*
	 * A patched CB is created only if a user CB was allocated by driver and
	 * MMU is disabled. If MMU is enabled, the user CB should be used
	 * instead. If the user CB wasn't allocated by driver, assume that it
	 * holds an address.
	 */
	if (job->patched_cb)
		ptr = job->patched_cb->bus_address;
	else if (job->is_kernel_allocated_cb)
		ptr = job->user_cb->bus_address;
	else
		ptr = (u64) (uintptr_t) job->user_cb;

	/*
	 * No need to protect pi_offset because scheduling to the
	 * H/W queues is done under the scheduler mutex
	 *
	 * No need to check if CQ is full because it was already
	 * checked in hw_queue_sanity_checks
	 */
	cq = &hdev->completion_queue[q->hw_queue_id];
	cq->pi = hl_cq_inc_ptr(cq->pi);

	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}
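
For concreteness, the control-word packing above can be checked in isolation.
A minimal sketch using the BD_CTL_COMP_* definitions from the last file in
this diff (the HL_MAX_PENDING_CS value of 64 here is an assumption for
illustration):

#include <stdint.h>
#include <stdio.h>

#define BD_CTL_COMP_OFFSET_SHIFT	16
#define BD_CTL_COMP_OFFSET_MASK		0x00FF0000
#define BD_CTL_COMP_DATA_SHIFT		0
#define BD_CTL_COMP_DATA_MASK		0x0000FFFF

int main(void)
{
	uint64_t sequence = 0x123;	/* example CS sequence number */
	uint32_t pi = 7;		/* example queue producer index */
	uint32_t max_pending = 64;	/* assumption: HL_MAX_PENDING_CS */

	/* Same packing as hw_queue_schedule_job() above */
	uint32_t offset = sequence & (max_pending - 1);
	uint32_t ctl =
		((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) |
		((pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK);

	/* H/W writes COMP_DATA to the CQ and uses COMP_OFFSET << 2 as the
	 * SM block write address offset.
	 */
	printf("ctl = 0x%08x (offset=%u, data=%u)\n", ctl, offset, pi);
	return 0;
}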

/*
 * hl_hw_queue_schedule_cs - schedule a command submission
 *
@@ -330,23 +430,34 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
	}

	q = &hdev->kernel_queues[0];
	/* This loop assumes all external queues are consecutive */
	for (i = 0, cq_cnt = 0 ; i < HL_MAX_QUEUES ; i++, q++) {
		if (q->queue_type == QUEUE_TYPE_EXT) {
		if (cs->jobs_in_queue_cnt[i]) {
			switch (q->queue_type) {
			case QUEUE_TYPE_EXT:
				rc = ext_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i], true);
				if (rc)
					goto unroll_cq_resv;
				cq_cnt++;
			}
		} else if (q->queue_type == QUEUE_TYPE_INT) {
			if (cs->jobs_in_queue_cnt[i]) {
				break;
			case QUEUE_TYPE_INT:
				rc = int_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i]);
				break;
			case QUEUE_TYPE_HW:
				rc = hw_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i]);
				break;
			default:
				dev_err(hdev->dev, "Queue type %d is invalid\n",
					q->queue_type);
				rc = -EINVAL;
				break;
			}

			if (rc)
				goto unroll_cq_resv;
			}

			if (q->queue_type == QUEUE_TYPE_EXT ||
					q->queue_type == QUEUE_TYPE_HW)
				cq_cnt++;
		}
	}

@@ -373,21 +484,30 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
	}

	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		if (job->ext_queue)
			ext_hw_queue_schedule_job(job);
		else
			int_hw_queue_schedule_job(job);
		switch (job->queue_type) {
		case QUEUE_TYPE_EXT:
			ext_queue_schedule_job(job);
			break;
		case QUEUE_TYPE_INT:
			int_queue_schedule_job(job);
			break;
		case QUEUE_TYPE_HW:
			hw_queue_schedule_job(job);
			break;
		default:
			break;
		}

	cs->submitted = true;

	goto out;

unroll_cq_resv:
	/* This loop assumes all external queues are consecutive */
	q = &hdev->kernel_queues[0];
	for (i = 0 ; (i < HL_MAX_QUEUES) && (cq_cnt > 0) ; i++, q++) {
		if ((q->queue_type == QUEUE_TYPE_EXT) &&
				(cs->jobs_in_queue_cnt[i])) {
		if ((q->queue_type == QUEUE_TYPE_EXT ||
				q->queue_type == QUEUE_TYPE_HW) &&
				cs->jobs_in_queue_cnt[i]) {
			atomic_t *free_slots =
				&hdev->completion_queue[i].free_slots_cnt;
			atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
@@ -414,8 +534,8 @@ void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
	q->ci = hl_queue_inc_ptr(q->ci);
}

static int ext_and_cpu_hw_queue_init(struct hl_device *hdev,
				struct hl_hw_queue *q, bool is_cpu_queue)
static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
					bool is_cpu_queue)
{
	void *p;
	int rc;
@@ -465,7 +585,7 @@ free_queue:
	return rc;
}

static int int_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	void *p;

@@ -485,18 +605,38 @@ static int int_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
	return 0;
}

static int cpu_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	return ext_and_cpu_queue_init(hdev, q, true);
}

static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	return ext_and_cpu_hw_queue_init(hdev, q, true);
	return ext_and_cpu_queue_init(hdev, q, false);
}

static int ext_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	return ext_and_cpu_hw_queue_init(hdev, q, false);
	void *p;

	p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_QUEUE_SIZE_IN_BYTES,
						&q->bus_address,
						GFP_KERNEL | __GFP_ZERO);
	if (!p)
		return -ENOMEM;

	q->kernel_address = (u64) (uintptr_t) p;

	/* Make sure read/write pointers are initialized to start of queue */
	q->ci = 0;
	q->pi = 0;

	return 0;
}

/*
 * hw_queue_init - main initialization function for H/W queue object
 * queue_init - main initialization function for H/W queue object
 *
 * @hdev: pointer to hl_device device structure
 * @q: pointer to hl_hw_queue queue structure
@@ -505,7 +645,7 @@ static int ext_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
 * Allocate dma-able memory for the queue and initialize fields
 * Returns 0 on success
 */
static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
			u32 hw_queue_id)
{
	int rc;
@@ -516,21 +656,20 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,

	switch (q->queue_type) {
	case QUEUE_TYPE_EXT:
		rc = ext_hw_queue_init(hdev, q);
		rc = ext_queue_init(hdev, q);
		break;

	case QUEUE_TYPE_INT:
		rc = int_hw_queue_init(hdev, q);
		rc = int_queue_init(hdev, q);
		break;

	case QUEUE_TYPE_CPU:
		rc = cpu_hw_queue_init(hdev, q);
		rc = cpu_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_HW:
		rc = hw_queue_init(hdev, q);
		break;

	case QUEUE_TYPE_NA:
		q->valid = 0;
		return 0;

	default:
		dev_crit(hdev->dev, "wrong queue type %d during init\n",
			q->queue_type);
@@ -554,7 +693,7 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
 *
 * Free the queue memory
 */
static void hw_queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
{
	if (!q->valid)
		return;
@@ -612,7 +751,7 @@ int hl_hw_queues_create(struct hl_device *hdev)
			i < HL_MAX_QUEUES ; i++, q_ready_cnt++, q++) {

		q->queue_type = asic->hw_queues_props[i].type;
		rc = hw_queue_init(hdev, q, i);
		rc = queue_init(hdev, q, i);
		if (rc) {
			dev_err(hdev->dev,
				"failed to initialize queue %d\n", i);
@@ -624,7 +763,7 @@ int hl_hw_queues_create(struct hl_device *hdev)

release_queues:
	for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++)
		hw_queue_fini(hdev, q);
		queue_fini(hdev, q);

	kfree(hdev->kernel_queues);

@@ -637,7 +776,7 @@ void hl_hw_queues_destroy(struct hl_device *hdev)
	int i;

	for (i = 0, q = hdev->kernel_queues ; i < HL_MAX_QUEUES ; i++, q++)
		hw_queue_fini(hdev, q);
		queue_fini(hdev, q);

	kfree(hdev->kernel_queues);
}
+12 −0
@@ -23,6 +23,8 @@ struct hl_bd {
#define HL_BD_SIZE			sizeof(struct hl_bd)

/*
 * S/W CTL FIELDS.
 *
 * BD_CTL_REPEAT_VALID tells the CP whether the repeat field in the BD CTL is
 * valid. 1 means the repeat field is valid, 0 means not-valid,
 * i.e. repeat == 1
@@ -33,6 +35,16 @@ struct hl_bd {
#define BD_CTL_SHADOW_INDEX_SHIFT	0
#define BD_CTL_SHADOW_INDEX_MASK	0x00000FFF

/*
 * H/W CTL FIELDS
 */

#define BD_CTL_COMP_OFFSET_SHIFT	16
#define BD_CTL_COMP_OFFSET_MASK		0x00FF0000

#define BD_CTL_COMP_DATA_SHIFT		0
#define BD_CTL_COMP_DATA_MASK		0x0000FFFF

/*
 * COMPLETION QUEUE
 */
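
Round-tripping the two fields is a quick sanity check on these masks; a
minimal sketch with a hypothetical control word, following the driver comment
that the H/W uses COMP_DATA as the CQ write data (HBW message) and
COMP_OFFSET << 2 as the SM write address offset (LBW message):

#include <assert.h>
#include <stdint.h>

#define BD_CTL_COMP_OFFSET_SHIFT	16
#define BD_CTL_COMP_OFFSET_MASK		0x00FF0000
#define BD_CTL_COMP_DATA_SHIFT		0
#define BD_CTL_COMP_DATA_MASK		0x0000FFFF

int main(void)
{
	uint32_t ctl = 0x00230007;	/* hypothetical BD control word */

	/* Decode the fields the H/W consumes on PQE completion */
	uint32_t comp_offset =
		(ctl & BD_CTL_COMP_OFFSET_MASK) >> BD_CTL_COMP_OFFSET_SHIFT;
	uint32_t comp_data =
		(ctl & BD_CTL_COMP_DATA_MASK) >> BD_CTL_COMP_DATA_SHIFT;

	assert(comp_offset == 0x23);
	assert(comp_data == 0x7);
	assert((comp_offset << 2) == 0x8c);	/* SM write address offset */
	return 0;
}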