Commit 2bd7d8df authored by Greg Kroah-Hartman's avatar Greg Kroah-Hartman
Browse files

Merge tag 'misc-habanalabs-next-2020-05-19' of...

Merge tag 'misc-habanalabs-next-2020-05-19' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next

Oded writes:

This tag contains the following changes for kernel 5.8:

- GAUDI ASIC support. The tag contains code and header files needed to
  initialize the GAUDI ASIC and run workloads on it. There are changes to
  the common code that are needed for GAUDI and there is the addition of
  new ASIC-dependent code of GAUDI.

- Add new feature of signal/wait command submissions. This is relevant to
  GAUDI only and allows the user to sync between streams (queues) inside
  the device.

- Allow user to retrieve the device time alongside the host time, to allow
  a user application to synchronize device time together with host
  time during profiling.

- Change ASIC's CPU initialization by loading its boot loader code from the
  Host memory (instead of it being programmed on the on-board FLASH).

- Expose more attributes through HWMON.

- Move the ASIC event handling code to be "common code". This will be
  shared between GAUDI and future ASICs. Goya will still use its own code.

- Fix bug in command submission parsing in Goya.

- Small fixes to security configuration (open up some registers for user
  access).

- Improvements to ASIC reset code.

* tag 'misc-habanalabs-next-2020-05-19' of git://people.freedesktop.org/~gabbayo/linux: (38 commits)
  habanalabs: update patched_cb_size for Wreg32
  habanalabs: move event handling to common firmware file
  habanalabs: enable gaudi code in driver
  habanalabs: add gaudi profiler module
  habanalabs: add gaudi security module
  habanalabs: add hwmgr module for gaudi
  habanalabs: add gaudi asic-dependent code
  uapi: habanalabs: add gaudi defines
  habanalabs: add gaudi asic registers header files
  habanalabs: get card type, location from F/W
  habanalabs: support clock gating enable/disable
  habanalabs: set PM profile to auto only for goya
  habanalabs: add dedicated define for hard reset
  habanalabs: check if CoreSight is supported
  habanalabs: add signal/wait to CS IOCTL operations
  habanalabs: handle the h/w sync object
  habanalabs: define ASIC-dependent interface for signal/wait
  uapi: habanalabs: add signal/wait operations
  habanalabs: add missing MODULE_DEVICE_TABLE
  habanalabs: print all CB handles as hex numbers
  ...
parents 57c76221 87eaea1c
Loading
Loading
Loading
Loading
+17 −0
Original line number Diff line number Diff line
@@ -8,6 +8,16 @@ Description: Sets the device address to be used for read or write through
                only when the IOMMU is disabled.
                The acceptable value is a string that starts with "0x"

What:           /sys/kernel/debug/habanalabs/hl<n>/clk_gate
Date:           May 2020
KernelVersion:  5.8
Contact:        oded.gabbay@gmail.com
Description:    Allow the root user to disable/enable in runtime the clock
                gating mechanism in Gaudi. Due to how Gaudi is built, the
                clock gating needs to be disabled in order to access the
                registers of the TPC and MME engines. This is sometimes needed
                during debug by the user and hence the user needs this option

What:           /sys/kernel/debug/habanalabs/hl<n>/command_buffers
Date:           Jan 2019
KernelVersion:  5.1
@@ -150,3 +160,10 @@ KernelVersion: 5.1
Contact:        oded.gabbay@gmail.com
Description:    Displays a list with information about all the active virtual
                address mappings per ASID

What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
Date:           Mar 2020
KernelVersion:  5.6
Contact:        oded.gabbay@gmail.com
Description:    Sets the stop-on-error option for the device engines. A value
                of "0" disables the option; any other value enables it.
+17 −0
Original line number Diff line number Diff line
@@ -10,6 +10,23 @@ KernelVersion: 5.1
Contact:        oded.gabbay@gmail.com
Description:    Version of the application running on the device's CPU

What:           /sys/class/habanalabs/hl<n>/clk_max_freq_mhz
Date:           Jun 2019
KernelVersion:  not yet upstreamed
Contact:        oded.gabbay@gmail.com
Description:    Allows the user to set the maximum clock frequency, in MHz.
                The device clock might be set to lower value than the maximum.
                The user should read the clk_cur_freq_mhz to see the actual
                frequency value of the device clock. This property is valid
                only for the Gaudi ASIC family

What:           /sys/class/habanalabs/hl<n>/clk_cur_freq_mhz
Date:           Jun 2019
KernelVersion:  not yet upstreamed
Contact:        oded.gabbay@gmail.com
Description:    Displays the current frequency, in MHz, of the device clock.
                This property is valid only for the Gaudi ASIC family

What:           /sys/class/habanalabs/hl<n>/cpld_ver
Date:           Jan 2019
KernelVersion:  5.1
+3 −0
Original line number Diff line number Diff line
@@ -13,3 +13,6 @@ habanalabs-$(CONFIG_DEBUG_FS) += debugfs.o

include $(src)/goya/Makefile
habanalabs-y += $(HL_GOYA_FILES)

include $(src)/gaudi/Makefile
habanalabs-y += $(HL_GAUDI_FILES)
+19 −9
Original line number Diff line number Diff line
@@ -105,10 +105,9 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
		goto out_err;
	}

	if (cb_size > HL_MAX_CB_SIZE) {
		dev_err(hdev->dev,
			"CB size %d must be less then %d\n",
			cb_size, HL_MAX_CB_SIZE);
	if (cb_size > SZ_2M) {
		dev_err(hdev->dev, "CB size %d must be less than %d\n",
			cb_size, SZ_2M);
		rc = -EINVAL;
		goto out_err;
	}
@@ -211,7 +210,7 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_cb_args *args = data;
	struct hl_device *hdev = hpriv->hdev;
	u64 handle;
	u64 handle = 0;
	int rc;

	if (hl_device_disabled_or_in_reset(hdev)) {
@@ -223,15 +222,26 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)

	switch (args->in.op) {
	case HL_CB_OP_CREATE:
		rc = hl_cb_create(hdev, &hpriv->cb_mgr, args->in.cb_size,
					&handle, hpriv->ctx->asid);
		if (args->in.cb_size > HL_MAX_CB_SIZE) {
			dev_err(hdev->dev,
				"User requested CB size %d must be less than %d\n",
				args->in.cb_size, HL_MAX_CB_SIZE);
			rc = -EINVAL;
		} else {
			rc = hl_cb_create(hdev, &hpriv->cb_mgr,
						args->in.cb_size, &handle,
						hpriv->ctx->asid);
		}

		memset(args, 0, sizeof(*args));
		args->out.cb_handle = handle;
		break;

	case HL_CB_OP_DESTROY:
		rc = hl_cb_destroy(hdev, &hpriv->cb_mgr,
					args->in.cb_handle);
		break;

	default:
		rc = -ENOTTY;
		break;
@@ -278,7 +288,7 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
	cb = hl_cb_get(hdev, &hpriv->cb_mgr, handle);
	if (!cb) {
		dev_err(hdev->dev,
			"CB mmap failed, no match to handle %d\n", handle);
			"CB mmap failed, no match to handle 0x%x\n", handle);
		return -EINVAL;
	}

@@ -347,7 +357,7 @@ struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
	if (!cb) {
		spin_unlock(&mgr->cb_lock);
		dev_warn(hdev->dev,
			"CB get failed, no match to handle %d\n", handle);
			"CB get failed, no match to handle 0x%x\n", handle);
		return NULL;
	}

+354 −30
Original line number Diff line number Diff line
@@ -11,11 +11,33 @@
#include <linux/uaccess.h>
#include <linux/slab.h>

#define HL_CS_FLAGS_SIG_WAIT	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT)

static void job_wq_completion(struct work_struct *work);
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
		struct hl_ctx *ctx, u64 timeout_us, u64 seq);
static void cs_do_release(struct kref *ref);

/* kref release callback: hands the expired H/W sync object back to the
 * ASIC-specific layer so it can be reset and reused.
 */
static void hl_sob_reset(struct kref *ref)
{
	struct hl_hw_sob *sob = container_of(ref, struct hl_hw_sob, kref);

	sob->hdev->asic_funcs->reset_sob(sob->hdev, sob);
}

/* kref release callback that is installed where a SOB release must never
 * happen; if it fires anyway, log a critical error identifying the SOB.
 */
void hl_sob_reset_error(struct kref *ref)
{
	struct hl_hw_sob *sob = container_of(ref, struct hl_hw_sob, kref);

	dev_crit(sob->hdev->dev,
			"SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
			sob->q_idx, sob->sob_id);
}

static const char *hl_fence_get_driver_name(struct dma_fence *fence)
{
	return "HabanaLabs";
@@ -23,10 +45,10 @@ static const char *hl_fence_get_driver_name(struct dma_fence *fence)

static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
{
	struct hl_dma_fence *hl_fence =
		container_of(fence, struct hl_dma_fence, base_fence);
	struct hl_cs_compl *hl_cs_compl =
		container_of(fence, struct hl_cs_compl, base_fence);

	return dev_name(hl_fence->hdev->dev);
	return dev_name(hl_cs_compl->hdev->dev);
}

static bool hl_fence_enable_signaling(struct dma_fence *fence)
@@ -36,10 +58,41 @@ static bool hl_fence_enable_signaling(struct dma_fence *fence)

static void hl_fence_release(struct dma_fence *fence)
{
	struct hl_dma_fence *hl_fence =
		container_of(fence, struct hl_dma_fence, base_fence);
	struct hl_cs_compl *hl_cs_cmpl =
		container_of(fence, struct hl_cs_compl, base_fence);
	struct hl_device *hdev = hl_cs_cmpl->hdev;

	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
			(hl_cs_cmpl->type == CS_TYPE_WAIT)) {

		dev_dbg(hdev->dev,
			"CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
			hl_cs_cmpl->cs_seq,
			hl_cs_cmpl->type,
			hl_cs_cmpl->hw_sob->sob_id,
			hl_cs_cmpl->sob_val);

		/*
		 * A signal CS can get completion while the corresponding wait
		 * for signal CS is on its way to the PQ. The wait for signal CS
		 * will get stuck if the signal CS incremented the SOB to its
		 * max value and there are no pending (submitted) waits on this
		 * SOB.
		 * We do the following to avoid this situation:
		 * 1. The wait for signal CS must get a ref for the signal CS as
		 *    soon as possible in cs_ioctl_signal_wait() and put it
		 *    before being submitted to the PQ but after it incremented
		 *    the SOB refcnt in init_signal_wait_cs().
		 * 2. Signal/Wait for signal CS will decrement the SOB refcnt
		 *    here.
		 * These two measures guarantee that the wait for signal CS will
		 * reset the SOB upon completion rather than the signal CS and
		 * hence the above scenario is avoided.
		 */
		kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
	}

	kfree_rcu(hl_fence, base_fence.rcu);
	kfree_rcu(hl_cs_cmpl, base_fence.rcu);
}

static const struct dma_fence_ops hl_fence_ops = {
@@ -113,6 +166,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
		if (!rc) {
			job->patched_cb = parser.patched_cb;
			job->job_cb_size = parser.patched_cb_size;
			job->contains_dma_pkt = parser.contains_dma_pkt;

			spin_lock(&job->patched_cb->lock);
			job->patched_cb->cs_cnt++;
@@ -259,6 +313,12 @@ static void cs_do_release(struct kref *ref)

			spin_unlock(&hdev->hw_queues_mirror_lock);
		}
	} else if (cs->type == CS_TYPE_WAIT) {
		/*
		 * In case the wait for signal CS was submitted, the put occurs
		 * in init_signal_wait_cs() right before hanging on the PQ.
		 */
		dma_fence_put(cs->signal_fence);
	}

	/*
@@ -312,9 +372,9 @@ static void cs_timedout(struct work_struct *work)
}

static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
			struct hl_cs **cs_new)
			enum hl_cs_type cs_type, struct hl_cs **cs_new)
{
	struct hl_dma_fence *fence;
	struct hl_cs_compl *cs_cmpl;
	struct dma_fence *other = NULL;
	struct hl_cs *cs;
	int rc;
@@ -326,25 +386,27 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
	cs->ctx = ctx;
	cs->submitted = false;
	cs->completed = false;
	cs->type = cs_type;
	INIT_LIST_HEAD(&cs->job_list);
	INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
	kref_init(&cs->refcount);
	spin_lock_init(&cs->job_lock);

	fence = kmalloc(sizeof(*fence), GFP_ATOMIC);
	if (!fence) {
	cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
	if (!cs_cmpl) {
		rc = -ENOMEM;
		goto free_cs;
	}

	fence->hdev = hdev;
	spin_lock_init(&fence->lock);
	cs->fence = &fence->base_fence;
	cs_cmpl->hdev = hdev;
	cs_cmpl->type = cs->type;
	spin_lock_init(&cs_cmpl->lock);
	cs->fence = &cs_cmpl->base_fence;

	spin_lock(&ctx->cs_lock);

	fence->cs_seq = ctx->cs_sequence;
	other = ctx->cs_pending[fence->cs_seq & (HL_MAX_PENDING_CS - 1)];
	cs_cmpl->cs_seq = ctx->cs_sequence;
	other = ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)];
	if ((other) && (!dma_fence_is_signaled(other))) {
		spin_unlock(&ctx->cs_lock);
		dev_dbg(hdev->dev,
@@ -353,16 +415,16 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
		goto free_fence;
	}

	dma_fence_init(&fence->base_fence, &hl_fence_ops, &fence->lock,
	dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
			ctx->asid, ctx->cs_sequence);

	cs->sequence = fence->cs_seq;
	cs->sequence = cs_cmpl->cs_seq;

	ctx->cs_pending[fence->cs_seq & (HL_MAX_PENDING_CS - 1)] =
							&fence->base_fence;
	ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)] =
							&cs_cmpl->base_fence;
	ctx->cs_sequence++;

	dma_fence_get(&fence->base_fence);
	dma_fence_get(&cs_cmpl->base_fence);

	dma_fence_put(other);

@@ -373,7 +435,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
	return 0;

free_fence:
	kfree(fence);
	kfree(cs_cmpl);
free_cs:
	kfree(cs);
	return rc;
@@ -499,7 +561,7 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
	return job;
}

static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
				u32 num_chunks, u64 *cs_seq)
{
	struct hl_device *hdev = hpriv->hdev;
@@ -538,7 +600,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
	/* increment refcnt for context */
	hl_ctx_get(hdev, hpriv->ctx);

	rc = allocate_cs(hdev, hpriv->ctx, &cs);
	rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, &cs);
	if (rc) {
		hl_ctx_put(hpriv->ctx);
		goto free_cs_chunk_array;
@@ -652,13 +714,230 @@ out:
	return rc;
}

/*
 * cs_ioctl_signal_wait() - submit a signal CS or a wait-for-signal CS
 * (sync stream) on behalf of the user.
 * @hpriv: per-file private data of the calling process.
 * @cs_type: CS_TYPE_SIGNAL or CS_TYPE_WAIT.
 * @chunks: user pointer to the CS chunk array (exactly one chunk here).
 * @num_chunks: number of chunks in @chunks.
 * @cs_seq: out parameter, set to the sequence number of the created CS,
 *          or ULLONG_MAX if no CS was created.
 *
 * For a wait CS, the signal CS it depends on is looked up by sequence
 * number; if the signal CS already completed, the function returns 0
 * without creating a new CS. A single kernel-owned CB is built per CS
 * (no parsing needed - the kernel CB is used as the patched CB directly).
 *
 * Return: HL_CS_STATUS_SUCCESS on submission, 0 if the signal CS already
 * finished, negative errno on failure.
 */
static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
				void __user *chunks, u32 num_chunks,
				u64 *cs_seq)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	struct hl_cs_chunk *cs_chunk_array, *chunk;
	struct hw_queue_properties *hw_queue_prop;
	struct dma_fence *sig_fence = NULL;
	struct hl_cs_job *job;
	struct hl_cs *cs;
	struct hl_cb *cb;
	u64 *signal_seq_arr = NULL, signal_seq;
	u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
	int rc;

	/* Report "no CS" until one is actually allocated below */
	*cs_seq = ULLONG_MAX;

	if (num_chunks > HL_MAX_JOBS_PER_CS) {
		dev_err(hdev->dev,
			"Number of chunks can NOT be larger than %d\n",
			HL_MAX_JOBS_PER_CS);
		rc = -EINVAL;
		goto out;
	}

	cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
					GFP_ATOMIC);
	if (!cs_chunk_array) {
		rc = -ENOMEM;
		goto out;
	}

	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
	if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
		rc = -EFAULT;
		goto free_cs_chunk_array;
	}

	/* currently it is guaranteed to have only one chunk */
	chunk = &cs_chunk_array[0];
	q_idx = chunk->queue_index;
	/*
	 * NOTE(review): the element address is computed from the
	 * user-supplied q_idx before the bounds check below. The ||
	 * short-circuit prevents an out-of-bounds *read*, but hoisting the
	 * q_idx range check above this line would be cleaner - TODO confirm.
	 */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];

	/* Sync stream CSs are allowed only on external queues */
	if ((q_idx >= HL_MAX_QUEUES) ||
			(hw_queue_prop->type != QUEUE_TYPE_EXT)) {
		dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

	if (cs_type == CS_TYPE_WAIT) {
		struct hl_cs_compl *sig_waitcs_cmpl;

		signal_seq_arr_len = chunk->num_signal_seq_arr;

		/* currently only one signal seq is supported */
		if (signal_seq_arr_len != 1) {
			dev_err(hdev->dev,
				"Wait for signal CS supports only one signal CS seq\n");
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		signal_seq_arr = kmalloc_array(signal_seq_arr_len,
						sizeof(*signal_seq_arr),
						GFP_ATOMIC);
		if (!signal_seq_arr) {
			rc = -ENOMEM;
			goto free_cs_chunk_array;
		}

		size_to_copy = chunk->num_signal_seq_arr *
				sizeof(*signal_seq_arr);
		if (copy_from_user(signal_seq_arr,
					(void __user *) chunk->signal_seq_arr,
					size_to_copy)) {
			dev_err(hdev->dev,
				"Failed to copy signal seq array from user\n");
			rc = -EFAULT;
			goto free_signal_seq_array;
		}

		/* currently it is guaranteed to have only one signal seq */
		signal_seq = signal_seq_arr[0];
		/* Takes a reference on the fence if it is still pending */
		sig_fence = hl_ctx_get_fence(ctx, signal_seq);
		if (IS_ERR(sig_fence)) {
			dev_err(hdev->dev,
				"Failed to get signal CS with seq 0x%llx\n",
				signal_seq);
			rc = PTR_ERR(sig_fence);
			goto free_signal_seq_array;
		}

		if (!sig_fence) {
			/* signal CS already finished */
			rc = 0;
			goto free_signal_seq_array;
		}

		sig_waitcs_cmpl =
			container_of(sig_fence, struct hl_cs_compl, base_fence);

		/* A wait CS may only depend on a *signal* CS fence */
		if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
			dev_err(hdev->dev,
				"CS seq 0x%llx is not of a signal CS\n",
				signal_seq);
			dma_fence_put(sig_fence);
			rc = -EINVAL;
			goto free_signal_seq_array;
		}

		if (dma_fence_is_signaled(sig_fence)) {
			/* signal CS already finished */
			dma_fence_put(sig_fence);
			rc = 0;
			goto free_signal_seq_array;
		}
	}

	/* increment refcnt for context */
	hl_ctx_get(hdev, ctx);

	rc = allocate_cs(hdev, ctx, cs_type, &cs);
	if (rc) {
		/* Drop the signal fence reference taken above */
		if (cs_type == CS_TYPE_WAIT)
			dma_fence_put(sig_fence);
		hl_ctx_put(ctx);
		goto free_signal_seq_array;
	}

	/*
	 * Save the signal CS fence for later initialization right before
	 * hanging the wait CS on the queue.
	 */
	if (cs->type == CS_TYPE_WAIT)
		cs->signal_fence = sig_fence;

	hl_debugfs_add_cs(cs);

	*cs_seq = cs->sequence;

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto put_cs;
	}

	/* NOTE(review): -ENOMEM would arguably fit this failure better
	 * than -EFAULT - confirm against the driver's error conventions.
	 */
	cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
	if (!cb) {
		kfree(job);
		rc = -EFAULT;
		goto put_cs;
	}

	/* The actual packet size is ASIC-dependent */
	if (cs->type == CS_TYPE_WAIT)
		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
	else
		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	job->user_cb->cs_cnt++;
	job->user_cb_size = cb_size;
	job->hw_queue_id = q_idx;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() out of two reasons - we don't need the CB in
	 * the CB idr anymore and to decrement its refcount as it was
	 * incremented inside hl_cb_kernel_create().
	 */
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	/* increment refcount as for external queues we get completion */
	cs_get(cs);

	hl_debugfs_add_job(hdev, job);

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
				ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	rc = HL_CS_STATUS_SUCCESS;
	goto put_cs;

free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_signal_seq_array:
	if (cs_type == CS_TYPE_WAIT)
		kfree(signal_seq_arr);
free_cs_chunk_array:
	kfree(cs_chunk_array);
out:
	return rc;
}

int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct hl_device *hdev = hpriv->hdev;
	union hl_cs_args *args = data;
	struct hl_ctx *ctx = hpriv->ctx;
	void __user *chunks_execute, *chunks_restore;
	u32 num_chunks_execute, num_chunks_restore;
	enum hl_cs_type cs_type;
	u32 num_chunks_execute, num_chunks_restore, sig_wait_flags;
	u64 cs_seq = ULONG_MAX;
	int rc, do_ctx_switch;
	bool need_soft_reset = false;
@@ -671,9 +950,34 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
		goto out;
	}

	sig_wait_flags = args->in.cs_flags & HL_CS_FLAGS_SIG_WAIT;

	if (unlikely(sig_wait_flags == HL_CS_FLAGS_SIG_WAIT)) {
		dev_err(hdev->dev,
			"Signal and wait CS flags are mutually exclusive, context %d\n",
		ctx->asid);
		rc = -EINVAL;
		goto out;
	}

	if (unlikely((sig_wait_flags & HL_CS_FLAGS_SIG_WAIT) &&
			(!hdev->supports_sync_stream))) {
		dev_err(hdev->dev, "Sync stream CS is not supported\n");
		rc = -EINVAL;
		goto out;
	}

	if (args->in.cs_flags & HL_CS_FLAGS_SIGNAL)
		cs_type = CS_TYPE_SIGNAL;
	else if (args->in.cs_flags & HL_CS_FLAGS_WAIT)
		cs_type = CS_TYPE_WAIT;
	else
		cs_type = CS_TYPE_DEFAULT;

	chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute;
	num_chunks_execute = args->in.num_chunks_execute;

	if (cs_type == CS_TYPE_DEFAULT) {
		if (!num_chunks_execute) {
			dev_err(hdev->dev,
				"Got execute CS with 0 chunks, context %d\n",
@@ -681,6 +985,13 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
			rc = -EINVAL;
			goto out;
		}
	} else if (num_chunks_execute != 1) {
		dev_err(hdev->dev,
			"Sync stream CS mandates one chunk only, context %d\n",
			ctx->asid);
		rc = -EINVAL;
		goto out;
	}

	do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);

@@ -722,7 +1033,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
			"Need to run restore phase but restore CS is empty\n");
			rc = 0;
		} else {
			rc = _hl_cs_ioctl(hpriv, chunks_restore,
			rc = cs_ioctl_default(hpriv, chunks_restore,
						num_chunks_restore, &cs_seq);
		}

@@ -764,7 +1075,12 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
		}
	}

	rc = _hl_cs_ioctl(hpriv, chunks_execute, num_chunks_execute, &cs_seq);
	if (cs_type == CS_TYPE_DEFAULT)
		rc = cs_ioctl_default(hpriv, chunks_execute, num_chunks_execute,
					&cs_seq);
	else
		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks_execute,
						num_chunks_execute, &cs_seq);

out:
	if (rc != -EAGAIN) {
@@ -796,6 +1112,10 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
	fence = hl_ctx_get_fence(ctx, seq);
	if (IS_ERR(fence)) {
		rc = PTR_ERR(fence);
		if (rc == -EINVAL)
			dev_notice_ratelimited(hdev->dev,
				"Can't wait on seq %llu because current CS is at seq %llu\n",
				seq, ctx->cs_sequence);
	} else if (fence) {
		rc = dma_fence_wait_timeout(fence, true, timeout);
		if (fence->error == -ETIMEDOUT)
@@ -803,8 +1123,12 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
		else if (fence->error == -EIO)
			rc = -EIO;
		dma_fence_put(fence);
	} else
	} else {
		dev_dbg(hdev->dev,
			"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
			seq, ctx->cs_sequence);
		rc = 1;
	}

	hl_ctx_put(ctx);

Loading