Commit 2bd7d8df authored by Greg Kroah-Hartman's avatar Greg Kroah-Hartman
Browse files

Merge tag 'misc-habanalabs-next-2020-05-19' of...

Merge tag 'misc-habanalabs-next-2020-05-19' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next

Oded writes:

This tag contains the following changes for kernel 5.8:

- GAUDI ASIC support. The tag contains code and header files needed to
  initialize the GAUDI ASIC and run workloads on it. There are changes to
  the common code that are needed for GAUDI and there is the addition of
  new ASIC-dependent code of GAUDI.

- Add new feature of signal/wait command submissions. This is relevant to
  GAUDI only and allows the user to sync between streams (queues) inside
  the device.

- Allow user to retrieve the device time alongside the host time, to allow
  a user application to synchronize device time together with host
  time during profiling.

- Change ASIC's CPU initialization by loading its boot loader code from the
  Host memory (instead of it being programmed on the on-board FLASH).

- Expose more attributes through HWMON.

- Move the ASIC event handling code to be "common code". This will be
  shared between GAUDI and future ASICs. Goya will still use its own code.

- Fix bug in command submission parsing in Goya.

- Small fixes to security configuration (open up some registers for user
  access).

- Improvements to ASIC reset code.

* tag 'misc-habanalabs-next-2020-05-19' of git://people.freedesktop.org/~gabbayo/linux: (38 commits)
  habanalabs: update patched_cb_size for Wreg32
  habanalabs: move event handling to common firmware file
  habanalabs: enable gaudi code in driver
  habanalabs: add gaudi profiler module
  habanalabs: add gaudi security module
  habanalabs: add hwmgr module for gaudi
  habanalabs: add gaudi asic-dependent code
  uapi: habanalabs: add gaudi defines
  habanalabs: add gaudi asic registers header files
  habanalabs: get card type, location from F/W
  habanalabs: support clock gating enable/disable
  habanalabs: set PM profile to auto only for goya
  habanalabs: add dedicated define for hard reset
  habanalabs: check if CoreSight is supported
  habanalabs: add signal/wait to CS IOCTL operations
  habanalabs: handle the h/w sync object
  habanalabs: define ASIC-dependent interface for signal/wait
  uapi: habanalabs: add signal/wait operations
  habanalabs: add missing MODULE_DEVICE_TABLE
  habanalabs: print all CB handles as hex numbers
  ...
parents 57c76221 87eaea1c
Loading
Loading
Loading
Loading
+17 −0
Original line number Diff line number Diff line
@@ -8,6 +8,16 @@ Description: Sets the device address to be used for read or write through
                only when the IOMMU is disabled.
                The acceptable value is a string that starts with "0x"

What:           /sys/kernel/debug/habanalabs/hl<n>/clk_gate
Date:           May 2020
KernelVersion:  5.8
Contact:        oded.gabbay@gmail.com
Description:    Allow the root user to disable/enable in runtime the clock
                gating mechanism in Gaudi. Due to how Gaudi is built, the
                clock gating needs to be disabled in order to access the
                registers of the TPC and MME engines. This is sometimes needed
                during debug by the user and hence the user needs this option

What:           /sys/kernel/debug/habanalabs/hl<n>/command_buffers
Date:           Jan 2019
KernelVersion:  5.1
@@ -150,3 +160,10 @@ KernelVersion: 5.1
Contact:        oded.gabbay@gmail.com
Description:    Displays a list with information about all the active virtual
                address mappings per ASID

What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
Date:           Mar 2020
KernelVersion:  5.6
Contact:        oded.gabbay@gmail.com
Description:    Sets the stop-on-error option for the device engines. A value
                of "0" disables the option; any other value enables it.
+17 −0
Original line number Diff line number Diff line
@@ -10,6 +10,23 @@ KernelVersion: 5.1
Contact:        oded.gabbay@gmail.com
Description:    Version of the application running on the device's CPU

What:           /sys/class/habanalabs/hl<n>/clk_max_freq_mhz
Date:           Jun 2019
KernelVersion:  not yet upstreamed
Contact:        oded.gabbay@gmail.com
Description:    Allows the user to set the maximum clock frequency, in MHz.
                The device clock might be set to lower value than the maximum.
                The user should read the clk_cur_freq_mhz to see the actual
                frequency value of the device clock. This property is valid
                only for the Gaudi ASIC family

What:           /sys/class/habanalabs/hl<n>/clk_cur_freq_mhz
Date:           Jun 2019
KernelVersion:  not yet upstreamed
Contact:        oded.gabbay@gmail.com
Description:    Displays the current frequency, in MHz, of the device clock.
                This property is valid only for the Gaudi ASIC family

What:           /sys/class/habanalabs/hl<n>/cpld_ver
Date:           Jan 2019
KernelVersion:  5.1
+3 −0
Original line number Diff line number Diff line
@@ -13,3 +13,6 @@ habanalabs-$(CONFIG_DEBUG_FS) += debugfs.o

include $(src)/goya/Makefile
habanalabs-y += $(HL_GOYA_FILES)

include $(src)/gaudi/Makefile
habanalabs-y += $(HL_GAUDI_FILES)
+19 −9
Original line number Diff line number Diff line
@@ -105,10 +105,9 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
		goto out_err;
	}

	if (cb_size > HL_MAX_CB_SIZE) {
		dev_err(hdev->dev,
			"CB size %d must be less then %d\n",
			cb_size, HL_MAX_CB_SIZE);
	if (cb_size > SZ_2M) {
		dev_err(hdev->dev, "CB size %d must be less than %d\n",
			cb_size, SZ_2M);
		rc = -EINVAL;
		goto out_err;
	}
@@ -211,7 +210,7 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_cb_args *args = data;
	struct hl_device *hdev = hpriv->hdev;
	u64 handle;
	u64 handle = 0;
	int rc;

	if (hl_device_disabled_or_in_reset(hdev)) {
@@ -223,15 +222,26 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)

	switch (args->in.op) {
	case HL_CB_OP_CREATE:
		rc = hl_cb_create(hdev, &hpriv->cb_mgr, args->in.cb_size,
					&handle, hpriv->ctx->asid);
		if (args->in.cb_size > HL_MAX_CB_SIZE) {
			dev_err(hdev->dev,
				"User requested CB size %d must be less than %d\n",
				args->in.cb_size, HL_MAX_CB_SIZE);
			rc = -EINVAL;
		} else {
			rc = hl_cb_create(hdev, &hpriv->cb_mgr,
						args->in.cb_size, &handle,
						hpriv->ctx->asid);
		}

		memset(args, 0, sizeof(*args));
		args->out.cb_handle = handle;
		break;

	case HL_CB_OP_DESTROY:
		rc = hl_cb_destroy(hdev, &hpriv->cb_mgr,
					args->in.cb_handle);
		break;

	default:
		rc = -ENOTTY;
		break;
@@ -278,7 +288,7 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
	cb = hl_cb_get(hdev, &hpriv->cb_mgr, handle);
	if (!cb) {
		dev_err(hdev->dev,
			"CB mmap failed, no match to handle %d\n", handle);
			"CB mmap failed, no match to handle 0x%x\n", handle);
		return -EINVAL;
	}

@@ -347,7 +357,7 @@ struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
	if (!cb) {
		spin_unlock(&mgr->cb_lock);
		dev_warn(hdev->dev,
			"CB get failed, no match to handle %d\n", handle);
			"CB get failed, no match to handle 0x%x\n", handle);
		return NULL;
	}

+354 −30
Original line number Diff line number Diff line
@@ -11,11 +11,33 @@
#include <linux/uaccess.h>
#include <linux/slab.h>

#define HL_CS_FLAGS_SIG_WAIT	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT)

static void job_wq_completion(struct work_struct *work);
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
		struct hl_ctx *ctx, u64 timeout_us, u64 seq);
static void cs_do_release(struct kref *ref);

/* kref release callback: hands the expired H/W sync object back to the
 * ASIC-specific layer so it can be reset and reused.
 */
static void hl_sob_reset(struct kref *ref)
{
	struct hl_hw_sob *sob = container_of(ref, struct hl_hw_sob, kref);

	sob->hdev->asic_funcs->reset_sob(sob->hdev, sob);
}

/* kref release callback that is installed where a SOB release must never
 * happen; if it fires anyway, log a critical error identifying the SOB.
 */
void hl_sob_reset_error(struct kref *ref)
{
	struct hl_hw_sob *sob = container_of(ref, struct hl_hw_sob, kref);

	dev_crit(sob->hdev->dev,
			"SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
			sob->q_idx, sob->sob_id);
}

static const char *hl_fence_get_driver_name(struct dma_fence *fence)
{
	return "HabanaLabs";
@@ -23,10 +45,10 @@ static const char *hl_fence_get_driver_name(struct dma_fence *fence)

static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
{
	struct hl_dma_fence *hl_fence =
		container_of(fence, struct hl_dma_fence, base_fence);
	struct hl_cs_compl *hl_cs_compl =
		container_of(fence, struct hl_cs_compl, base_fence);

	return dev_name(hl_fence->hdev->dev);
	return dev_name(hl_cs_compl->hdev->dev);
}

static bool hl_fence_enable_signaling(struct dma_fence *fence)
@@ -36,10 +58,41 @@ static bool hl_fence_enable_signaling(struct dma_fence *fence)

static void hl_fence_release(struct dma_fence *fence)
{
	struct hl_dma_fence *hl_fence =
		container_of(fence, struct hl_dma_fence, base_fence);
	struct hl_cs_compl *hl_cs_cmpl =
		container_of(fence, struct hl_cs_compl, base_fence);
	struct hl_device *hdev = hl_cs_cmpl->hdev;

	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
			(hl_cs_cmpl->type == CS_TYPE_WAIT)) {

		dev_dbg(hdev->dev,
			"CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
			hl_cs_cmpl->cs_seq,
			hl_cs_cmpl->type,
			hl_cs_cmpl->hw_sob->sob_id,
			hl_cs_cmpl->sob_val);

		/*
		 * A signal CS can get completion while the corresponding wait
		 * for signal CS is on its way to the PQ. The wait for signal CS
		 * will get stuck if the signal CS incremented the SOB to its
		 * max value and there are no pending (submitted) waits on this
		 * SOB.
		 * We do the following to avoid this situation:
		 * 1. The wait for signal CS must get a ref for the signal CS as
		 *    soon as possible in cs_ioctl_signal_wait() and put it
		 *    before being submitted to the PQ but after it incremented
		 *    the SOB refcnt in init_signal_wait_cs().
		 * 2. Signal/Wait for signal CS will decrement the SOB refcnt
		 *    here.
		 * These two measures guarantee that the wait for signal CS will
		 * reset the SOB upon completion rather than the signal CS and
		 * hence the above scenario is avoided.
		 */
		kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
	}

	kfree_rcu(hl_fence, base_fence.rcu);
	kfree_rcu(hl_cs_cmpl, base_fence.rcu);
}

static const struct dma_fence_ops hl_fence_ops = {
@@ -113,6 +166,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
		if (!rc) {
			job->patched_cb = parser.patched_cb;
			job->job_cb_size = parser.patched_cb_size;
			job->contains_dma_pkt = parser.contains_dma_pkt;

			spin_lock(&job->patched_cb->lock);
			job->patched_cb->cs_cnt++;
@@ -259,6 +313,12 @@ static void cs_do_release(struct kref *ref)

			spin_unlock(&hdev->hw_queues_mirror_lock);
		}
	} else if (cs->type == CS_TYPE_WAIT) {
		/*
		 * In case the wait for signal CS was submitted, the put occurs
		 * in init_signal_wait_cs() right before hanging on the PQ.
		 */
		dma_fence_put(cs->signal_fence);
	}

	/*
@@ -312,9 +372,9 @@ static void cs_timedout(struct work_struct *work)
}

static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
			struct hl_cs **cs_new)
			enum hl_cs_type cs_type, struct hl_cs **cs_new)
{
	struct hl_dma_fence *fence;
	struct hl_cs_compl *cs_cmpl;
	struct dma_fence *other = NULL;
	struct hl_cs *cs;
	int rc;
@@ -326,25 +386,27 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
	cs->ctx = ctx;
	cs->submitted = false;
	cs->completed = false;
	cs->type = cs_type;
	INIT_LIST_HEAD(&cs->job_list);
	INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
	kref_init(&cs->refcount);
	spin_lock_init(&cs->job_lock);

	fence = kmalloc(sizeof(*fence), GFP_ATOMIC);
	if (!fence) {
	cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
	if (!cs_cmpl) {
		rc = -ENOMEM;
		goto free_cs;
	}

	fence->hdev = hdev;
	spin_lock_init(&fence->lock);
	cs->fence = &fence->base_fence;
	cs_cmpl->hdev = hdev;
	cs_cmpl->type = cs->type;
	spin_lock_init(&cs_cmpl->lock);
	cs->fence = &cs_cmpl->base_fence;

	spin_lock(&ctx->cs_lock);

	fence->cs_seq = ctx->cs_sequence;
	other = ctx->cs_pending[fence->cs_seq & (HL_MAX_PENDING_CS - 1)];
	cs_cmpl->cs_seq = ctx->cs_sequence;
	other = ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)];
	if ((other) && (!dma_fence_is_signaled(other))) {
		spin_unlock(&ctx->cs_lock);
		dev_dbg(hdev->dev,
@@ -353,16 +415,16 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
		goto free_fence;
	}

	dma_fence_init(&fence->base_fence, &hl_fence_ops, &fence->lock,
	dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
			ctx->asid, ctx->cs_sequence);

	cs->sequence = fence->cs_seq;
	cs->sequence = cs_cmpl->cs_seq;

	ctx->cs_pending[fence->cs_seq & (HL_MAX_PENDING_CS - 1)] =
							&fence->base_fence;
	ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)] =
							&cs_cmpl->base_fence;
	ctx->cs_sequence++;

	dma_fence_get(&fence->base_fence);
	dma_fence_get(&cs_cmpl->base_fence);

	dma_fence_put(other);

@@ -373,7 +435,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
	return 0;

free_fence:
	kfree(fence);
	kfree(cs_cmpl);
free_cs:
	kfree(cs);
	return rc;
@@ -499,7 +561,7 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
	return job;
}

static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
				u32 num_chunks, u64 *cs_seq)
{
	struct hl_device *hdev = hpriv->hdev;
@@ -538,7 +600,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
	/* increment refcnt for context */
	hl_ctx_get(hdev, hpriv->ctx);

	rc = allocate_cs(hdev, hpriv->ctx, &cs);
	rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, &cs);
	if (rc) {
		hl_ctx_put(hpriv->ctx);
		goto free_cs_chunk_array;
@@ -652,13 +714,230 @@ out:
	return rc;
}

/*
 * cs_ioctl_signal_wait() - submit a signal CS or a wait-for-signal CS
 * (sync stream) on behalf of the user.
 * @hpriv: per-file private data of the calling process.
 * @cs_type: CS_TYPE_SIGNAL or CS_TYPE_WAIT.
 * @chunks: user pointer to the CS chunk array (exactly one chunk here).
 * @num_chunks: number of chunks in @chunks.
 * @cs_seq: out parameter, set to the sequence number of the created CS,
 *          or ULLONG_MAX if no CS was created.
 *
 * For a wait CS, the signal CS it depends on is looked up by sequence
 * number; if the signal CS already completed, the function returns 0
 * without creating a new CS. A single kernel-owned CB is built per CS
 * (no parsing needed - the kernel CB is used as the patched CB directly).
 *
 * Return: HL_CS_STATUS_SUCCESS on submission, 0 if the signal CS already
 * finished, negative errno on failure.
 */
static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
				void __user *chunks, u32 num_chunks,
				u64 *cs_seq)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	struct hl_cs_chunk *cs_chunk_array, *chunk;
	struct hw_queue_properties *hw_queue_prop;
	struct dma_fence *sig_fence = NULL;
	struct hl_cs_job *job;
	struct hl_cs *cs;
	struct hl_cb *cb;
	u64 *signal_seq_arr = NULL, signal_seq;
	u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
	int rc;

	/* Report "no CS" until one is actually allocated below */
	*cs_seq = ULLONG_MAX;

	if (num_chunks > HL_MAX_JOBS_PER_CS) {
		dev_err(hdev->dev,
			"Number of chunks can NOT be larger than %d\n",
			HL_MAX_JOBS_PER_CS);
		rc = -EINVAL;
		goto out;
	}

	cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
					GFP_ATOMIC);
	if (!cs_chunk_array) {
		rc = -ENOMEM;
		goto out;
	}

	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
	if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
		rc = -EFAULT;
		goto free_cs_chunk_array;
	}

	/* currently it is guaranteed to have only one chunk */
	chunk = &cs_chunk_array[0];
	q_idx = chunk->queue_index;
	/*
	 * NOTE(review): the element address is computed from the
	 * user-supplied q_idx before the bounds check below. The ||
	 * short-circuit prevents an out-of-bounds *read*, but hoisting the
	 * q_idx range check above this line would be cleaner - TODO confirm.
	 */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];

	/* Sync stream CSs are allowed only on external queues */
	if ((q_idx >= HL_MAX_QUEUES) ||
			(hw_queue_prop->type != QUEUE_TYPE_EXT)) {
		dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

	if (cs_type == CS_TYPE_WAIT) {
		struct hl_cs_compl *sig_waitcs_cmpl;

		signal_seq_arr_len = chunk->num_signal_seq_arr;

		/* currently only one signal seq is supported */
		if (signal_seq_arr_len != 1) {
			dev_err(hdev->dev,
				"Wait for signal CS supports only one signal CS seq\n");
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		signal_seq_arr = kmalloc_array(signal_seq_arr_len,
						sizeof(*signal_seq_arr),
						GFP_ATOMIC);
		if (!signal_seq_arr) {
			rc = -ENOMEM;
			goto free_cs_chunk_array;
		}

		size_to_copy = chunk->num_signal_seq_arr *
				sizeof(*signal_seq_arr);
		if (copy_from_user(signal_seq_arr,
					(void __user *) chunk->signal_seq_arr,
					size_to_copy)) {
			dev_err(hdev->dev,
				"Failed to copy signal seq array from user\n");
			rc = -EFAULT;
			goto free_signal_seq_array;
		}

		/* currently it is guaranteed to have only one signal seq */
		signal_seq = signal_seq_arr[0];
		/* Takes a reference on the fence if it is still pending */
		sig_fence = hl_ctx_get_fence(ctx, signal_seq);
		if (IS_ERR(sig_fence)) {
			dev_err(hdev->dev,
				"Failed to get signal CS with seq 0x%llx\n",
				signal_seq);
			rc = PTR_ERR(sig_fence);
			goto free_signal_seq_array;
		}

		if (!sig_fence) {
			/* signal CS already finished */
			rc = 0;
			goto free_signal_seq_array;
		}

		sig_waitcs_cmpl =
			container_of(sig_fence, struct hl_cs_compl, base_fence);

		/* A wait CS may only depend on a *signal* CS fence */
		if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
			dev_err(hdev->dev,
				"CS seq 0x%llx is not of a signal CS\n",
				signal_seq);
			dma_fence_put(sig_fence);
			rc = -EINVAL;
			goto free_signal_seq_array;
		}

		if (dma_fence_is_signaled(sig_fence)) {
			/* signal CS already finished */
			dma_fence_put(sig_fence);
			rc = 0;
			goto free_signal_seq_array;
		}
	}

	/* increment refcnt for context */
	hl_ctx_get(hdev, ctx);

	rc = allocate_cs(hdev, ctx, cs_type, &cs);
	if (rc) {
		/* Drop the signal fence reference taken above */
		if (cs_type == CS_TYPE_WAIT)
			dma_fence_put(sig_fence);
		hl_ctx_put(ctx);
		goto free_signal_seq_array;
	}

	/*
	 * Save the signal CS fence for later initialization right before
	 * hanging the wait CS on the queue.
	 */
	if (cs->type == CS_TYPE_WAIT)
		cs->signal_fence = sig_fence;

	hl_debugfs_add_cs(cs);

	*cs_seq = cs->sequence;

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto put_cs;
	}

	/* NOTE(review): -ENOMEM would arguably fit this failure better
	 * than -EFAULT - confirm against the driver's error conventions.
	 */
	cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
	if (!cb) {
		kfree(job);
		rc = -EFAULT;
		goto put_cs;
	}

	/* The actual packet size is ASIC-dependent */
	if (cs->type == CS_TYPE_WAIT)
		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
	else
		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	job->user_cb->cs_cnt++;
	job->user_cb_size = cb_size;
	job->hw_queue_id = q_idx;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() out of two reasons - we don't need the CB in
	 * the CB idr anymore and to decrement its refcount as it was
	 * incremented inside hl_cb_kernel_create().
	 */
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	/* increment refcount as for external queues we get completion */
	cs_get(cs);

	hl_debugfs_add_job(hdev, job);

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
				ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	rc = HL_CS_STATUS_SUCCESS;
	goto put_cs;

free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_signal_seq_array:
	if (cs_type == CS_TYPE_WAIT)
		kfree(signal_seq_arr);
free_cs_chunk_array:
	kfree(cs_chunk_array);
out:
	return rc;
}

int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct hl_device *hdev = hpriv->hdev;
	union hl_cs_args *args = data;
	struct hl_ctx *ctx = hpriv->ctx;
	void __user *chunks_execute, *chunks_restore;
	u32 num_chunks_execute, num_chunks_restore;
	enum hl_cs_type cs_type;
	u32 num_chunks_execute, num_chunks_restore, sig_wait_flags;
	u64 cs_seq = ULONG_MAX;
	int rc, do_ctx_switch;
	bool need_soft_reset = false;
@@ -671,9 +950,34 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
		goto out;
	}

	sig_wait_flags = args->in.cs_flags & HL_CS_FLAGS_SIG_WAIT;

	if (unlikely(sig_wait_flags == HL_CS_FLAGS_SIG_WAIT)) {
		dev_err(hdev->dev,
			"Signal and wait CS flags are mutually exclusive, context %d\n",
		ctx->asid);
		rc = -EINVAL;
		goto out;
	}

	if (unlikely((sig_wait_flags & HL_CS_FLAGS_SIG_WAIT) &&
			(!hdev->supports_sync_stream))) {
		dev_err(hdev->dev, "Sync stream CS is not supported\n");
		rc = -EINVAL;
		goto out;
	}

	if (args->in.cs_flags & HL_CS_FLAGS_SIGNAL)
		cs_type = CS_TYPE_SIGNAL;
	else if (args->in.cs_flags & HL_CS_FLAGS_WAIT)
		cs_type = CS_TYPE_WAIT;
	else
		cs_type = CS_TYPE_DEFAULT;

	chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute;
	num_chunks_execute = args->in.num_chunks_execute;

	if (cs_type == CS_TYPE_DEFAULT) {
		if (!num_chunks_execute) {
			dev_err(hdev->dev,
				"Got execute CS with 0 chunks, context %d\n",
@@ -681,6 +985,13 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
			rc = -EINVAL;
			goto out;
		}
	} else if (num_chunks_execute != 1) {
		dev_err(hdev->dev,
			"Sync stream CS mandates one chunk only, context %d\n",
			ctx->asid);
		rc = -EINVAL;
		goto out;
	}

	do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);

@@ -722,7 +1033,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
			"Need to run restore phase but restore CS is empty\n");
			rc = 0;
		} else {
			rc = _hl_cs_ioctl(hpriv, chunks_restore,
			rc = cs_ioctl_default(hpriv, chunks_restore,
						num_chunks_restore, &cs_seq);
		}

@@ -764,7 +1075,12 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
		}
	}

	rc = _hl_cs_ioctl(hpriv, chunks_execute, num_chunks_execute, &cs_seq);
	if (cs_type == CS_TYPE_DEFAULT)
		rc = cs_ioctl_default(hpriv, chunks_execute, num_chunks_execute,
					&cs_seq);
	else
		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks_execute,
						num_chunks_execute, &cs_seq);

out:
	if (rc != -EAGAIN) {
@@ -796,6 +1112,10 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
	fence = hl_ctx_get_fence(ctx, seq);
	if (IS_ERR(fence)) {
		rc = PTR_ERR(fence);
		if (rc == -EINVAL)
			dev_notice_ratelimited(hdev->dev,
				"Can't wait on seq %llu because current CS is at seq %llu\n",
				seq, ctx->cs_sequence);
	} else if (fence) {
		rc = dma_fence_wait_timeout(fence, true, timeout);
		if (fence->error == -ETIMEDOUT)
@@ -803,8 +1123,12 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
		else if (fence->error == -EIO)
			rc = -EIO;
		dma_fence_put(fence);
	} else
	} else {
		dev_dbg(hdev->dev,
			"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
			seq, ctx->cs_sequence);
		rc = 1;
	}

	hl_ctx_put(ctx);

Loading