Commit 9b4a66fd authored by Greg Kroah-Hartman
Browse files

Merge tag 'misc-habanalabs-next-2019-09-05' of...

Merge tag 'misc-habanalabs-next-2019-09-05' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next

Oded writes:

This tag contains the following changes for kernel 5.4:

- Create an additional char device per PCI device. The new char device
  allows any application to query the device for stats, information, idle
  state and more. This is needed to support system/monitoring
  applications, while also allowing the deep-learning application to send
  work to the ASIC through the main (original) char device.

- Fix a possible kernel crash in case the user supplies a
  smaller-than-required buffer to the DEBUG IOCTL.

- Expose the device to userspace only after initialization is done, to
  prevent a race between initialization and the user submitting workloads.

- Add uapi, as part of INFO IOCTL, to allow the user to query the device
  utilization rate.

- Add uapi, as part of INFO IOCTL, to allow the user to retrieve aggregate
  H/W events, i.e. a count of H/W events since the driver was loaded.

- Register to the HWMON subsystem with the board's name, to allow the
  user to prepare a custom sensor file per board.

- Use correct macros for endian swapping.

- Improve error printing in multiple places.

- Small bug fixes.

* tag 'misc-habanalabs-next-2019-09-05' of git://people.freedesktop.org/~gabbayo/linux: (30 commits)
  habanalabs: correctly cast variable to __le32
  habanalabs: show correct id in error print
  habanalabs: stop using the acronym KMD
  habanalabs: display card name as sensors header
  habanalabs: add uapi to retrieve aggregate H/W events
  habanalabs: add uapi to retrieve device utilization
  habanalabs: Make the Coresight timestamp perpetual
  habanalabs: explicitly set the queue-id enumerated numbers
  habanalabs: print to kernel log when reset is finished
  habanalabs: replace __le32_to_cpu with le32_to_cpu
  habanalabs: replace __cpu_to_le32/64 with cpu_to_le32/64
  habanalabs: Handle HW_IP_INFO if device disabled or in reset
  habanalabs: Expose devices after initialization is done
  habanalabs: improve security in Debug IOCTL
  habanalabs: use default structure for user input in Debug IOCTL
  habanalabs: Add descriptive name to PSOC app status register
  habanalabs: Add descriptive names to PSOC scratch-pad registers
  habanalabs: create two char devices per ASIC
  habanalabs: change device_setup_cdev() to be more generic
  habanalabs: maintain a list of file private data objects
  ...
parents 25ec8710 6dc66f7c
Loading
Loading
Loading
Loading
+4 −10
Original line number Diff line number Diff line
@@ -57,6 +57,7 @@ KernelVersion: 5.1
Contact:        oded.gabbay@gmail.com
Description:    Allows the user to set the maximum clock frequency for MME, TPC
                and IC when the power management profile is set to "automatic".
                This property is valid only for the Goya ASIC family

What:           /sys/class/habanalabs/hl<n>/ic_clk
Date:           Jan 2019
@@ -127,8 +128,8 @@ Description: Power management profile. Values are "auto", "manual". In "auto"
                the max clock frequency to a low value when there are no user
                processes that are opened on the device's file. In "manual"
                mode, the user sets the maximum clock frequency by writing to
                ic_clk, mme_clk and tpc_clk

                ic_clk, mme_clk and tpc_clk. This property is valid only for
                the Goya ASIC family

What:           /sys/class/habanalabs/hl<n>/preboot_btl_ver
Date:           Jan 2019
@@ -187,10 +188,3 @@ Date: Jan 2019
KernelVersion:  5.1
Contact:        oded.gabbay@gmail.com
Description:    Version of the u-boot running on the device's CPU
 No newline at end of file

What:           /sys/class/habanalabs/hl<n>/write_open_cnt
Date:           Jan 2019
KernelVersion:  5.1
Contact:        oded.gabbay@gmail.com
Description:    Displays the total number of user processes that are currently
                opened on the device's file
+1 −1
Original line number Diff line number Diff line
@@ -18,7 +18,7 @@ int hl_asid_init(struct hl_device *hdev)

	mutex_init(&hdev->asid_mutex);

	/* ASID 0 is reserved for KMD and device CPU */
	/* ASID 0 is reserved for the kernel driver and device CPU */
	set_bit(0, hdev->asid_bitmap);

	return 0;
+2 −1
Original line number Diff line number Diff line
@@ -397,7 +397,8 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size)
	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
			HL_KERNEL_ASID_ID);
	if (rc) {
		dev_err(hdev->dev, "Failed to allocate CB for KMD %d\n", rc);
		dev_err(hdev->dev,
			"Failed to allocate CB for the kernel driver %d\n", rc);
		return NULL;
	}

+21 −6
Original line number Diff line number Diff line
@@ -178,11 +178,23 @@ static void cs_do_release(struct kref *ref)

	/* We also need to update CI for internal queues */
	if (cs->submitted) {
		int cs_cnt = atomic_dec_return(&hdev->cs_active_cnt);
		hdev->asic_funcs->hw_queues_lock(hdev);

		WARN_ONCE((cs_cnt < 0),
			"hl%d: error in CS active cnt %d\n",
			hdev->id, cs_cnt);
		hdev->cs_active_cnt--;
		if (!hdev->cs_active_cnt) {
			struct hl_device_idle_busy_ts *ts;

			ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
			ts->busy_to_idle_ts = ktime_get();

			if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
				hdev->idle_busy_ts_idx = 0;
		} else if (hdev->cs_active_cnt < 0) {
			dev_crit(hdev->dev, "CS active cnt %d is negative\n",
				hdev->cs_active_cnt);
		}

		hdev->asic_funcs->hw_queues_unlock(hdev);

		hl_int_hw_queue_update_ci(cs);

@@ -305,6 +317,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
	other = ctx->cs_pending[fence->cs_seq & (HL_MAX_PENDING_CS - 1)];
	if ((other) && (!dma_fence_is_signaled(other))) {
		spin_unlock(&ctx->cs_lock);
		dev_dbg(hdev->dev,
			"Rejecting CS because of too many in-flights CS\n");
		rc = -EAGAIN;
		goto free_fence;
	}
@@ -395,8 +409,9 @@ static struct hl_cb *validate_queue_index(struct hl_device *hdev,
		return NULL;
	}

	if (hw_queue_prop->kmd_only) {
		dev_err(hdev->dev, "Queue index %d is restricted for KMD\n",
	if (hw_queue_prop->driver_only) {
		dev_err(hdev->dev,
			"Queue index %d is restricted for the kernel driver\n",
			chunk->queue_index);
		return NULL;
	} else if (hw_queue_prop->type == QUEUE_TYPE_INT) {
+24 −16
Original line number Diff line number Diff line
@@ -26,12 +26,13 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
		dma_fence_put(ctx->cs_pending[i]);

	if (ctx->asid != HL_KERNEL_ASID_ID) {
		/*
		 * The engines are stopped as there is no executing CS, but the
		/* The engines are stopped as there is no executing CS, but the
		 * Coresight might be still working by accessing addresses
		 * related to the stopped engines. Hence stop it explicitly.
		 * Stop only if this is the compute context, as there can be
		 * only one compute context
		 */
		if (hdev->in_debug)
		if ((hdev->in_debug) && (hdev->compute_ctx == ctx))
			hl_device_set_debug_mode(hdev, false);

		hl_vm_ctx_fini(ctx);
@@ -67,29 +68,36 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
		goto out_err;
	}

	mutex_lock(&mgr->ctx_lock);
	rc = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
	mutex_unlock(&mgr->ctx_lock);

	if (rc < 0) {
		dev_err(hdev->dev, "Failed to allocate IDR for a new CTX\n");
		goto free_ctx;
	}

	ctx->handle = rc;

	rc = hl_ctx_init(hdev, ctx, false);
	if (rc)
		goto free_ctx;
		goto remove_from_idr;

	hl_hpriv_get(hpriv);
	ctx->hpriv = hpriv;

	/* TODO: remove for multiple contexts */
	/* TODO: remove for multiple contexts per process */
	hpriv->ctx = ctx;
	hdev->user_ctx = ctx;

	mutex_lock(&mgr->ctx_lock);
	rc = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
	mutex_unlock(&mgr->ctx_lock);

	if (rc < 0) {
		dev_err(hdev->dev, "Failed to allocate IDR for a new CTX\n");
		hl_ctx_free(hdev, ctx);
		goto out_err;
	}
	/* TODO: remove the following line for multiple process support */
	hdev->compute_ctx = ctx;

	return 0;

remove_from_idr:
	mutex_lock(&mgr->ctx_lock);
	idr_remove(&mgr->ctx_handles, ctx->handle);
	mutex_unlock(&mgr->ctx_lock);
free_ctx:
	kfree(ctx);
out_err:
@@ -120,7 +128,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
	ctx->thread_ctx_switch_wait_token = 0;

	if (is_kernel_ctx) {
		ctx->asid = HL_KERNEL_ASID_ID; /* KMD gets ASID 0 */
		ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
		rc = hl_mmu_ctx_init(ctx);
		if (rc) {
			dev_err(hdev->dev, "Failed to init mmu ctx module\n");
Loading