Commit 21e7a346 authored by Ofir Bitton, committed by Oded Gabbay
Browse files

habanalabs: sync stream generic functionality



Currently sync stream is limited only for external queues. We want to
remove this constraint by adding a new queue property dedicated for sync
stream. In addition we move the initialization and reset methods to the
common code since we can re-use them with slight changes.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent c16d45f4
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -740,6 +740,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
	struct hl_cs_job *job;
	struct hl_cs *cs;
	struct hl_cb *cb;
	enum hl_queue_type q_type;
	u64 *signal_seq_arr = NULL, signal_seq;
	u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
	int rc;
@@ -772,9 +773,10 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
	chunk = &cs_chunk_array[0];
	q_idx = chunk->queue_index;
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
	q_type = hw_queue_prop->type;

	if ((q_idx >= HL_MAX_QUEUES) ||
			(hw_queue_prop->type != QUEUE_TYPE_EXT)) {
			(!hw_queue_prop->supports_sync_stream)) {
		dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
		rc = -EINVAL;
		goto free_cs_chunk_array;
@@ -871,7 +873,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,

	*cs_seq = cs->sequence;

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	job = hl_cs_allocate_job(hdev, q_type, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
+5 −41
Original line number Diff line number Diff line
@@ -345,10 +345,12 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].requires_kernel_cb = 1;
			prop->hw_queues_props[i].supports_sync_stream = 1;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].requires_kernel_cb = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
@@ -357,6 +359,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
			prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].requires_kernel_cb = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
		}
	}

@@ -364,7 +367,8 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
		prop->hw_queues_props[i].type = QUEUE_TYPE_NA;

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;

	prop->sync_stream_first_sob = 0;
	prop->sync_stream_first_mon = 0;
	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address +
@@ -6296,44 +6300,6 @@ static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
	return gaudi_cq_assignment[cq_idx];
}

/*
 * gaudi_ext_queue_init() - set up sync-stream bookkeeping for one external
 * queue: assign its reserved SOB/monitor id ranges and initialize each of
 * its reserved sync objects.
 */
static void gaudi_ext_queue_init(struct hl_device *hdev, u32 q_idx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_hw_queue *q = &hdev->kernel_queues[q_idx];
	int i, idx;

	/*
	 * External queues might not sit sequentially, so the running
	 * external-queue counter (not q_idx) selects the SOB/MON base ids.
	 */
	idx = gaudi->ext_queue_idx++;

	q->base_sob_id = idx * HL_RSVD_SOBS;
	q->base_mon_id = idx * HL_RSVD_MONS;
	q->next_sob_val = 1;
	q->curr_sob_offset = 0;

	for (i = 0 ; i < HL_RSVD_SOBS ; i++) {
		struct hl_hw_sob *sob = &q->hw_sob[i];

		sob->hdev = hdev;
		sob->sob_id = q->base_sob_id + i;
		sob->q_idx = q_idx;
		kref_init(&sob->kref);
	}
}

/*
 * gaudi_ext_queue_reset() - return a queue's sync-stream state to its
 * initial values after a device reset.
 */
static void gaudi_ext_queue_reset(struct hl_device *hdev, u32 q_idx)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[q_idx];

	/*
	 * A stuck CS may leave the currently-used SOB with a refcnt bigger
	 * than 1, so force the kref back to 1 instead of trusting it.
	 */
	kref_init(&q->hw_sob[q->curr_sob_offset].kref);
	q->curr_sob_offset = 0;
	q->next_sob_val = 1;
}

static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) +
@@ -6636,8 +6602,6 @@ static const struct hl_asic_funcs gaudi_funcs = {
	.read_device_fw_version = gaudi_read_device_fw_version,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.ext_queue_init = gaudi_ext_queue_init,
	.ext_queue_reset = gaudi_ext_queue_reset,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
+0 −2
Original line number Diff line number Diff line
@@ -234,7 +234,6 @@ struct gaudi_internal_qman_info {
 *                      engine.
 * @multi_msi_mode: whether we are working in multi MSI single MSI mode.
 *                  Multi MSI is possible only with IOMMU enabled.
 * @ext_queue_idx: helper index for external queues initialization.
 * @mmu_cache_inv_pi: PI for MMU cache invalidation flow. The H/W expects an
 *                    8-bit value so use u8.
 */
@@ -255,7 +254,6 @@ struct gaudi_device {
	u32				events_stat_aggregate[GAUDI_EVENT_SIZE];
	u32				hw_cap_initialized;
	u8				multi_msi_mode;
	u8				ext_queue_idx;
	u8				mmu_cache_inv_pi;
};

+0 −12
Original line number Diff line number Diff line
@@ -5156,16 +5156,6 @@ u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
	return cq_idx;
}

/* Intentional no-op: Goya keeps no per-queue sync-stream state to set up. */
static void goya_ext_queue_init(struct hl_device *hdev, u32 q_idx)
{

}

/* Intentional no-op: Goya keeps no per-queue sync-stream state to reset. */
static void goya_ext_queue_reset(struct hl_device *hdev, u32 q_idx)
{

}

static u32 goya_get_signal_cb_size(struct hl_device *hdev)
{
	return 0;
@@ -5279,8 +5269,6 @@ static const struct hl_asic_funcs goya_funcs = {
	.read_device_fw_version = goya_read_device_fw_version,
	.load_firmware_to_device = goya_load_firmware_to_device,
	.load_boot_fit_to_device = goya_load_boot_fit_to_device,
	.ext_queue_init = goya_ext_queue_init,
	.ext_queue_reset = goya_ext_queue_reset,
	.get_signal_cb_size = goya_get_signal_cb_size,
	.get_wait_cb_size = goya_get_wait_cb_size,
	.gen_signal_cb = goya_gen_signal_cb,
+15 −4
Original line number Diff line number Diff line
@@ -50,6 +50,10 @@
/* MMU */
#define MMU_HASH_TABLE_BITS		7 /* 1 << 7 buckets */

/*
 * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream
 * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream
 */
#define HL_RSVD_SOBS			4
#define HL_RSVD_MONS			2

@@ -141,11 +145,13 @@ struct hl_hw_sob {
 *               false otherwise.
 * @requires_kernel_cb: true if a CB handle must be provided for jobs on this
 *                      queue, false otherwise (a CB address must be provided).
 * @supports_sync_stream: True if queue supports sync stream
 */
struct hw_queue_properties {
	enum hl_queue_type	type;
	u8			driver_only;
	u8			requires_kernel_cb;
	u8			supports_sync_stream;
};

/**
@@ -245,6 +251,9 @@ struct hl_mmu_properties {
 * @cb_pool_cb_cnt: number of CBs in the CB pool.
 * @cb_pool_cb_size: size of each CB in the CB pool.
 * @tpc_enabled_mask: which TPCs are enabled.
 * @sync_stream_first_sob: first sync object available for sync stream use
 * @sync_stream_first_mon: first monitor available for sync stream use
 * @completion_queues_count: number of completion queues.
 */
struct asic_fixed_properties {
@@ -286,6 +295,8 @@ struct asic_fixed_properties {
	u32				cb_pool_cb_cnt;
	u32				cb_pool_cb_size;
	u32				max_pending_cs;
	u16				sync_stream_first_sob;
	u16				sync_stream_first_mon;
	u8				tpc_enabled_mask;
	u8				completion_queues_count;
};
@@ -423,6 +434,7 @@ struct hl_cs_job;
 *         exist).
 * @curr_sob_offset: the id offset to the currently used SOB from the
 *                   HL_RSVD_SOBS that are being used by this queue.
 * @supports_sync_stream: True if queue supports sync stream
 */
struct hl_hw_queue {
	struct hl_hw_sob	hw_sob[HL_RSVD_SOBS];
@@ -441,6 +453,7 @@ struct hl_hw_queue {
	u16			base_mon_id;
	u8			valid;
	u8			curr_sob_offset;
	u8			supports_sync_stream;
};

/**
@@ -603,8 +616,6 @@ enum hl_pll_frequency {
 *                          contained in registers
 * @load_firmware_to_device: load the firmware to the device's memory
 * @load_boot_fit_to_device: load boot fit to device's memory
 * @ext_queue_init: Initialize the given external queue.
 * @ext_queue_reset: Reset the given external queue.
 * @get_signal_cb_size: Get signal CB size.
 * @get_wait_cb_size: Get wait CB size.
 * @gen_signal_cb: Generate a signal CB.
@@ -707,8 +718,6 @@ struct hl_asic_funcs {
					enum hl_fw_component fwc);
	int (*load_firmware_to_device)(struct hl_device *hdev);
	int (*load_boot_fit_to_device)(struct hl_device *hdev);
	void (*ext_queue_init)(struct hl_device *hdev, u32 hw_queue_id);
	void (*ext_queue_reset)(struct hl_device *hdev, u32 hw_queue_id);
	u32 (*get_signal_cb_size)(struct hl_device *hdev);
	u32 (*get_wait_cb_size)(struct hl_device *hdev);
	void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id);
@@ -1436,6 +1445,7 @@ struct hl_device_idle_busy_ts {
 * @cdev_sysfs_created: were char devices and sysfs nodes created.
 * @stop_on_err: true if engines should stop on error.
 * @supports_sync_stream: is sync stream supported.
 * @sync_stream_queue_idx: helper index for sync stream queues initialization.
 * @supports_coresight: is CoreSight supported.
 * @supports_soft_reset: is soft reset supported.
 */
@@ -1523,6 +1533,7 @@ struct hl_device {
	u8				cdev_sysfs_created;
	u8				stop_on_err;
	u8				supports_sync_stream;
	u8				sync_stream_queue_idx;
	u8				supports_coresight;
	u8				supports_soft_reset;

Loading