Commit e9730763 authored by Oded Gabbay
Browse files

habanalabs: add uapi to retrieve aggregate H/W events



Add a new opcode to INFO IOCTL to retrieve aggregate H/W events, i.e. the
event counters are NOT cleared upon device reset, but count from the
loading of the driver.

Add the code to support it in the device event handling function.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Reviewed-by: Omer Shpigelman <oshpigelman@habana.ai>
parent 75b3cb2b
Loading
Loading
Loading
Loading
+7 −2
Original line number Diff line number Diff line
@@ -4469,6 +4469,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
	struct goya_device *goya = hdev->asic_specific;

	goya->events_stat[event_type]++;
	goya->events_stat_aggregate[event_type]++;

	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
@@ -4550,12 +4551,16 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
	}
}

/*
 * goya_get_events_stat - get one of the H/W events counters arrays
 *
 * @hdev: pointer to the habanalabs device structure
 * @aggregate: true to get the aggregate counters array
 *             (events_stat_aggregate), false to get the regular counters
 *             array (events_stat)
 * @size: set by this function to the size, in bytes, of the returned array
 *
 * Returns a pointer to the selected counters array inside the ASIC-specific
 * goya device structure. The array is not copied.
 */
void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
{
	struct goya_device *goya = hdev->asic_specific;

	if (aggregate) {
		*size = (u32) sizeof(goya->events_stat_aggregate);
		return goya->events_stat_aggregate;
	}

	*size = (u32) sizeof(goya->events_stat);
	return goya->events_stat;
}

+2 −1
Original line number Diff line number Diff line
@@ -162,6 +162,7 @@ struct goya_device {

	u64		ddr_bar_cur_addr;
	u32		events_stat[GOYA_ASYNC_EVENT_ID_SIZE];
	u32		events_stat_aggregate[GOYA_ASYNC_EVENT_ID_SIZE];
	u32		hw_cap_initialized;
	u8		device_cpu_mmu_mappings_done;
};
@@ -215,7 +216,7 @@ int goya_suspend(struct hl_device *hdev);
int goya_resume(struct hl_device *hdev);

void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry);
void *goya_get_events_stat(struct hl_device *hdev, u32 *size);
void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size);

void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
				u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec);
+2 −1
Original line number Diff line number Diff line
@@ -558,7 +558,8 @@ struct hl_asic_funcs {
				struct hl_eq_entry *eq_entry);
	void (*set_pll_profile)(struct hl_device *hdev,
			enum hl_pll_frequency freq);
	void* (*get_events_stat)(struct hl_device *hdev, u32 *size);
	void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
				u32 *size);
	u64 (*read_pte)(struct hl_device *hdev, u64 addr);
	void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val);
	void (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard);
+8 −3
Original line number Diff line number Diff line
@@ -75,7 +75,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
		min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0;
}

static int hw_events_info(struct hl_device *hdev, struct hl_info_args *args)
static int hw_events_info(struct hl_device *hdev, bool aggregate,
			struct hl_info_args *args)
{
	u32 size, max_size = args->return_size;
	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
@@ -84,7 +85,7 @@ static int hw_events_info(struct hl_device *hdev, struct hl_info_args *args)
	if ((!max_size) || (!out))
		return -EINVAL;

	arr = hdev->asic_funcs->get_events_stat(hdev, &size);
	arr = hdev->asic_funcs->get_events_stat(hdev, aggregate, &size);

	return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0;
}
@@ -251,7 +252,7 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,

	switch (args->op) {
	case HL_INFO_HW_EVENTS:
		rc = hw_events_info(hdev, args);
		rc = hw_events_info(hdev, false, args);
		break;

	case HL_INFO_DRAM_USAGE:
@@ -266,6 +267,10 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
		rc = device_utilization(hdev, args);
		break;

	case HL_INFO_HW_EVENTS_AGGREGATE:
		rc = hw_events_info(hdev, true, args);
		break;

	default:
		dev_err(dev, "Invalid request %d\n", args->op);
		rc = -ENOTTY;
+3 −0
Original line number Diff line number Diff line
@@ -93,6 +93,8 @@ enum hl_device_status {
 *                              The period can be between 100ms to 1s, in
 *                              resolution of 100ms. The return value is a
 *                              percentage of the utilization rate.
 * HL_INFO_HW_EVENTS_AGGREGATE - Receive an array describing how many times each
 *                               event occurred since the driver was loaded.
 */
#define HL_INFO_HW_IP_INFO		0
#define HL_INFO_HW_EVENTS		1
@@ -100,6 +102,7 @@ enum hl_device_status {
#define HL_INFO_HW_IDLE			3
#define HL_INFO_DEVICE_STATUS		4
#define HL_INFO_DEVICE_UTILIZATION	6
#define HL_INFO_HW_EVENTS_AGGREGATE	7

#define HL_INFO_VERSION_MAX_LEN	128