Commit 9f306491, authored by farah kassabri, committed by Oded Gabbay
Browse files

habanalabs: add support for getting device total energy



Add driver implementation for reading the total energy consumption
from the device ARM FW.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent 56004701
Loading
Loading
Loading
Loading
+24 −0
Original line number Diff line number Diff line
@@ -411,6 +411,30 @@ int hl_fw_armcp_pci_counters_get(struct hl_device *hdev,
	return rc;
}

/*
 * hl_fw_armcp_total_energy_get - read the total energy consumption from the
 *                                device ARM FW
 *
 * @hdev: pointer to the habanalabs device structure
 * @total_energy: output; receives the value returned by the FW. Written only
 *                when the message was sent successfully.
 *
 * Builds an ARMCP_PACKET_TOTAL_ENERGY_GET packet and passes it to the ASIC's
 * send_cpu_message() callback, using HL_ARMCP_INFO_TIMEOUT_USEC as timeout.
 *
 * Return: 0 on success, otherwise the error code returned by
 *         send_cpu_message() (which is also logged).
 */
int hl_fw_armcp_total_energy_get(struct hl_device *hdev,
			u64 *total_energy)
{
	struct armcp_packet energy_pkt = {};
	long fw_result;
	int err;

	/* Only the opcode is set; the rest of the packet stays zeroed */
	energy_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TOTAL_ENERGY_GET <<
					ARMCP_PKT_CTL_OPCODE_SHIFT);

	err = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &energy_pkt,
				sizeof(energy_pkt),
				HL_ARMCP_INFO_TIMEOUT_USEC, &fw_result);
	if (!err) {
		*total_energy = fw_result;
		return 0;
	}

	dev_err(hdev->dev,
		"Failed to handle ArmCP total energy pkt, error %d\n", err);

	return err;
}

static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
{
	u32 err_val;
+2 −0
Original line number Diff line number Diff line
@@ -1852,6 +1852,8 @@ int hl_fw_armcp_info_get(struct hl_device *hdev);
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
int hl_fw_armcp_pci_counters_get(struct hl_device *hdev,
		struct hl_info_pci_counters *counters);
int hl_fw_armcp_total_energy_get(struct hl_device *hdev,
			u64 *total_energy);
int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
			u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
			u32 boot_err0_reg, bool skip_bmc,
+24 −0
Original line number Diff line number Diff line
@@ -357,6 +357,27 @@ static int sync_manager_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
			sizeof(sm_info))) ? -EFAULT : 0;
}

static int total_energy_consumption_info(struct hl_fpriv *hpriv,
			struct hl_info_args *args)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_info_energy total_energy = {0};
	u32 max_size = args->return_size;
	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
	int rc;

	if ((!max_size) || (!out))
		return -EINVAL;

	rc = hl_fw_armcp_total_energy_get(hdev,
			&total_energy.total_energy_consumption);
	if (rc)
		return rc;

	return copy_to_user(out, &total_energy,
		min((size_t) max_size, sizeof(total_energy))) ? -EFAULT : 0;
}

static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
				struct device *dev)
{
@@ -429,6 +450,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
	case HL_INFO_SYNC_MANAGER:
		return sync_manager_info(hpriv, args);

	case HL_INFO_TOTAL_ENERGY:
		return total_energy_consumption_info(hpriv, args);

	default:
		dev_err(dev, "Invalid request %d\n", args->op);
		rc = -ENOTTY;
+1 −0
Original line number Diff line number Diff line
@@ -245,6 +245,7 @@ enum armcp_packet_id {
	ARMCP_PACKET_CURRENT_SET,		/* sysfs */
	ARMCP_PACKET_PCIE_THROUGHPUT_GET,	/* internal */
	ARMCP_PACKET_PCIE_REPLAY_CNT_GET,	/* internal */
	ARMCP_PACKET_TOTAL_ENERGY_GET,		/* internal */
};

#define ARMCP_PACKET_FENCE_VAL	0xFE8CE7A5
+10 −0
Original line number Diff line number Diff line
@@ -267,6 +267,7 @@ enum hl_device_status {
 * HL_INFO_PCI_COUNTERS  - Retrieve PCI counters
 * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason
 * HL_INFO_SYNC_MANAGER  - Retrieve sync manager info per dcore
 * HL_INFO_TOTAL_ENERGY  - Retrieve total energy consumption
 */
#define HL_INFO_HW_IP_INFO		0
#define HL_INFO_HW_EVENTS		1
@@ -282,6 +283,7 @@ enum hl_device_status {
#define HL_INFO_PCI_COUNTERS		12
#define HL_INFO_CLK_THROTTLE_REASON	13
#define HL_INFO_SYNC_MANAGER		14
#define HL_INFO_TOTAL_ENERGY		15

#define HL_INFO_VERSION_MAX_LEN	128
#define HL_INFO_CARD_NAME_MAX_LEN	16
@@ -375,6 +377,14 @@ struct hl_info_clk_throttle {
	__u32 clk_throttling_reason;
};

/**
 * struct hl_info_energy - device energy information
 * @total_energy_consumption: total device energy consumption, returned to
 *                            the user for the HL_INFO_TOTAL_ENERGY opcode.
 *                            NOTE(review): the units of this value are not
 *                            stated here - confirm against the firmware
 *                            interface before documenting them.
 */
struct hl_info_energy {
	__u64 total_energy_consumption;
};

/**
 * struct hl_info_sync_manager - sync manager information
 * @first_available_sync_object: first available sob