Commit c15850c7 authored by Luo bin, committed by David S. Miller
Browse files

hinic: add support to handle hw abnormal event



Add support for handling abnormal hardware events such as hardware failure,
cable unplugged, and link error.

Signed-off-by: Luo bin <luobin9@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent aff75431
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -98,10 +98,14 @@ struct hinic_dev {
	int				lb_pkt_len;
	u8				*lb_test_rx_buf;
	struct devlink			*devlink;
	bool				cable_unplugged;
	bool				module_unrecognized;
};

struct hinic_devlink_priv {
	struct hinic_hwdev		*hwdev;
	struct devlink_health_reporter  *hw_fault_reporter;
	struct devlink_health_reporter  *fw_fault_reporter;
};

#endif
+283 −3
Original line number Diff line number Diff line
@@ -16,9 +16,9 @@
#include <net/devlink.h>
#include <linux/firmware.h>

#include "hinic_dev.h"
#include "hinic_port.h"
#include "hinic_devlink.h"
#include "hinic_hw_dev.h"

static bool check_image_valid(struct hinic_devlink_priv *priv, const u8 *buf,
			      u32 image_size, struct host_image_st *host_image)
@@ -317,12 +317,292 @@ void hinic_devlink_free(struct devlink *devlink)
	devlink_free(devlink);
}

int hinic_devlink_register(struct devlink *devlink, struct device *dev)
/**
 * hinic_devlink_register - register the devlink instance that embeds @priv
 * @priv: hinic devlink private data
 * @dev: device handed to devlink_register()
 *
 * Return: the value returned by devlink_register()
 */
int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev)
{
	return devlink_register(priv_to_devlink(priv), dev);
}

void hinic_devlink_unregister(struct devlink *devlink)
/**
 * hinic_devlink_unregister - unregister the devlink instance that embeds @priv
 * @priv: hinic devlink private data
 */
void hinic_devlink_unregister(struct hinic_devlink_priv *priv)
{
	devlink_unregister(priv_to_devlink(priv));
}

/**
 * chip_fault_show - append the chip-fault details of a fault event to @fmsg
 * @fmsg: devlink formatted message being built
 * @event: fault event; only its event.chip member is read
 *
 * Return: 0 on success, otherwise the error returned by the first failing
 * devlink_fmsg_*_pair_put() call
 */
static int chip_fault_show(struct devlink_fmsg *fmsg,
			   struct hinic_fault_event *event)
{
	/*
	 * Indexed by err_level, hence sized by FAULT_LEVEL_MAX (the same bound
	 * used in the range check below).  Pointers to string literals avoid
	 * the source-length-bounded strncpy() into a fixed-size buffer.
	 */
	static const char * const fault_level[FAULT_LEVEL_MAX] = {
		"fatal", "reset", "flr", "general", "suggestion"};
	const char *level_str = "Unknown";
	u8 level = event->event.chip.err_level;
	int err;

	/* Out-of-range levels and unnamed table entries are shown as "Unknown" */
	if (level < FAULT_LEVEL_MAX && fault_level[level])
		level_str = fault_level[level];

	/* The function id is only reported for function-level (FLR) faults */
	if (level == FAULT_LEVEL_SERIOUS_FLR) {
		err = devlink_fmsg_u32_pair_put(fmsg, "Function level err func_id",
						(u32)event->event.chip.func_id);
		if (err)
			return err;
	}

	err = devlink_fmsg_u8_pair_put(fmsg, "module_id", event->event.chip.node_id);
	if (err)
		return err;

	err = devlink_fmsg_u32_pair_put(fmsg, "err_type", (u32)event->event.chip.err_type);
	if (err)
		return err;

	err = devlink_fmsg_string_pair_put(fmsg, "err_level", level_str);
	if (err)
		return err;

	err = devlink_fmsg_u32_pair_put(fmsg, "err_csr_addr",
					event->event.chip.err_csr_addr);
	if (err)
		return err;

	return devlink_fmsg_u32_pair_put(fmsg, "err_csr_value",
					 event->event.chip.err_csr_value);
}

static int fault_report_show(struct devlink_fmsg *fmsg,
			     struct hinic_fault_event *event)
{
	char fault_type[FAULT_TYPE_MAX][FAULT_SHOW_STR_LEN + 1] = {
		"chip", "ucode", "mem rd timeout", "mem wr timeout",
		"reg rd timeout", "reg wr timeout", "phy fault"};
	char type_str[FAULT_SHOW_STR_LEN + 1] = {0};
	int err;

	if (event->type < FAULT_TYPE_MAX)
		strncpy(type_str, fault_type[event->type], strlen(fault_type[event->type]));
	else
		strncpy(type_str, "Unknown", strlen("Unknown"));

	err = devlink_fmsg_string_pair_put(fmsg, "Fault type", type_str);
	if (err)
		return err;

	err = devlink_fmsg_binary_pair_put(fmsg, "Fault raw data",
					   event->event.val, sizeof(event->event.val));
	if (err)
		return err;

	switch (event->type) {
	case FAULT_TYPE_CHIP:
		err = chip_fault_show(fmsg, event);
		if (err)
			return err;
		break;
	case FAULT_TYPE_UCODE:
		err = devlink_fmsg_u8_pair_put(fmsg, "Cause_id", event->event.ucode.cause_id);
		if (err)
			return err;
		err = devlink_fmsg_u8_pair_put(fmsg, "core_id", event->event.ucode.core_id);
		if (err)
			return err;
		err = devlink_fmsg_u8_pair_put(fmsg, "c_id", event->event.ucode.c_id);
		if (err)
			return err;
		err = devlink_fmsg_u8_pair_put(fmsg, "epc", event->event.ucode.epc);
		if (err)
			return err;
		break;
	case FAULT_TYPE_MEM_RD_TIMEOUT:
	case FAULT_TYPE_MEM_WR_TIMEOUT:
		err = devlink_fmsg_u32_pair_put(fmsg, "Err_csr_ctrl",
						event->event.mem_timeout.err_csr_ctrl);
		if (err)
			return err;
		err = devlink_fmsg_u32_pair_put(fmsg, "err_csr_data",
						event->event.mem_timeout.err_csr_data);
		if (err)
			return err;
		err = devlink_fmsg_u32_pair_put(fmsg, "ctrl_tab",
						event->event.mem_timeout.ctrl_tab);
		if (err)
			return err;
		err = devlink_fmsg_u32_pair_put(fmsg, "mem_index",
						event->event.mem_timeout.mem_index);
		if (err)
			return err;
		break;
	case FAULT_TYPE_REG_RD_TIMEOUT:
	case FAULT_TYPE_REG_WR_TIMEOUT:
		err = devlink_fmsg_u32_pair_put(fmsg, "Err_csr", event->event.reg_timeout.err_csr);
		if (err)
			return err;
		break;
	case FAULT_TYPE_PHY_FAULT:
		err = devlink_fmsg_u8_pair_put(fmsg, "Op_type", event->event.phy_fault.op_type);
		if (err)
			return err;
		err = devlink_fmsg_u8_pair_put(fmsg, "port_id", event->event.phy_fault.port_id);
		if (err)
			return err;
		err = devlink_fmsg_u8_pair_put(fmsg, "dev_ad", event->event.phy_fault.dev_ad);
		if (err)
			return err;

		err = devlink_fmsg_u32_pair_put(fmsg, "csr_addr", event->event.phy_fault.csr_addr);
		if (err)
			return err;
		err = devlink_fmsg_u32_pair_put(fmsg, "op_data", event->event.phy_fault.op_data);
		if (err)
			return err;
		break;
	default:
		break;
	}

	return 0;
}

/*
 * .dump callback of the "hw" health reporter.
 *
 * @priv_ctx is the fault event handed to devlink_health_report(); when no
 * context is supplied there is nothing to dump and 0 is returned.
 */
static int hinic_hw_reporter_dump(struct devlink_health_reporter *reporter,
				  struct devlink_fmsg *fmsg, void *priv_ctx,
				  struct netlink_ext_ack *extack)
{
	struct hinic_fault_event *fault = priv_ctx;

	if (!fault)
		return 0;

	return fault_report_show(fmsg, fault);
}

/**
 * mgmt_watchdog_report_show - format a mgmt watchdog report into a devlink message
 * @fmsg: devlink formatted message being built
 * @watchdog_info: watchdog information reported by the management firmware
 *
 * Emits the scalar fields as u32 pairs in a fixed order, then the register
 * array and the stack dump as binary pairs.
 *
 * Return: 0 on success, otherwise the error returned by the first failing
 * devlink_fmsg_*_pair_put() call
 */
static int mgmt_watchdog_report_show(struct devlink_fmsg *fmsg,
				     struct hinic_mgmt_watchdog_info *watchdog_info)
{
	/* Scalar fields, listed in the order they are emitted */
	const struct {
		const char *name;
		unsigned int value;
	} fields[] = {
		{ "Mgmt deadloop time_h", watchdog_info->curr_time_h },
		{ "time_l", watchdog_info->curr_time_l },
		{ "task_id", watchdog_info->task_id },
		{ "sp", watchdog_info->sp },
		{ "stack_current_used", watchdog_info->curr_used },
		{ "peak_used", watchdog_info->peak_used },
		{ "\n Overflow_flag", watchdog_info->is_overflow },
		{ "stack_top", watchdog_info->stack_top },
		{ "stack_bottom", watchdog_info->stack_bottom },
		{ "mgmt_pc", watchdog_info->pc },
		{ "lr", watchdog_info->lr },
		{ "cpsr", watchdog_info->cpsr },
	};
	unsigned int i;
	int err;

	for (i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
		err = devlink_fmsg_u32_pair_put(fmsg, fields[i].name, fields[i].value);
		if (err)
			return err;
	}

	err = devlink_fmsg_binary_pair_put(fmsg, "Mgmt register info",
					   watchdog_info->reg, sizeof(watchdog_info->reg));
	if (err)
		return err;

	return devlink_fmsg_binary_pair_put(fmsg, "Mgmt dump stack(start from sp)",
					    watchdog_info->data, sizeof(watchdog_info->data));
}

/*
 * .dump callback of the "fw" health reporter.
 *
 * @priv_ctx is the watchdog info handed to devlink_health_report(); when no
 * context is supplied there is nothing to dump and 0 is returned.
 */
static int hinic_fw_reporter_dump(struct devlink_health_reporter *reporter,
				  struct devlink_fmsg *fmsg, void *priv_ctx,
				  struct netlink_ext_ack *extack)
{
	struct hinic_mgmt_watchdog_info *info = priv_ctx;

	if (!info)
		return 0;

	return mgmt_watchdog_report_show(fmsg, info);
}

/* Operations of the "hw" devlink health reporter; only a dump callback is set. */
static const struct devlink_health_reporter_ops hinic_hw_fault_reporter_ops = {
	.name = "hw",
	.dump = hinic_hw_reporter_dump,
};

/* Operations of the "fw" devlink health reporter; only a dump callback is set. */
static const struct devlink_health_reporter_ops hinic_fw_fault_reporter_ops = {
	.name = "fw",
	.dump = hinic_fw_reporter_dump,
};

/**
 * hinic_health_reporters_create - create the "hw" and "fw" devlink health reporters
 * @priv: hinic devlink private data; receives both reporter pointers
 *
 * If the "fw" reporter cannot be created, the already created "hw" reporter
 * is destroyed again and its pointer reset to NULL.
 *
 * Return: 0 on success, or the PTR_ERR() value of the reporter that failed
 */
int hinic_health_reporters_create(struct hinic_devlink_priv *priv)
{
	struct devlink *devlink = priv_to_devlink(priv);
	struct devlink_health_reporter *reporter;

	reporter = devlink_health_reporter_create(devlink,
						  &hinic_hw_fault_reporter_ops,
						  0, priv);
	priv->hw_fault_reporter = reporter;
	if (IS_ERR(reporter)) {
		dev_warn(&priv->hwdev->hwif->pdev->dev, "Failed to create hw fault reporter, err: %ld\n",
			 PTR_ERR(reporter));
		return PTR_ERR(reporter);
	}

	reporter = devlink_health_reporter_create(devlink,
						  &hinic_fw_fault_reporter_ops,
						  0, priv);
	priv->fw_fault_reporter = reporter;
	if (!IS_ERR(reporter))
		return 0;

	dev_warn(&priv->hwdev->hwif->pdev->dev, "Failed to create fw fault reporter, err: %ld\n",
		 PTR_ERR(reporter));

	/* Roll back the reporter that was created first */
	devlink_health_reporter_destroy(priv->hw_fault_reporter);
	priv->hw_fault_reporter = NULL;

	return PTR_ERR(reporter);
}

/* Destroy the reporter stored in @slot, if it is a valid pointer, and clear the slot. */
static void hinic_health_reporter_release(struct devlink_health_reporter **slot)
{
	if (IS_ERR_OR_NULL(*slot))
		return;

	devlink_health_reporter_destroy(*slot);
	*slot = NULL;
}

/**
 * hinic_health_reporters_destroy - destroy the "fw" and "hw" devlink health reporters
 * @priv: hinic devlink private data holding the reporter pointers
 *
 * Pointers that are NULL or hold an error value are left untouched.
 */
void hinic_health_reporters_destroy(struct hinic_devlink_priv *priv)
{
	hinic_health_reporter_release(&priv->fw_fault_reporter);
	hinic_health_reporter_release(&priv->hw_fault_reporter);
}
+6 −2
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@
#define __HINIC_DEVLINK_H__

#include <net/devlink.h>
#include "hinic_dev.h"

#define MAX_FW_TYPE_NUM 30
#define HINIC_MAGIC_NUM 0x18221100
@@ -109,7 +110,10 @@ struct host_image_st {

struct devlink *hinic_devlink_alloc(void);
void hinic_devlink_free(struct devlink *devlink);
int hinic_devlink_register(struct devlink *devlink, struct device *dev);
void hinic_devlink_unregister(struct devlink *devlink);
int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev);
void hinic_devlink_unregister(struct hinic_devlink_priv *priv);

int hinic_health_reporters_create(struct hinic_devlink_priv *priv);
void hinic_health_reporters_destroy(struct hinic_devlink_priv *priv);

#endif /* __HINIC_DEVLINK_H__ */
+20 −0
Original line number Diff line number Diff line
@@ -1766,6 +1766,25 @@ static int hinic_get_module_eeprom(struct net_device *netdev,
	return 0;
}

/**
 * hinic_get_link_ext_state - report why the link is down, when the cause is known
 * @netdev: network device being queried
 * @link_ext_state_info: filled with the extended link state on success
 *
 * Return: 0 when @link_ext_state_info->link_ext_state has been set,
 * -ENODATA when the carrier is up or when neither of the recorded causes
 * (cable unplugged, module unrecognized) applies
 */
static int
hinic_get_link_ext_state(struct net_device *netdev,
			 struct ethtool_link_ext_state_info *link_ext_state_info)
{
	struct hinic_dev *nic_dev = netdev_priv(netdev);

	if (netif_carrier_ok(netdev))
		return -ENODATA;

	if (nic_dev->cable_unplugged) {
		link_ext_state_info->link_ext_state =
			ETHTOOL_LINK_EXT_STATE_NO_CABLE;
		return 0;
	}

	if (nic_dev->module_unrecognized) {
		link_ext_state_info->link_ext_state =
			ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH;
		return 0;
	}

	/*
	 * Link is down for a reason the driver has not recorded.  Do not claim
	 * success with link_ext_state left unwritten; report "no data", the
	 * same code already used above for the carrier-up case.
	 */
	return -ENODATA;
}

static const struct ethtool_ops hinic_ethtool_ops = {
	.supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS |
				     ETHTOOL_COALESCE_RX_MAX_FRAMES |
@@ -1776,6 +1795,7 @@ static const struct ethtool_ops hinic_ethtool_ops = {
	.set_link_ksettings = hinic_set_link_ksettings,
	.get_drvinfo = hinic_get_drvinfo,
	.get_link = ethtool_op_get_link,
	.get_link_ext_state = hinic_get_link_ext_state,
	.get_ringparam = hinic_get_ringparam,
	.set_ringparam = hinic_set_ringparam,
	.get_coalesce = hinic_get_coalesce,
+143 −5
Original line number Diff line number Diff line
@@ -16,8 +16,11 @@
#include <linux/log2.h>
#include <linux/err.h>
#include <linux/netdevice.h>
#include <net/devlink.h>

#include "hinic_devlink.h"
#include "hinic_sriov.h"
#include "hinic_dev.h"
#include "hinic_hw_if.h"
#include "hinic_hw_eqs.h"
#include "hinic_hw_mgmt.h"
@@ -621,6 +624,113 @@ static void nic_mgmt_msg_handler(void *handle, u8 cmd, void *buf_in,
	nic_cb->cb_state &= ~HINIC_CB_RUNNING;
}

/*
 * Append a handler for COMM-module mgmt command @cmd to the PF's handler
 * table.  When the table already holds HINIC_COMM_SELF_CMD_MAX entries the
 * request is dropped with an error message.
 */
static void hinic_comm_recv_mgmt_self_cmd_reg(struct hinic_pfhwdev *pfhwdev,
					      u8 cmd,
					      comm_mgmt_self_msg_proc proc)
{
	u8 slot = pfhwdev->proc.cmd_num;

	if (slot < HINIC_COMM_SELF_CMD_MAX) {
		pfhwdev->proc.info[slot].cmd = cmd;
		pfhwdev->proc.info[slot].proc = proc;
		pfhwdev->proc.cmd_num++;
		return;
	}

	dev_err(&pfhwdev->hwdev.hwif->pdev->dev,
		"Register recv mgmt process failed, cmd: 0x%x\n", cmd);
}

/*
 * Remove the handler registered for COMM-module mgmt command @cmd.
 *
 * Only the registered entries, indices [0, cmd_num), are searched, so an
 * empty slot can never match and cmd_num cannot underflow.  The entries
 * behind the removed one are moved down by one position: registration appends
 * at index cmd_num and the dispatcher scans [0, cmd_num), so the table must
 * stay densely packed for the remaining handlers to stay reachable.
 * A full table is a valid state to unregister from.  Unregistering a command
 * that is not in the table is a no-op.
 *
 * NOTE(review): like the original, this takes no lock; confirm that
 * unregistration cannot race with comm_mgmt_msg_handler().
 */
static void hinic_comm_recv_mgmt_self_cmd_unreg(struct hinic_pfhwdev *pfhwdev,
						u8 cmd)
{
	u8 cmd_idx;

	for (cmd_idx = 0; cmd_idx < pfhwdev->proc.cmd_num; cmd_idx++) {
		if (cmd == pfhwdev->proc.info[cmd_idx].cmd)
			break;
	}

	/* Not registered: nothing to remove */
	if (cmd_idx == pfhwdev->proc.cmd_num)
		return;

	/* Close the gap while preserving the order of the remaining entries */
	for (; cmd_idx + 1 < pfhwdev->proc.cmd_num; cmd_idx++)
		pfhwdev->proc.info[cmd_idx] = pfhwdev->proc.info[cmd_idx + 1];

	/* cmd_idx now addresses the vacated last slot */
	pfhwdev->proc.info[cmd_idx].cmd = 0;
	pfhwdev->proc.info[cmd_idx].proc = NULL;
	pfhwdev->proc.cmd_num--;
}

/*
 * Dispatch a COMM-module message from the management CPU to the handler
 * registered for @cmd.
 *
 * @handle is the PF hwdev the callback was registered with.  A matching entry
 * without a handler only produces a warning.  When no entry matches, a warning
 * is logged and *@out_size is set to 0.
 */
static void comm_mgmt_msg_handler(void *handle, u8 cmd, void *buf_in,
				  u16 in_size, void *buf_out, u16 *out_size)
{
	struct hinic_pfhwdev *pfhwdev = handle;
	u8 idx;

	for (idx = 0; idx < pfhwdev->proc.cmd_num; idx++) {
		if (pfhwdev->proc.info[idx].cmd != cmd)
			continue;

		if (pfhwdev->proc.info[idx].proc)
			pfhwdev->proc.info[idx].proc(&pfhwdev->hwdev, buf_in,
						     in_size, buf_out,
						     out_size);
		else
			dev_warn(&pfhwdev->hwdev.hwif->pdev->dev,
				 "PF recv mgmt comm msg handle null, cmd: 0x%x\n",
				 cmd);

		return;
	}

	dev_warn(&pfhwdev->hwdev.hwif->pdev->dev, "Received unknown mgmt cpu event: 0x%x\n",
		 cmd);

	*out_size = 0;
}

/*
 * PF fault report event: forward a fault event received from the management
 * CPU to the "hw" devlink health reporter.
 *
 * Messages whose length does not match struct hinic_cmd_fault_event are
 * rejected with an error message; the event is dropped silently when no
 * valid reporter exists.
 */
static void pf_fault_event_handler(void *dev, void *buf_in, u16 in_size,
				   void *buf_out, u16 *out_size)
{
	struct hinic_hwdev *hwdev = dev;
	struct hinic_cmd_fault_event *fault_event = buf_in;
	struct hinic_devlink_priv *devlink_dev;

	if (in_size != sizeof(*fault_event)) {
		dev_err(&hwdev->hwif->pdev->dev, "Invalid fault event report, length: %d, should be %zu\n",
			in_size, sizeof(*fault_event));
		return;
	}

	devlink_dev = hwdev->devlink_dev;
	if (devlink_dev && !IS_ERR_OR_NULL(devlink_dev->hw_fault_reporter))
		devlink_health_report(devlink_dev->hw_fault_reporter,
				      "HW fatal error reported",
				      &fault_event->event);
}

/*
 * Mgmt watchdog timeout event: forward the watchdog information received
 * from the management CPU to the "fw" devlink health reporter.
 *
 * Messages whose length does not match struct hinic_mgmt_watchdog_info are
 * rejected with an error message; the event is dropped silently when no
 * valid reporter exists.
 */
static void mgmt_watchdog_timeout_event_handler(void *dev,
						void *buf_in, u16 in_size,
						void *buf_out, u16 *out_size)
{
	struct hinic_hwdev *hwdev = dev;
	struct hinic_mgmt_watchdog_info *watchdog_info = buf_in;
	struct hinic_devlink_priv *devlink_dev;

	if (in_size != sizeof(*watchdog_info)) {
		dev_err(&hwdev->hwif->pdev->dev, "Invalid mgmt watchdog report, length: %d, should be %zu\n",
			in_size, sizeof(*watchdog_info));
		return;
	}

	devlink_dev = hwdev->devlink_dev;
	if (devlink_dev && !IS_ERR_OR_NULL(devlink_dev->fw_fault_reporter))
		devlink_health_report(devlink_dev->fw_fault_reporter,
				      "FW fatal error reported", watchdog_info);
}

/**
 * init_pfhwdev - Initialize the extended components of PF
 * @pfhwdev: the HW device for PF
@@ -640,20 +750,37 @@ static int init_pfhwdev(struct hinic_pfhwdev *pfhwdev)
		return err;
	}

	err = hinic_devlink_register(hwdev->devlink_dev, &pdev->dev);
	if (err) {
		dev_err(&hwif->pdev->dev, "Failed to register devlink\n");
		hinic_pf_to_mgmt_free(&pfhwdev->pf_to_mgmt);
		return err;
	}

	err = hinic_func_to_func_init(hwdev);
	if (err) {
		dev_err(&hwif->pdev->dev, "Failed to init mailbox\n");
		hinic_devlink_unregister(hwdev->devlink_dev);
		hinic_pf_to_mgmt_free(&pfhwdev->pf_to_mgmt);
		return err;
	}

	if (!HINIC_IS_VF(hwif))
	if (!HINIC_IS_VF(hwif)) {
		hinic_register_mgmt_msg_cb(&pfhwdev->pf_to_mgmt,
					   HINIC_MOD_L2NIC, pfhwdev,
					   nic_mgmt_msg_handler);
	else
		hinic_register_mgmt_msg_cb(&pfhwdev->pf_to_mgmt, HINIC_MOD_COMM,
					   pfhwdev, comm_mgmt_msg_handler);
		hinic_comm_recv_mgmt_self_cmd_reg(pfhwdev,
						  HINIC_COMM_CMD_FAULT_REPORT,
						  pf_fault_event_handler);
		hinic_comm_recv_mgmt_self_cmd_reg
			(pfhwdev, HINIC_COMM_CMD_WATCHDOG_INFO,
			 mgmt_watchdog_timeout_event_handler);
	} else {
		hinic_register_vf_mbox_cb(hwdev, HINIC_MOD_L2NIC,
					  nic_mgmt_msg_handler);
	}

	hinic_set_pf_action(hwif, HINIC_PF_MGMT_ACTIVE);

@@ -670,14 +797,23 @@ static void free_pfhwdev(struct hinic_pfhwdev *pfhwdev)

	hinic_set_pf_action(hwdev->hwif, HINIC_PF_MGMT_INIT);

	if (!HINIC_IS_VF(hwdev->hwif))
	if (!HINIC_IS_VF(hwdev->hwif)) {
		hinic_comm_recv_mgmt_self_cmd_unreg(pfhwdev,
						    HINIC_COMM_CMD_WATCHDOG_INFO);
		hinic_comm_recv_mgmt_self_cmd_unreg(pfhwdev,
						    HINIC_COMM_CMD_FAULT_REPORT);
		hinic_unregister_mgmt_msg_cb(&pfhwdev->pf_to_mgmt,
					     HINIC_MOD_COMM);
		hinic_unregister_mgmt_msg_cb(&pfhwdev->pf_to_mgmt,
					     HINIC_MOD_L2NIC);
	else
	} else {
		hinic_unregister_vf_mbox_cb(hwdev, HINIC_MOD_L2NIC);
	}

	hinic_func_to_func_free(hwdev);

	hinic_devlink_unregister(hwdev->devlink_dev);

	hinic_pf_to_mgmt_free(&pfhwdev->pf_to_mgmt);
}

@@ -777,7 +913,7 @@ int hinic_set_interrupt_cfg(struct hinic_hwdev *hwdev,
 *
 * Initialize the NIC HW device and return a pointer to it
 **/
struct hinic_hwdev *hinic_init_hwdev(struct pci_dev *pdev)
struct hinic_hwdev *hinic_init_hwdev(struct pci_dev *pdev, struct devlink *devlink)
{
	struct hinic_pfhwdev *pfhwdev;
	struct hinic_hwdev *hwdev;
@@ -802,6 +938,8 @@ struct hinic_hwdev *hinic_init_hwdev(struct pci_dev *pdev)

	hwdev = &pfhwdev->hwdev;
	hwdev->hwif = hwif;
	hwdev->devlink_dev = devlink_priv(devlink);
	hwdev->devlink_dev->hwdev = hwdev;

	err = init_msix(hwdev);
	if (err) {
Loading