Commit 6a94785f authored by Bjorn Helgaas's avatar Bjorn Helgaas
Browse files

Merge branch 'pci/err'

- Stop writing AER Capability when we don't own it (Sean V Kelley)

- Bind RCEC devices to the Port driver (Qiuxu Zhuo)

- Cache the RCEC EA Capability offset (Sean V Kelley)

- Add pci_walk_bridge() (Sean V Kelley)

- Clear AER status only when we control AER (Sean V Kelley)

- Recover from RCEC AER errors (Sean V Kelley)

- Add pcie_link_rcec() to associate RCiEPs with RCECs (Sean V Kelley)

- Recover from RCiEP AER errors (Sean V Kelley)

- Add pcie_walk_rcec() for RCEC AER handling (Sean V Kelley)

- Add pcie_walk_rcec() for RCEC PME handling (Sean V Kelley)

- Add RCEC AER error injection support (Qiuxu Zhuo)

* pci/err:
  PCI/AER: Add RCEC AER error injection support
  PCI/PME: Add pcie_walk_rcec() to RCEC PME handling
  PCI/AER: Add pcie_walk_rcec() to RCEC AER handling
  PCI/ERR: Recover from RCiEP AER errors
  PCI/ERR: Add pcie_link_rcec() to associate RCiEPs
  PCI/ERR: Recover from RCEC AER errors
  PCI/ERR: Clear AER status only when we control AER
  PCI/ERR: Add pci_walk_bridge() to pcie_do_recovery()
  PCI/ERR: Avoid negated conditional for clarity
  PCI/ERR: Use "bridge" for clarity in pcie_do_recovery()
  PCI/ERR: Simplify by computing pci_pcie_type() once
  PCI/ERR: Simplify by using pci_upstream_bridge()
  PCI/ERR: Rename reset_link() to reset_subordinates()
  PCI/ERR: Cache RCEC EA Capability offset in pci_init_capabilities()
  PCI/ERR: Bind RCEC devices to the Root Port driver
  PCI/AER: Write AER Capability only when we control it
parents e8722508 d292dd0e
Loading
Loading
Loading
Loading
+27 −2
Original line number Diff line number Diff line
@@ -450,6 +450,15 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
#endif	/* CONFIG_PCIEAER */

#ifdef CONFIG_PCIEPORTBUS
/*
 * Cached RCEC Endpoint Association
 *
 * NOTE(review): the per-field meanings below are inferred from the field
 * names only; confirm against the code that fills in this structure.
 */
struct rcec_ea {
	u8		nextbusn;	/* presumably first associated bus number */
	u8		lastbusn;	/* presumably last associated bus number */
	u32		bitmap;		/* presumably bitmap of associated devices */
};
#endif

#ifdef CONFIG_PCIE_DPC
void pci_save_dpc_state(struct pci_dev *dev);
void pci_restore_dpc_state(struct pci_dev *dev);
@@ -462,6 +471,22 @@ static inline void pci_restore_dpc_state(struct pci_dev *dev) {}
static inline void pci_dpc_init(struct pci_dev *pdev) {}
#endif

#ifdef CONFIG_PCIEPORTBUS
/* Root Complex Event Collector (RCEC) helpers */
void pci_rcec_init(struct pci_dev *dev);
void pci_rcec_exit(struct pci_dev *dev);
void pcie_link_rcec(struct pci_dev *rcec);
/* Calls @cb with @userdata; see the implementation for which devices */
void pcie_walk_rcec(struct pci_dev *rcec,
		    int (*cb)(struct pci_dev *, void *),
		    void *userdata);
#else
/*
 * Without CONFIG_PCIEPORTBUS these are empty stubs so callers need no
 * #ifdef of their own.  Note the pcie_walk_rcec() stub never invokes @cb.
 */
static inline void pci_rcec_init(struct pci_dev *dev) {}
static inline void pci_rcec_exit(struct pci_dev *dev) {}
static inline void pcie_link_rcec(struct pci_dev *rcec) {}
static inline void pcie_walk_rcec(struct pci_dev *rcec,
				  int (*cb)(struct pci_dev *, void *),
				  void *userdata) {}
#endif

#ifdef CONFIG_PCI_ATS
/* Address Translation Service */
void pci_ats_init(struct pci_dev *dev);
@@ -558,7 +583,7 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev)
/* PCI error reporting and recovery */
pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
		pci_channel_state_t state,
			pci_ers_result_t (*reset_link)(struct pci_dev *pdev));
		pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev));

bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
#ifdef CONFIG_PCIEASPM
+1 −1
Original line number Diff line number Diff line
@@ -2,7 +2,7 @@
#
# Makefile for PCI Express features and port driver

pcieportdrv-y			:= portdrv_core.o portdrv_pci.o err.o
pcieportdrv-y			:= portdrv_core.o portdrv_pci.o err.o rcec.o

obj-$(CONFIG_PCIEPORTBUS)	+= pcieportdrv.o

+75 −26
Original line number Diff line number Diff line
@@ -300,7 +300,8 @@ int pci_aer_raw_clear_status(struct pci_dev *dev)
		return -EIO;

	port_type = pci_pcie_type(dev);
	if (port_type == PCI_EXP_TYPE_ROOT_PORT) {
	if (port_type == PCI_EXP_TYPE_ROOT_PORT ||
	    port_type == PCI_EXP_TYPE_RC_EC) {
		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &status);
		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, status);
	}
@@ -595,7 +596,8 @@ static umode_t aer_stats_attrs_are_visible(struct kobject *kobj,
	if ((a == &dev_attr_aer_rootport_total_err_cor.attr ||
	     a == &dev_attr_aer_rootport_total_err_fatal.attr ||
	     a == &dev_attr_aer_rootport_total_err_nonfatal.attr) &&
	    pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT)
	    ((pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) &&
	     (pci_pcie_type(pdev) != PCI_EXP_TYPE_RC_EC)))
		return 0;

	return a->mode;
@@ -916,6 +918,9 @@ static bool find_source_device(struct pci_dev *parent,
	if (result)
		return true;

	if (pci_pcie_type(parent) == PCI_EXP_TYPE_RC_EC)
		pcie_walk_rcec(parent, find_device_iter, e_info);
	else
		pci_walk_bus(parent->subordinate, find_device_iter, e_info);

	if (!e_info->error_dev_num) {
@@ -1034,6 +1039,7 @@ EXPORT_SYMBOL_GPL(aer_recover_queue);
 */
int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
{
	int type = pci_pcie_type(dev);
	int aer = dev->aer_cap;
	int temp;

@@ -1052,8 +1058,9 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
			&info->mask);
		if (!(info->status & ~info->mask))
			return 0;
	} else if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
	           pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM ||
	} else if (type == PCI_EXP_TYPE_ROOT_PORT ||
		   type == PCI_EXP_TYPE_RC_EC ||
		   type == PCI_EXP_TYPE_DOWNSTREAM ||
		   info->severity == AER_NONFATAL) {

		/* Link is still healthy for IO reads */
@@ -1205,6 +1212,7 @@ static int set_device_error_reporting(struct pci_dev *dev, void *data)
	int type = pci_pcie_type(dev);

	if ((type == PCI_EXP_TYPE_ROOT_PORT) ||
	    (type == PCI_EXP_TYPE_RC_EC) ||
	    (type == PCI_EXP_TYPE_UPSTREAM) ||
	    (type == PCI_EXP_TYPE_DOWNSTREAM)) {
		if (enable)
@@ -1229,9 +1237,12 @@ static void set_downstream_devices_error_reporting(struct pci_dev *dev,
{
	set_device_error_reporting(dev, &enable);

	if (!dev->subordinate)
		return;
	pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable);
	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC)
		pcie_walk_rcec(dev, set_device_error_reporting, &enable);
	else if (dev->subordinate)
		pci_walk_bus(dev->subordinate, set_device_error_reporting,
			     &enable);

}

/**
@@ -1329,6 +1340,11 @@ static int aer_probe(struct pcie_device *dev)
	struct device *device = &dev->device;
	struct pci_dev *port = dev->port;

	/* Limit to Root Ports or Root Complex Event Collectors */
	if ((pci_pcie_type(port) != PCI_EXP_TYPE_RC_EC) &&
	    (pci_pcie_type(port) != PCI_EXP_TYPE_ROOT_PORT))
		return -ENODEV;

	rpc = devm_kzalloc(device, sizeof(struct aer_rpc), GFP_KERNEL);
	if (!rpc)
		return -ENOMEM;
@@ -1350,41 +1366,74 @@ static int aer_probe(struct pcie_device *dev)
}

/**
 * aer_root_reset - reset link on Root Port
 * @dev: pointer to Root Port's pci_dev data structure
 * aer_root_reset - reset Root Port hierarchy, RCEC, or RCiEP
 * @dev: pointer to Root Port, RCEC, or RCiEP
 *
 * Invoked by Port Bus driver when performing link reset at Root Port.
 * Invoked by Port Bus driver when performing reset.
 */
static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	int type = pci_pcie_type(dev);
	struct pci_dev *root;
	int aer;
	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
	u32 reg32;
	int rc;

	/*
	 * Only Root Ports and RCECs have AER Root Command and Root Status
	 * registers.  If "dev" is an RCiEP, the relevant registers are in
	 * the RCEC.
	 */
	if (type == PCI_EXP_TYPE_RC_END)
		root = dev->rcec;
	else
		root = dev;

	/*
	 * If the platform retained control of AER, an RCiEP may not have
	 * an RCEC visible to us, so dev->rcec ("root") may be NULL.  In
	 * that case, firmware is responsible for these registers.
	 */
	aer = root ? root->aer_cap : 0;

	if ((host->native_aer || pcie_ports_native) && aer) {
		/* Disable Root's interrupt in response to error messages */
	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
		reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
	}

	if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) {
		if (pcie_has_flr(dev)) {
			rc = pcie_flr(dev);
			pci_info(dev, "has been reset (%d)\n", rc);
		} else {
			pci_info(dev, "not reset (no FLR support)\n");
			rc = -ENOTTY;
		}
	} else {
		rc = pci_bus_error_reset(dev);
	pci_info(dev, "Root Port link has been reset\n");
		pci_info(dev, "Root Port link has been reset (%d)\n", rc);
	}

	if ((host->native_aer || pcie_ports_native) && aer) {
		/* Clear Root Error Status */
	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &reg32);
	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32);
		pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, &reg32);
		pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32);

		/* Enable Root Port's interrupt in response to error messages */
	pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
		reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
	}

	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}

static struct pcie_port_service_driver aerdriver = {
	.name		= "aer",
	.port_type	= PCI_EXP_TYPE_ROOT_PORT,
	.port_type	= PCIE_ANY_PORT,
	.service	= PCIE_PORT_SERVICE_AER,

	.probe		= aer_probe,
+4 −1
Original line number Diff line number Diff line
@@ -333,8 +333,11 @@ static int aer_inject(struct aer_error_inj *einj)
	if (!dev)
		return -ENODEV;
	rpdev = pcie_find_root_port(dev);
	/* If Root Port not found, try to find an RCEC */
	if (!rpdev)
		rpdev = dev->rcec;
	if (!rpdev) {
		pci_err(dev, "Root port not found\n");
		pci_err(dev, "Neither Root Port nor RCEC found\n");
		ret = -ENODEV;
		goto out_put;
	}
+68 −27
Original line number Diff line number Diff line
@@ -146,38 +146,71 @@ out:
	return 0;
}

/**
 * pci_walk_bridge - run a callback over the devices affected by an error
 * @bridge:	starting device; may be a Port, an RCEC, or an RCiEP
 * @cb:		callback invoked for each device visited
 * @userdata:	opaque pointer handed to every @cb invocation
 *
 * When @bridge has a subordinate bus, that bus is walked with
 * pci_walk_bus(), which covers the devices on it and on any buses
 * bridged below it; @cb is called for each device found.
 *
 * When @bridge has no subordinate bus (e.g., an RCEC or RCiEP), @cb is
 * called exactly once, on @bridge itself, and its return value is ignored.
 */
static void pci_walk_bridge(struct pci_dev *bridge,
			    int (*cb)(struct pci_dev *, void *),
			    void *userdata)
{
	struct pci_bus *secondary = bridge->subordinate;

	/* Nothing below this device: it is the only one to notify */
	if (!secondary) {
		cb(bridge, userdata);
		return;
	}

	pci_walk_bus(secondary, cb, userdata);
}

pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
		pci_channel_state_t state,
			pci_ers_result_t (*reset_link)(struct pci_dev *pdev))
		pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev))
{
	int type = pci_pcie_type(dev);
	struct pci_dev *bridge;
	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
	struct pci_bus *bus;
	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);

	/*
	 * Error recovery runs on all subordinates of the first downstream port.
	 * If the downstream port detected the error, it is cleared at the end.
	 * If the error was detected by a Root Port, Downstream Port, RCEC,
	 * or RCiEP, recovery runs on the device itself.  For Ports, that
	 * also includes any subordinate devices.
	 *
	 * If it was detected by another device (Endpoint, etc), recovery
	 * runs on the device and anything else under the same Port, i.e.,
	 * everything under "bridge".
	 */
	if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
	      pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM))
		dev = dev->bus->self;
	bus = dev->subordinate;

	pci_dbg(dev, "broadcast error_detected message\n");
	if (type == PCI_EXP_TYPE_ROOT_PORT ||
	    type == PCI_EXP_TYPE_DOWNSTREAM ||
	    type == PCI_EXP_TYPE_RC_EC ||
	    type == PCI_EXP_TYPE_RC_END)
		bridge = dev;
	else
		bridge = pci_upstream_bridge(dev);

	pci_dbg(bridge, "broadcast error_detected message\n");
	if (state == pci_channel_io_frozen) {
		pci_walk_bus(bus, report_frozen_detected, &status);
		status = reset_link(dev);
		pci_walk_bridge(bridge, report_frozen_detected, &status);
		status = reset_subordinates(bridge);
		if (status != PCI_ERS_RESULT_RECOVERED) {
			pci_warn(dev, "link reset failed\n");
			pci_warn(bridge, "subordinate device reset failed\n");
			goto failed;
		}
	} else {
		pci_walk_bus(bus, report_normal_detected, &status);
		pci_walk_bridge(bridge, report_normal_detected, &status);
	}

	if (status == PCI_ERS_RESULT_CAN_RECOVER) {
		status = PCI_ERS_RESULT_RECOVERED;
		pci_dbg(dev, "broadcast mmio_enabled message\n");
		pci_walk_bus(bus, report_mmio_enabled, &status);
		pci_dbg(bridge, "broadcast mmio_enabled message\n");
		pci_walk_bridge(bridge, report_mmio_enabled, &status);
	}

	if (status == PCI_ERS_RESULT_NEED_RESET) {
@@ -187,27 +220,35 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
		 * drivers' slot_reset callbacks?
		 */
		status = PCI_ERS_RESULT_RECOVERED;
		pci_dbg(dev, "broadcast slot_reset message\n");
		pci_walk_bus(bus, report_slot_reset, &status);
		pci_dbg(bridge, "broadcast slot_reset message\n");
		pci_walk_bridge(bridge, report_slot_reset, &status);
	}

	if (status != PCI_ERS_RESULT_RECOVERED)
		goto failed;

	pci_dbg(dev, "broadcast resume message\n");
	pci_walk_bus(bus, report_resume, &status);
	pci_dbg(bridge, "broadcast resume message\n");
	pci_walk_bridge(bridge, report_resume, &status);

	if (pcie_aer_is_native(dev))
		pcie_clear_device_status(dev);
	pci_aer_clear_nonfatal_status(dev);
	pci_info(dev, "device recovery successful\n");
	/*
	 * If we have native control of AER, clear error status in the Root
	 * Port or Downstream Port that signaled the error.  If the
	 * platform retained control of AER, it is responsible for clearing
	 * this status.  In that case, the signaling device may not even be
	 * visible to the OS.
	 */
	if (host->native_aer || pcie_ports_native) {
		pcie_clear_device_status(bridge);
		pci_aer_clear_nonfatal_status(bridge);
	}
	pci_info(bridge, "device recovery successful\n");
	return status;

failed:
	pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
	pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT);

	/* TODO: Should kernel panic here? */
	pci_info(dev, "device recovery failed\n");
	pci_info(bridge, "device recovery failed\n");

	return status;
}
Loading