Commit 05c6c029 authored by Viswas G, committed by Martin K. Petersen
Browse files

scsi: pm80xx: Increase number of supported queues

The current driver uses a fixed number of Inbound and Outbound queues, and
all of the I/O, TMF and internal requests are submitted through those. A
global spin lock is used to control the shared access. This can create lock
contention, which is a real bottleneck in the I/O path.

To avoid this, the number of supported Inbound and Outbound queues is
increased to 64, and the number of queues used is decided based on the
number of CPU cores online and the number of MSI-X vectors allocated. Also
add per-queue locks instead of using the global lock.

Link: https://lore.kernel.org/r/20201005145011.23674-2-Viswas.G@microchip.com.com


Acked-by: Jack Wang <jinpu.wang@cloud.ionos.com>
Signed-off-by: Viswas G <Viswas.G@microchip.com>
Signed-off-by: Ruksar Devadi <Ruksar.devadi@microchip.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent 1725ba8d
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -408,9 +408,10 @@ static ssize_t pm8001_ctl_ib_queue_log_show(struct device *cdev,
	int offset;
	char *str = buf;
	int start = 0;
	u32 ib_offset = pm8001_ha->ib_offset;
#define IB_MEMMAP(c)	\
		(*(u32 *)((u8 *)pm8001_ha->	\
		memoryMap.region[IB].virt_ptr +	\
		memoryMap.region[ib_offset].virt_ptr +	\
		pm8001_ha->evtlog_ib_offset + (c)))

	for (offset = 0; offset < IB_OB_READ_TIMES; offset++) {
@@ -442,9 +443,10 @@ static ssize_t pm8001_ctl_ob_queue_log_show(struct device *cdev,
	int offset;
	char *str = buf;
	int start = 0;
	u32 ob_offset = pm8001_ha->ob_offset;
#define OB_MEMMAP(c)	\
		(*(u32 *)((u8 *)pm8001_ha->	\
		memoryMap.region[OB].virt_ptr +	\
		memoryMap.region[ob_offset].virt_ptr +	\
		pm8001_ha->evtlog_ob_offset + (c)))

	for (offset = 0; offset < IB_OB_READ_TIMES; offset++) {
+8 −9
Original line number Diff line number Diff line
@@ -77,10 +77,8 @@ enum port_type {
/* driver compile-time configuration */
#define	PM8001_MAX_CCB		 256	/* max ccbs supported */
#define PM8001_MPI_QUEUE         1024   /* maximum mpi queue entries */
#define	PM8001_MAX_INB_NUM	 1
#define	PM8001_MAX_OUTB_NUM	 1
#define	PM8001_MAX_SPCV_INB_NUM		1
#define	PM8001_MAX_SPCV_OUTB_NUM	4
#define	PM8001_MAX_INB_NUM	 64
#define	PM8001_MAX_OUTB_NUM	 64
#define	PM8001_CAN_QUEUE	 508	/* SCSI Queue depth */

/* Inbound/Outbound queue size */
@@ -94,11 +92,6 @@ enum port_type {
#define	PM8001_MAX_MSIX_VEC	 64	/* max msi-x int for spcv/ve */

#define USI_MAX_MEMCNT_BASE	5
#define IB			(USI_MAX_MEMCNT_BASE + 1)
#define CI			(IB + PM8001_MAX_SPCV_INB_NUM)
#define OB			(CI + PM8001_MAX_SPCV_INB_NUM)
#define PI			(OB + PM8001_MAX_SPCV_OUTB_NUM)
#define USI_MAX_MEMCNT		(PI + PM8001_MAX_SPCV_OUTB_NUM)
#define	CONFIG_SCSI_PM8001_MAX_DMA_SG	528
#define PM8001_MAX_DMA_SG	CONFIG_SCSI_PM8001_MAX_DMA_SG
enum memory_region_num {
@@ -112,6 +105,12 @@ enum memory_region_num {
};
#define	PM8001_EVENT_LOG_SIZE	 (128 * 1024)

/*
 * Maximum number of DMA memory regions: the base regions
 * (USI_MAX_MEMCNT_BASE + 1) plus the number of IBQs, the number of IBQ CIs,
 * the number of OBQs and the number of OBQ PIs.
 */
#define USI_MAX_MEMCNT	(USI_MAX_MEMCNT_BASE + 1 + ((2 * PM8001_MAX_INB_NUM) \
			+ (2 * PM8001_MAX_OUTB_NUM)))
/*error code*/
enum mpi_err {
	MPI_IO_STATUS_SUCCESS = 0x0,
+18 −14
Original line number Diff line number Diff line
@@ -189,6 +189,10 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha)
	u32 offsetib, offsetob;
	void __iomem *addressib = pm8001_ha->inbnd_q_tbl_addr;
	void __iomem *addressob = pm8001_ha->outbnd_q_tbl_addr;
	u32 ib_offset = pm8001_ha->ib_offset;
	u32 ob_offset = pm8001_ha->ob_offset;
	u32 ci_offset = pm8001_ha->ci_offset;
	u32 pi_offset = pm8001_ha->pi_offset;

	pm8001_ha->main_cfg_tbl.pm8001_tbl.inbound_q_nppd_hppd		= 0;
	pm8001_ha->main_cfg_tbl.pm8001_tbl.outbound_hw_event_pid0_3	= 0;
@@ -223,19 +227,19 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha)
		pm8001_ha->inbnd_q_tbl[i].element_pri_size_cnt	=
			PM8001_MPI_QUEUE | (pm8001_ha->iomb_size << 16) | (0x00<<30);
		pm8001_ha->inbnd_q_tbl[i].upper_base_addr	=
			pm8001_ha->memoryMap.region[IB + i].phys_addr_hi;
			pm8001_ha->memoryMap.region[ib_offset + i].phys_addr_hi;
		pm8001_ha->inbnd_q_tbl[i].lower_base_addr	=
		pm8001_ha->memoryMap.region[IB + i].phys_addr_lo;
		pm8001_ha->memoryMap.region[ib_offset + i].phys_addr_lo;
		pm8001_ha->inbnd_q_tbl[i].base_virt		=
			(u8 *)pm8001_ha->memoryMap.region[IB + i].virt_ptr;
		  (u8 *)pm8001_ha->memoryMap.region[ib_offset + i].virt_ptr;
		pm8001_ha->inbnd_q_tbl[i].total_length		=
			pm8001_ha->memoryMap.region[IB + i].total_len;
			pm8001_ha->memoryMap.region[ib_offset + i].total_len;
		pm8001_ha->inbnd_q_tbl[i].ci_upper_base_addr	=
			pm8001_ha->memoryMap.region[CI + i].phys_addr_hi;
			pm8001_ha->memoryMap.region[ci_offset + i].phys_addr_hi;
		pm8001_ha->inbnd_q_tbl[i].ci_lower_base_addr	=
			pm8001_ha->memoryMap.region[CI + i].phys_addr_lo;
			pm8001_ha->memoryMap.region[ci_offset + i].phys_addr_lo;
		pm8001_ha->inbnd_q_tbl[i].ci_virt		=
			pm8001_ha->memoryMap.region[CI + i].virt_ptr;
			pm8001_ha->memoryMap.region[ci_offset + i].virt_ptr;
		offsetib = i * 0x20;
		pm8001_ha->inbnd_q_tbl[i].pi_pci_bar		=
			get_pci_bar_index(pm8001_mr32(addressib,
@@ -249,21 +253,21 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha)
		pm8001_ha->outbnd_q_tbl[i].element_size_cnt	=
			PM8001_MPI_QUEUE | (pm8001_ha->iomb_size << 16) | (0x01<<30);
		pm8001_ha->outbnd_q_tbl[i].upper_base_addr	=
			pm8001_ha->memoryMap.region[OB + i].phys_addr_hi;
			pm8001_ha->memoryMap.region[ob_offset + i].phys_addr_hi;
		pm8001_ha->outbnd_q_tbl[i].lower_base_addr	=
			pm8001_ha->memoryMap.region[OB + i].phys_addr_lo;
			pm8001_ha->memoryMap.region[ob_offset + i].phys_addr_lo;
		pm8001_ha->outbnd_q_tbl[i].base_virt		=
			(u8 *)pm8001_ha->memoryMap.region[OB + i].virt_ptr;
		  (u8 *)pm8001_ha->memoryMap.region[ob_offset + i].virt_ptr;
		pm8001_ha->outbnd_q_tbl[i].total_length		=
			pm8001_ha->memoryMap.region[OB + i].total_len;
			pm8001_ha->memoryMap.region[ob_offset + i].total_len;
		pm8001_ha->outbnd_q_tbl[i].pi_upper_base_addr	=
			pm8001_ha->memoryMap.region[PI + i].phys_addr_hi;
			pm8001_ha->memoryMap.region[pi_offset + i].phys_addr_hi;
		pm8001_ha->outbnd_q_tbl[i].pi_lower_base_addr	=
			pm8001_ha->memoryMap.region[PI + i].phys_addr_lo;
			pm8001_ha->memoryMap.region[pi_offset + i].phys_addr_lo;
		pm8001_ha->outbnd_q_tbl[i].interrup_vec_cnt_delay	=
			0 | (10 << 16) | (i << 24);
		pm8001_ha->outbnd_q_tbl[i].pi_virt		=
			pm8001_ha->memoryMap.region[PI + i].virt_ptr;
			pm8001_ha->memoryMap.region[pi_offset + i].virt_ptr;
		offsetob = i * 0x24;
		pm8001_ha->outbnd_q_tbl[i].ci_pci_bar		=
			get_pci_bar_index(pm8001_mr32(addressob,
+73 −44
Original line number Diff line number Diff line
@@ -264,12 +264,36 @@ static u32 pm8001_request_irq(struct pm8001_hba_info *pm8001_ha);
static int pm8001_alloc(struct pm8001_hba_info *pm8001_ha,
			const struct pci_device_id *ent)
{
	int i;
	int i, count = 0, rc = 0;
	u32 ci_offset, ib_offset, ob_offset, pi_offset;
	struct inbound_queue_table *circularQ;

	spin_lock_init(&pm8001_ha->lock);
	spin_lock_init(&pm8001_ha->bitmap_lock);
	PM8001_INIT_DBG(pm8001_ha,
		pm8001_printk("pm8001_alloc: PHY:%x\n",
				pm8001_ha->chip->n_phy));

	/* Setup Interrupt */
	rc = pm8001_setup_irq(pm8001_ha);
	if (rc) {
		PM8001_FAIL_DBG(pm8001_ha, pm8001_printk(
				"pm8001_setup_irq failed [ret: %d]\n", rc));
		goto err_out_shost;
	}
	/* Request Interrupt */
	rc = pm8001_request_irq(pm8001_ha);
	if (rc)
		goto err_out_shost;

	count = pm8001_ha->max_q_num;
	/* Queues are chosen based on the number of cores/msix availability */
	ib_offset = pm8001_ha->ib_offset  = USI_MAX_MEMCNT_BASE + 1;
	ci_offset = pm8001_ha->ci_offset  = ib_offset + count;
	ob_offset = pm8001_ha->ob_offset  = ci_offset + count;
	pi_offset = pm8001_ha->pi_offset  = ob_offset + count;
	pm8001_ha->max_memcnt = pi_offset + count;

	for (i = 0; i < pm8001_ha->chip->n_phy; i++) {
		pm8001_phy_init(pm8001_ha, i);
		pm8001_ha->port[i].wide_port_phymap = 0;
@@ -293,54 +317,62 @@ static int pm8001_alloc(struct pm8001_hba_info *pm8001_ha,
	pm8001_ha->memoryMap.region[IOP].total_len = PM8001_EVENT_LOG_SIZE;
	pm8001_ha->memoryMap.region[IOP].alignment = 32;

	for (i = 0; i < PM8001_MAX_SPCV_INB_NUM; i++) {
	for (i = 0; i < count; i++) {
		circularQ = &pm8001_ha->inbnd_q_tbl[i];
		spin_lock_init(&circularQ->iq_lock);
		/* MPI Memory region 3 for consumer Index of inbound queues */
		pm8001_ha->memoryMap.region[CI+i].num_elements = 1;
		pm8001_ha->memoryMap.region[CI+i].element_size = 4;
		pm8001_ha->memoryMap.region[CI+i].total_len = 4;
		pm8001_ha->memoryMap.region[CI+i].alignment = 4;
		pm8001_ha->memoryMap.region[ci_offset+i].num_elements = 1;
		pm8001_ha->memoryMap.region[ci_offset+i].element_size = 4;
		pm8001_ha->memoryMap.region[ci_offset+i].total_len = 4;
		pm8001_ha->memoryMap.region[ci_offset+i].alignment = 4;

		if ((ent->driver_data) != chip_8001) {
			/* MPI Memory region 5 inbound queues */
			pm8001_ha->memoryMap.region[IB+i].num_elements =
			pm8001_ha->memoryMap.region[ib_offset+i].num_elements =
						PM8001_MPI_QUEUE;
			pm8001_ha->memoryMap.region[IB+i].element_size = 128;
			pm8001_ha->memoryMap.region[IB+i].total_len =
			pm8001_ha->memoryMap.region[ib_offset+i].element_size
								= 128;
			pm8001_ha->memoryMap.region[ib_offset+i].total_len =
						PM8001_MPI_QUEUE * 128;
			pm8001_ha->memoryMap.region[IB+i].alignment = 128;
			pm8001_ha->memoryMap.region[ib_offset+i].alignment
								= 128;
		} else {
			pm8001_ha->memoryMap.region[IB+i].num_elements =
			pm8001_ha->memoryMap.region[ib_offset+i].num_elements =
						PM8001_MPI_QUEUE;
			pm8001_ha->memoryMap.region[IB+i].element_size = 64;
			pm8001_ha->memoryMap.region[IB+i].total_len =
			pm8001_ha->memoryMap.region[ib_offset+i].element_size
								= 64;
			pm8001_ha->memoryMap.region[ib_offset+i].total_len =
						PM8001_MPI_QUEUE * 64;
			pm8001_ha->memoryMap.region[IB+i].alignment = 64;
			pm8001_ha->memoryMap.region[ib_offset+i].alignment = 64;
		}
	}

	for (i = 0; i < PM8001_MAX_SPCV_OUTB_NUM; i++) {
	for (i = 0; i < count; i++) {
		/* MPI Memory region 4 for producer Index of outbound queues */
		pm8001_ha->memoryMap.region[PI+i].num_elements = 1;
		pm8001_ha->memoryMap.region[PI+i].element_size = 4;
		pm8001_ha->memoryMap.region[PI+i].total_len = 4;
		pm8001_ha->memoryMap.region[PI+i].alignment = 4;
		pm8001_ha->memoryMap.region[pi_offset+i].num_elements = 1;
		pm8001_ha->memoryMap.region[pi_offset+i].element_size = 4;
		pm8001_ha->memoryMap.region[pi_offset+i].total_len = 4;
		pm8001_ha->memoryMap.region[pi_offset+i].alignment = 4;

		if (ent->driver_data != chip_8001) {
			/* MPI Memory region 6 Outbound queues */
			pm8001_ha->memoryMap.region[OB+i].num_elements =
			pm8001_ha->memoryMap.region[ob_offset+i].num_elements =
						PM8001_MPI_QUEUE;
			pm8001_ha->memoryMap.region[OB+i].element_size = 128;
			pm8001_ha->memoryMap.region[OB+i].total_len =
			pm8001_ha->memoryMap.region[ob_offset+i].element_size
								= 128;
			pm8001_ha->memoryMap.region[ob_offset+i].total_len =
						PM8001_MPI_QUEUE * 128;
			pm8001_ha->memoryMap.region[OB+i].alignment = 128;
			pm8001_ha->memoryMap.region[ob_offset+i].alignment
								= 128;
		} else {
			/* MPI Memory region 6 Outbound queues */
			pm8001_ha->memoryMap.region[OB+i].num_elements =
			pm8001_ha->memoryMap.region[ob_offset+i].num_elements =
						PM8001_MPI_QUEUE;
			pm8001_ha->memoryMap.region[OB+i].element_size = 64;
			pm8001_ha->memoryMap.region[OB+i].total_len =
			pm8001_ha->memoryMap.region[ob_offset+i].element_size
								= 64;
			pm8001_ha->memoryMap.region[ob_offset+i].total_len =
						PM8001_MPI_QUEUE * 64;
			pm8001_ha->memoryMap.region[OB+i].alignment = 64;
			pm8001_ha->memoryMap.region[ob_offset+i].alignment = 64;
		}

	}
@@ -369,7 +401,7 @@ static int pm8001_alloc(struct pm8001_hba_info *pm8001_ha,
	pm8001_ha->memoryMap.region[FORENSIC_MEM].total_len = 0x10000;
	pm8001_ha->memoryMap.region[FORENSIC_MEM].element_size = 0x10000;
	pm8001_ha->memoryMap.region[FORENSIC_MEM].alignment = 0x10000;
	for (i = 0; i < USI_MAX_MEMCNT; i++) {
	for (i = 0; i < pm8001_ha->max_memcnt; i++) {
		if (pm8001_mem_alloc(pm8001_ha->pdev,
			&pm8001_ha->memoryMap.region[i].virt_ptr,
			&pm8001_ha->memoryMap.region[i].phys_addr,
@@ -405,6 +437,8 @@ static int pm8001_alloc(struct pm8001_hba_info *pm8001_ha,
	/* Initialize tags */
	pm8001_tag_init(pm8001_ha);
	return 0;
err_out_shost:
	scsi_remove_host(pm8001_ha->shost);
err_out:
	return 1;
}
@@ -899,7 +933,8 @@ static int pm8001_configure_phy_settings(struct pm8001_hba_info *pm8001_ha)
static u32 pm8001_setup_msix(struct pm8001_hba_info *pm8001_ha)
{
	u32 number_of_intr;
	int rc;
	int rc, cpu_online_count;
	unsigned int allocated_irq_vectors;

	/* SPCv controllers supports 64 msi-x */
	if (pm8001_ha->chip_id == chip_8001) {
@@ -908,13 +943,21 @@ static u32 pm8001_setup_msix(struct pm8001_hba_info *pm8001_ha)
		number_of_intr = PM8001_MAX_MSIX_VEC;
	}

	cpu_online_count = num_online_cpus();
	number_of_intr = min_t(int, cpu_online_count, number_of_intr);
	rc = pci_alloc_irq_vectors(pm8001_ha->pdev, number_of_intr,
			number_of_intr, PCI_IRQ_MSIX);
	number_of_intr = rc;
	allocated_irq_vectors = rc;
	if (rc < 0)
		return rc;

	/* Assigns the number of interrupts */
	number_of_intr = min_t(int, allocated_irq_vectors, number_of_intr);
	pm8001_ha->number_of_intr = number_of_intr;

	/* Maximum queue number updating in HBA structure */
	pm8001_ha->max_q_num = number_of_intr;

	PM8001_INIT_DBG(pm8001_ha, pm8001_printk(
		"pci_alloc_irq_vectors request ret:%d no of intr %d\n",
				rc, pm8001_ha->number_of_intr));
@@ -1069,13 +1112,6 @@ static int pm8001_pci_probe(struct pci_dev *pdev,
		rc = -ENOMEM;
		goto err_out_free;
	}
	/* Setup Interrupt */
	rc = pm8001_setup_irq(pm8001_ha);
	if (rc)	{
		PM8001_FAIL_DBG(pm8001_ha, pm8001_printk(
			"pm8001_setup_irq failed [ret: %d]\n", rc));
		goto err_out_shost;
	}

	PM8001_CHIP_DISP->chip_soft_rst(pm8001_ha);
	rc = PM8001_CHIP_DISP->chip_init(pm8001_ha);
@@ -1088,13 +1124,6 @@ static int pm8001_pci_probe(struct pci_dev *pdev,
	rc = scsi_add_host(shost, &pdev->dev);
	if (rc)
		goto err_out_ha_free;
	/* Request Interrupt */
	rc = pm8001_request_irq(pm8001_ha);
	if (rc)	{
		PM8001_FAIL_DBG(pm8001_ha, pm8001_printk(
			"pm8001_request_irq failed [ret: %d]\n", rc));
		goto err_out_shost;
	}

	PM8001_CHIP_DISP->interrupt_enable(pm8001_ha, 0);
	if (pm8001_ha->chip_id != chip_8001) {
+9 −2
Original line number Diff line number Diff line
@@ -468,6 +468,7 @@ struct inbound_queue_table {
	u32			reserved;
	__le32			consumer_index;
	u32			producer_idx;
	spinlock_t		iq_lock;
};
struct outbound_queue_table {
	u32			element_size_cnt;
@@ -524,8 +525,8 @@ struct pm8001_hba_info {
	void __iomem	*fatal_tbl_addr; /*MPI IVT Table Addr */
	union main_cfg_table	main_cfg_tbl;
	union general_status_table	gs_tbl;
	struct inbound_queue_table	inbnd_q_tbl[PM8001_MAX_SPCV_INB_NUM];
	struct outbound_queue_table	outbnd_q_tbl[PM8001_MAX_SPCV_OUTB_NUM];
	struct inbound_queue_table	inbnd_q_tbl[PM8001_MAX_INB_NUM];
	struct outbound_queue_table	outbnd_q_tbl[PM8001_MAX_OUTB_NUM];
	struct sas_phy_attribute_table	phy_attr_table;
					/* MPI SAS PHY attributes */
	u8			sas_addr[SAS_ADDR_SIZE];
@@ -561,6 +562,12 @@ struct pm8001_hba_info {
	u32			reset_in_progress;
	u32			non_fatal_count;
	u32			non_fatal_read_length;
	u32 max_q_num;
	u32 ib_offset;
	u32 ob_offset;
	u32 ci_offset;
	u32 pi_offset;
	u32 max_memcnt;
};

struct pm8001_work {
Loading