Commit 875fbdfe authored by James.Smart@Emulex.Com's avatar James.Smart@Emulex.Com Committed by James Bottomley
Browse files

[SCSI] lpfc 8.1.1 : Add polled-mode support



- Add functionality to run in polled mode only. Includes run time
  attribute to enable mode.
- Enable runtime writable hba settings for coalescing and delay parameters

Customers have requested a mode in the driver to run strictly polled.
This is generally to support an environment where the server is extremely
loaded and is looking to reclaim some cpu cycles from adapter interrupt
handling.

This patch adds a new "poll" attribute, and the following behavior:

if value is 0 (default):
  The driver uses the normal method for i/o completion. It uses the
  firmware feature of interrupt coalescing. The firmware allows a
  minimum number of i/o completions before an interrupt, or a maximum
  time delay between interrupts.  By default, the driver sets these
  to no delay (disabled) or 1 i/o - meaning coalescing is disabled.

  Attributes were provided to change the coalescing values, but they could
  be set only at module-load time and applied globally to all adapters.
  This patch allows them to be writable on a per-adapter basis.

if value is 1 :
  Interrupts are left enabled, expecting that the user has tuned the
  interrupt coalescing values. When this setting is enabled, the driver
  will attempt to service completed i/o whenever new i/o is submitted
  to the adapter. If the coalescing values are large, and the i/o
  generation rate steady, an interrupt will be avoided by servicing
  completed i/o prior to the coalescing thresholds kicking in. However,
  if the i/o completion load is high enough or i/o generation slow, the
  coalescing values will ensure that completed i/o is serviced in a timely
  fashion.

if value is 3 :
  Turns off FCP i/o interrupts altogether. The coalescing values now have
  no effect. A new attribute "poll_tmo" (default 10ms) exists to set
  the polling interval for i/o completion. When this setting is enabled,
  the driver will attempt to service completed i/o and restart the
  interval timer whenever new i/o is submitted. This behavior allows for
  servicing of completed i/o sooner than the interval timer, but ensures
  that if no i/o is being issued, then the interval timer will kick in
  to service the outstanding i/o.

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
parent 5cc36b3c
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -45,6 +45,11 @@ struct lpfc_sli2_slim;

#define MAX_HBAEVT	32

/*
 * Bit flags kept in lpfc_hba.cfg_poll to select the FCP ring polling mode.
 * The driver only accepts 0, ENABLE_FCP_RING_POLLING alone, or both bits
 * together; DISABLE_FCP_RING_INT on its own is rejected by the sysfs
 * store handler.
 */
enum lpfc_polling_flags {
	/* service FCP ring completions by polling */
	ENABLE_FCP_RING_POLLING = (1 << 0),
	/* additionally mask the FCP ring interrupt in the host control reg */
	DISABLE_FCP_RING_INT    = (1 << 1)
};

/* Provide DMA memory definitions the driver uses per port instance. */
struct lpfc_dmabuf {
	struct list_head list;
@@ -287,6 +292,8 @@ struct lpfc_hba {
	uint32_t cfg_fcp_bind_method;
	uint32_t cfg_discovery_threads;
	uint32_t cfg_max_luns;
	uint32_t cfg_poll;
	uint32_t cfg_poll_tmo;
	uint32_t cfg_sg_seg_cnt;
	uint32_t cfg_sg_dma_buf_size;

@@ -338,7 +345,9 @@ struct lpfc_hba {
#define VPD_PORT            0x8         /* valid vpd port data */
#define VPD_MASK            0xf         /* mask for any vpd data */

	struct timer_list fcp_poll_timer;
	struct timer_list els_tmofunc;

	/*
	 * stat  counters
	 */
@@ -349,6 +358,7 @@ struct lpfc_hba {
	struct lpfc_sysfs_mbox sysfs_mbox;

	/* fastpath list. */
	spinlock_t scsi_buf_list_lock;
	struct list_head lpfc_scsi_buf_list;
	uint32_t total_scsi_bufs;
	struct list_head lpfc_iocb_list;
+90 −2
Original line number Diff line number Diff line
@@ -278,6 +278,71 @@ lpfc_board_online_store(struct class_device *cdev, const char *buf,
		return -EIO;
}

static ssize_t
lpfc_poll_show(struct class_device *cdev, char *buf)
{
	struct Scsi_Host *host = class_to_shost(cdev);
	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];

	return snprintf(buf, PAGE_SIZE, "%#x\n", phba->cfg_poll);
}

/*
 * sysfs "store" handler for the lpfc_poll attribute.
 *
 * Parses an integer (any base understood by "%i") from buf and applies it
 * as the adapter's polling mode.  Accepted values:
 *   0 - no polling
 *   1 - ENABLE_FCP_RING_POLLING
 *   3 - ENABLE_FCP_RING_POLLING | DISABLE_FCP_RING_INT
 *
 * Switching DISABLE_FCP_RING_INT on clears the FCP ring interrupt enable
 * bit in the host control register and starts the poll timer; switching it
 * off deletes the timer and sets the enable bit again.  cfg_poll is
 * updated under host_lock.
 *
 * Returns count (the whole buffer is consumed) on success, or -EINVAL when
 * the input does not start with a digit, cannot be parsed, has bits outside
 * the two polling flags, or requests DISABLE_FCP_RING_INT without
 * ENABLE_FCP_RING_POLLING.
 */
static ssize_t
lpfc_poll_store(struct class_device *cdev, const char *buf,
		size_t count)
{
	struct Scsi_Host *host = class_to_shost(cdev);
	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata[0];
	uint32_t creg_val;
	uint32_t old_val;
	int val=0;

	if (!isdigit(buf[0]))
		return -EINVAL;

	if (sscanf(buf, "%i", &val) != 1)
		return -EINVAL;

	/* Reject any bit that is not one of the two polling flags. */
	if ((val & (ENABLE_FCP_RING_POLLING | DISABLE_FCP_RING_INT)) != val)
		return -EINVAL;

	spin_lock_irq(phba->host->host_lock);

	old_val = phba->cfg_poll;

	if (val & ENABLE_FCP_RING_POLLING) {
		/* Only touch the hardware on a 0->1 transition of the flag. */
		if ((val & DISABLE_FCP_RING_INT) &&
		    !(old_val & DISABLE_FCP_RING_INT)) {
			creg_val = readl(phba->HCregaddr);
			creg_val &= ~(HC_R0INT_ENA << LPFC_FCP_RING);
			writel(creg_val, phba->HCregaddr);
			readl(phba->HCregaddr); /* flush */

			lpfc_poll_start_timer(phba);
		}
	} else if (val != 0x0) {
		/* DISABLE_FCP_RING_INT without polling is not a valid mode. */
		spin_unlock_irq(phba->host->host_lock);
		return -EINVAL;
	}

	if (!(val & DISABLE_FCP_RING_INT) &&
	    (old_val & DISABLE_FCP_RING_INT))
	{
		/*
		 * NOTE(review): host_lock is dropped around del_timer(); the
		 * timer handler itself takes host_lock.  Confirm whether the
		 * drop is actually required before changing this sequence.
		 */
		spin_unlock_irq(phba->host->host_lock);
		del_timer(&phba->fcp_poll_timer);
		spin_lock_irq(phba->host->host_lock);
		creg_val = readl(phba->HCregaddr);
		creg_val |= (HC_R0INT_ENA << LPFC_FCP_RING);
		writel(creg_val, phba->HCregaddr);
		readl(phba->HCregaddr); /* flush */
	}

	phba->cfg_poll = val;

	spin_unlock_irq(phba->host->host_lock);

	/* The entire write was consumed; report it as such to sysfs. */
	return count;
}

#define lpfc_param_show(attr)	\
static ssize_t \
@@ -416,6 +481,15 @@ static CLASS_DEVICE_ATTR(management_version, S_IRUGO, management_version_show,
static CLASS_DEVICE_ATTR(board_online, S_IRUGO | S_IWUSR,
			 lpfc_board_online_show, lpfc_board_online_store);

/*
# lpfc_poll: FCP ring polling mode control. This module-load value is copied
# into each adapter's cfg_poll by lpfc_get_cfgparam(); the per-adapter
# "lpfc_poll" class-device attribute below is readable by all and writable
# by the owner at run time.
#       0 - none
#       1 - poll with interrupts enabled
#       3 - poll and disable FCP ring interrupts
*/
static int lpfc_poll = 0;
module_param(lpfc_poll, int, 0);
MODULE_PARM_DESC(lpfc_poll, "FCP ring polling mode control:"
		 " 0 - none,"
		 " 1 - poll with interrupts enabled,"
		 " 3 - poll and disable FCP ring interrupts");

static CLASS_DEVICE_ATTR(lpfc_poll, S_IRUGO | S_IWUSR,
			 lpfc_poll_show, lpfc_poll_store);

/*
# lpfc_log_verbose: Only turn this flag on if you are willing to risk being
@@ -523,10 +597,10 @@ LPFC_ATTR_R(ack0, 0, 0, 1, "Enable ACK0 support");
# is 0. Default value of cr_count is 1. The cr_count feature is disabled if
# cr_delay is set to 0.
*/
LPFC_ATTR(cr_delay, 0, 0, 63, "A count of milliseconds after which an"
LPFC_ATTR_RW(cr_delay, 0, 0, 63, "A count of milliseconds after which an"
		"interrupt response is generated");

LPFC_ATTR(cr_count, 1, 1, 255, "A count of I/O completions after which an"
LPFC_ATTR_RW(cr_count, 1, 1, 255, "A count of I/O completions after which an"
		"interrupt response is generated");

/*
@@ -553,6 +627,13 @@ LPFC_ATTR(discovery_threads, 32, 1, 64, "Maximum number of ELS commands"
LPFC_ATTR_R(max_luns, 256, 1, 32768,
	     "Maximum number of LUNs per target driver will support");

/*
# lpfc_poll_tmo: Milliseconds the driver will wait between polls of the FCP
# ring. The value is converted with msecs_to_jiffies() when the FCP poll
# timer is re-armed. Value range is [1,255], default value is 10.
*/
LPFC_ATTR_RW(poll_tmo, 10, 1, 255,
	     "Milliseconds driver will wait between polling FCP ring");

struct class_device_attribute *lpfc_host_attrs[] = {
	&class_device_attr_info,
	&class_device_attr_serialnum,
@@ -575,11 +656,15 @@ struct class_device_attribute *lpfc_host_attrs[] = {
	&class_device_attr_lpfc_topology,
	&class_device_attr_lpfc_scan_down,
	&class_device_attr_lpfc_link_speed,
	&class_device_attr_lpfc_cr_delay,
	&class_device_attr_lpfc_cr_count,
	&class_device_attr_lpfc_fdmi_on,
	&class_device_attr_lpfc_max_luns,
	&class_device_attr_nport_evt_cnt,
	&class_device_attr_management_version,
	&class_device_attr_board_online,
	&class_device_attr_lpfc_poll,
	&class_device_attr_lpfc_poll_tmo,
	NULL,
};

@@ -1292,6 +1377,9 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
	lpfc_fdmi_on_init(phba, lpfc_fdmi_on);
	lpfc_discovery_threads_init(phba, lpfc_discovery_threads);
	lpfc_max_luns_init(phba, lpfc_max_luns);
	lpfc_poll_tmo_init(phba, lpfc_poll_tmo);

	phba->cfg_poll = lpfc_poll;

	/*
	 * The total number of segments is the configuration value plus 2
+3 −0
Original line number Diff line number Diff line
@@ -143,6 +143,9 @@ LPFC_MBOXQ_t *lpfc_mbox_get(struct lpfc_hba *);
int lpfc_mem_alloc(struct lpfc_hba *);
void lpfc_mem_free(struct lpfc_hba *);

/*
 * FCP ring polled-mode support.
 *
 * lpfc_poll_timeout: callback installed on phba->fcp_poll_timer; ptr is the
 *	struct lpfc_hba pointer cast to unsigned long.
 * lpfc_poll_start_timer: arms the FCP poll timer for the given adapter.
 * lpfc_sli_poll_fcp_ring: invoked wherever the driver polls the FCP ring;
 *	presumably services completed i/o on that ring (definition is not in
 *	this header -- confirm against the SLI code).
 */
void lpfc_poll_timeout(unsigned long ptr);
void lpfc_poll_start_timer(struct lpfc_hba * phba);
void lpfc_sli_poll_fcp_ring(struct lpfc_hba * hba);
struct lpfc_iocbq * lpfc_sli_get_iocbq(struct lpfc_hba *);
void lpfc_sli_release_iocbq(struct lpfc_hba * phba, struct lpfc_iocbq * iocb);
uint16_t lpfc_sli_next_iotag(struct lpfc_hba * phba, struct lpfc_iocbq * iocb);
+16 −0
Original line number Diff line number Diff line
@@ -370,6 +370,10 @@ lpfc_config_port_post(struct lpfc_hba * phba)
	if (psli->num_rings > 3)
		status |= HC_R3INT_ENA;

	if ((phba->cfg_poll & ENABLE_FCP_RING_POLLING) &&
	    (phba->cfg_poll & DISABLE_FCP_RING_INT))
		status &= ~(HC_R0INT_ENA << LPFC_FCP_RING);

	writel(status, phba->HCregaddr);
	readl(phba->HCregaddr); /* flush */
	spin_unlock_irq(phba->host->host_lock);
@@ -1237,6 +1241,7 @@ lpfc_stop_timer(struct lpfc_hba * phba)
		}
	}

	del_timer_sync(&phba->fcp_poll_timer);
	del_timer_sync(&phba->fc_estabtmo);
	del_timer_sync(&phba->fc_disctmo);
	del_timer_sync(&phba->fc_fdmitmo);
@@ -1416,6 +1421,10 @@ lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid)
	psli->mbox_tmo.function = lpfc_mbox_timeout;
	psli->mbox_tmo.data = (unsigned long)phba;

	init_timer(&phba->fcp_poll_timer);
	phba->fcp_poll_timer.function = lpfc_poll_timeout;
	phba->fcp_poll_timer.data = (unsigned long)phba;

	/*
	 * Get all the module params for configuring this host and then
	 * establish the host parameters.
@@ -1530,6 +1539,7 @@ lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid)
	host->max_cmd_len = 16;

	/* Initialize the list of scsi buffers used by driver for scsi IO. */
	spin_lock_init(&phba->scsi_buf_list_lock);
	INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list);

	host->transportt = lpfc_transport_template;
@@ -1561,6 +1571,12 @@ lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid)
	if (error)
		goto out_free_irq;

	if (phba->cfg_poll & DISABLE_FCP_RING_INT) {
		spin_lock_irq(phba->host->host_lock);
		lpfc_poll_start_timer(phba);
		spin_unlock_irq(phba->host->host_lock);
	}

	/*
	 * set fixed host attributes
	 * Must done after lpfc_sli_hba_setup()
+62 −10
Original line number Diff line number Diff line
@@ -151,18 +151,22 @@ lpfc_new_scsi_buf(struct lpfc_hba * phba)
}

/*
 * Take one scsi buffer from the head of phba->lpfc_scsi_buf_list.
 *
 * The list is protected by scsi_buf_list_lock, taken here with interrupts
 * saved/disabled.  Returns the removed buffer; callers treat a NULL return
 * as "buffer pool is empty" (lpfc_cmd starts as NULL and is presumably left
 * untouched by list_remove_head() on an empty list -- confirm against the
 * macro definition).
 */
struct  lpfc_scsi_buf*
lpfc_sli_get_scsi_buf(struct lpfc_hba * phba)
lpfc_get_scsi_buf(struct lpfc_hba * phba)
{
	struct  lpfc_scsi_buf * lpfc_cmd = NULL;
	struct list_head *scsi_buf_list = &phba->lpfc_scsi_buf_list;
	unsigned long iflag = 0;

	spin_lock_irqsave(&phba->scsi_buf_list_lock, iflag);
	list_remove_head(scsi_buf_list, lpfc_cmd, struct lpfc_scsi_buf, list);
	spin_unlock_irqrestore(&phba->scsi_buf_list_lock, iflag);
	return  lpfc_cmd;
}

static void
lpfc_release_scsi_buf(struct lpfc_hba * phba, struct lpfc_scsi_buf * psb)
{
	unsigned long iflag = 0;
	/*
	 * There are only two special cases to consider.  (1) the scsi command
	 * requested scatter-gather usage or (2) the scsi command allocated
@@ -180,8 +184,10 @@ lpfc_release_scsi_buf(struct lpfc_hba * phba, struct lpfc_scsi_buf * psb)
		 }
	}

	spin_lock_irqsave(&phba->scsi_buf_list_lock, iflag);
	psb->pCmd = NULL;
	list_add_tail(&psb->list, &phba->lpfc_scsi_buf_list);
	spin_unlock_irqrestore(&phba->scsi_buf_list_lock, iflag);
}

static int
@@ -403,7 +409,6 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
	struct lpfc_rport_data *rdata = lpfc_cmd->rdata;
	struct lpfc_nodelist *pnode = rdata->pnode;
	struct scsi_cmnd *cmd = lpfc_cmd->pCmd;
	unsigned long iflag;

	lpfc_cmd->result = pIocbOut->iocb.un.ulpWord[4];
	lpfc_cmd->status = pIocbOut->iocb.ulpStatus;
@@ -457,9 +462,7 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,

	cmd->scsi_done(cmd);

	spin_lock_irqsave(phba->host->host_lock, iflag);
	lpfc_release_scsi_buf(phba, lpfc_cmd);
	spin_unlock_irqrestore(phba->host->host_lock, iflag);
}

static void
@@ -707,6 +710,37 @@ lpfc_info(struct Scsi_Host *host)
	return lpfcinfobuf;
}

/*
 * (Re)arm the FCP poll timer to expire cfg_poll_tmo milliseconds from now.
 *
 * The timer is only touched when the FCP ring's txcmplq_cnt is non-zero
 * (presumably: commands are still awaiting completion -- confirm against
 * the SLI ring definition); otherwise this is a no-op.
 */
static __inline__ void lpfc_poll_rearm_timer(struct lpfc_hba * phba)
{
	unsigned long deadline;

	deadline = jiffies + msecs_to_jiffies(phba->cfg_poll_tmo);

	if (!phba->sli.ring[LPFC_FCP_RING].txcmplq_cnt)
		return;

	mod_timer(&phba->fcp_poll_timer, deadline);
}

/*
 * Non-static entry point for starting the FCP poll timer on an adapter.
 * It simply defers to lpfc_poll_rearm_timer().
 */
void
lpfc_poll_start_timer(struct lpfc_hba *phba)
{
	lpfc_poll_rearm_timer(phba);
}

/*
 * Expiry handler for fcp_poll_timer.
 *
 * ptr is the struct lpfc_hba pointer stored in the timer's data field.
 * With host_lock held, polls the FCP ring if polling is enabled and, when
 * FCP ring interrupts are disabled, re-arms the timer for the next interval.
 */
void
lpfc_poll_timeout(unsigned long ptr)
{
	struct lpfc_hba *hba = (struct lpfc_hba *)ptr;
	unsigned long flags;

	spin_lock_irqsave(hba->host->host_lock, flags);

	if (!(hba->cfg_poll & ENABLE_FCP_RING_POLLING))
		goto out_unlock;

	lpfc_sli_poll_fcp_ring(hba);

	/* cfg_poll is deliberately re-read after the ring has been polled. */
	if (hba->cfg_poll & DISABLE_FCP_RING_INT)
		lpfc_poll_rearm_timer(hba);

 out_unlock:
	spin_unlock_irqrestore(hba->host->host_lock, flags);
}

static int
lpfc_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *))
{
@@ -733,7 +767,7 @@ lpfc_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *))
		cmnd->result = ScsiResult(DID_BUS_BUSY, 0);
		goto out_fail_command;
	}
	lpfc_cmd = lpfc_sli_get_scsi_buf (phba);
	lpfc_cmd = lpfc_get_scsi_buf (phba);
	if (lpfc_cmd == NULL) {
		lpfc_printf_log(phba, KERN_INFO, LOG_FCP,
				"%d:0707 driver's buffer pool is empty, "
@@ -761,11 +795,17 @@ lpfc_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *))
				&lpfc_cmd->cur_iocbq, SLI_IOCB_RET_IOCB);
	if (err)
		goto out_host_busy_free_buf;

	if (phba->cfg_poll & ENABLE_FCP_RING_POLLING) {
		lpfc_sli_poll_fcp_ring(phba);
		if (phba->cfg_poll & DISABLE_FCP_RING_INT)
			lpfc_poll_rearm_timer(phba);
	}

	return 0;

 out_host_busy_free_buf:
	lpfc_release_scsi_buf(phba, lpfc_cmd);
	cmnd->host_scribble = NULL;
 out_host_busy:
	return SCSI_MLQUEUE_HOST_BUSY;

@@ -839,9 +879,15 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
		goto out;
	}

	if (phba->cfg_poll & DISABLE_FCP_RING_INT)
		lpfc_sli_poll_fcp_ring (phba);

	/* Wait for abort to complete */
	while (lpfc_cmd->pCmd == cmnd)
	{
		if (phba->cfg_poll & DISABLE_FCP_RING_INT)
			lpfc_sli_poll_fcp_ring (phba);

		spin_unlock_irq(phba->host->host_lock);
			schedule_timeout_uninterruptible(LPFC_ABORT_WAIT*HZ);
		spin_lock_irq(phba->host->host_lock);
@@ -905,7 +951,7 @@ lpfc_reset_lun_handler(struct scsi_cmnd *cmnd)
			break;
	}

	lpfc_cmd = lpfc_sli_get_scsi_buf (phba);
	lpfc_cmd = lpfc_get_scsi_buf (phba);
	if (lpfc_cmd == NULL)
		goto out;

@@ -1001,7 +1047,7 @@ lpfc_reset_bus_handler(struct scsi_cmnd *cmnd)
	lpfc_block_requests(phba);
	spin_lock_irq(shost->host_lock);

	lpfc_cmd = lpfc_sli_get_scsi_buf (phba);
	lpfc_cmd = lpfc_get_scsi_buf(phba);
	if (lpfc_cmd == NULL)
		goto out;

@@ -1136,10 +1182,10 @@ lpfc_slave_alloc(struct scsi_device *sdev)
			break;
		}

		spin_lock_irqsave(phba->host->host_lock, flags);
		spin_lock_irqsave(&phba->scsi_buf_list_lock, flags);
		phba->total_scsi_bufs++;
		list_add_tail(&scsi_buf->list, &phba->lpfc_scsi_buf_list);
		spin_unlock_irqrestore(phba->host->host_lock, flags);
		spin_unlock_irqrestore(&phba->scsi_buf_list_lock, flags);
	}
	return 0;
}
@@ -1163,6 +1209,12 @@ lpfc_slave_configure(struct scsi_device *sdev)
	 */
	rport->dev_loss_tmo = phba->cfg_nodev_tmo + 5;

	if (phba->cfg_poll & ENABLE_FCP_RING_POLLING) {
		lpfc_sli_poll_fcp_ring(phba);
		if (phba->cfg_poll & DISABLE_FCP_RING_INT)
			lpfc_poll_rearm_timer(phba);
	}

	return 0;
}

Loading