Commit 0ae207fb authored by Håkon Bugge's avatar Håkon Bugge Committed by Jason Gunthorpe
Browse files

IB/mlx4: Separate tunnel and wire bufs parameters

Using CX-3 in virtualized mode, MAD packets are proxied through the PF
driver. The feed is N tunnel QPs, and what is received from the VFs is
multiplexed out on the wire QP. Since this is a many-to-one scenario, it
is better to have separate initialization parameters for the two usages.

The number of wire and tunnel bufs are yanked up to 2K and 512
respectively. With this set of parameters, a system consisting of eight
physical servers, each with eight VMs and 14 I/O servers (BM), can run
switch fail-over without seeing:

mlx4_ib_demux_mad: failed sending GSI to slave 3 via tunnel qp (-11)

or

mlx4_ib_multiplex_mad: failed sending GSI to wire on behalf of slave 2 (-11)

Fixes: 3cf69cc8 ("IB/mlx4: Add CM paravirtualization")
Link: https://lore.kernel.org/r/20200803061941.1139994-4-haakon.bugge@oracle.com


Signed-off-by: default avatarHåkon Bugge <haakon.bugge@oracle.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@nvidia.com>
parent e7d087fc
Loading
Loading
Loading
Loading
+24 −20
Original line number Diff line number Diff line
@@ -1391,10 +1391,10 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,

	spin_lock(&sqp->tx_lock);
	if (sqp->tx_ix_head - sqp->tx_ix_tail >=
	    (MLX4_NUM_TUNNEL_BUFS - 1))
	    (MLX4_NUM_WIRE_BUFS - 1))
		ret = -EAGAIN;
	else
		wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
		wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_WIRE_BUFS - 1);
	spin_unlock(&sqp->tx_lock);
	if (ret)
		goto out;
@@ -1590,19 +1590,20 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
	int i;
	struct mlx4_ib_demux_pv_qp *tun_qp;
	int rx_buf_size, tx_buf_size;
	const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;

	if (qp_type > IB_QPT_GSI)
		return -EINVAL;

	tun_qp = &ctx->qp[qp_type];

	tun_qp->ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
	tun_qp->ring = kcalloc(nmbr_bufs,
			       sizeof(struct mlx4_ib_buf),
			       GFP_KERNEL);
	if (!tun_qp->ring)
		return -ENOMEM;

	tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
	tun_qp->tx_ring = kcalloc(nmbr_bufs,
				  sizeof (struct mlx4_ib_tun_tx_buf),
				  GFP_KERNEL);
	if (!tun_qp->tx_ring) {
@@ -1619,7 +1620,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
		tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
	}

	for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
	for (i = 0; i < nmbr_bufs; i++) {
		tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
		if (!tun_qp->ring[i].addr)
			goto err;
@@ -1633,7 +1634,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
		}
	}

	for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
	for (i = 0; i < nmbr_bufs; i++) {
		tun_qp->tx_ring[i].buf.addr =
			kmalloc(tx_buf_size, GFP_KERNEL);
		if (!tun_qp->tx_ring[i].buf.addr)
@@ -1664,7 +1665,7 @@ tx_err:
				    tx_buf_size, DMA_TO_DEVICE);
		kfree(tun_qp->tx_ring[i].buf.addr);
	}
	i = MLX4_NUM_TUNNEL_BUFS;
	i = nmbr_bufs;
err:
	while (i > 0) {
		--i;
@@ -1685,6 +1686,7 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
	int i;
	struct mlx4_ib_demux_pv_qp *tun_qp;
	int rx_buf_size, tx_buf_size;
	const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;

	if (qp_type > IB_QPT_GSI)
		return;
@@ -1699,13 +1701,13 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
	}


	for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
	for (i = 0; i < nmbr_bufs; i++) {
		ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
				    rx_buf_size, DMA_FROM_DEVICE);
		kfree(tun_qp->ring[i].addr);
	}

	for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
	for (i = 0; i < nmbr_bufs; i++) {
		ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
				    tx_buf_size, DMA_TO_DEVICE);
		kfree(tun_qp->tx_ring[i].buf.addr);
@@ -1785,6 +1787,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
	struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
	struct ib_qp_attr attr;
	int qp_attr_mask_INIT;
	const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;

	if (qp_type > IB_QPT_GSI)
		return -EINVAL;
@@ -1795,8 +1798,8 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
	qp_init_attr.init_attr.send_cq = ctx->cq;
	qp_init_attr.init_attr.recv_cq = ctx->cq;
	qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
	qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS;
	qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS;
	qp_init_attr.init_attr.cap.max_send_wr = nmbr_bufs;
	qp_init_attr.init_attr.cap.max_recv_wr = nmbr_bufs;
	qp_init_attr.init_attr.cap.max_send_sge = 1;
	qp_init_attr.init_attr.cap.max_recv_sge = 1;
	if (create_tun) {
@@ -1858,7 +1861,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
		goto err_qp;
	}

	for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
	for (i = 0; i < nmbr_bufs; i++) {
		ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
		if (ret) {
			pr_err(" mlx4_ib_post_pv_buf error"
@@ -1894,8 +1897,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
			switch (wc.opcode) {
			case IB_WC_SEND:
				kfree(sqp->tx_ring[wc.wr_id &
				      (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
				sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
				      (MLX4_NUM_WIRE_BUFS - 1)].ah);
				sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah
					= NULL;
				spin_lock(&sqp->tx_lock);
				sqp->tx_ix_tail++;
@@ -1904,13 +1907,13 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
			case IB_WC_RECV:
				mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
						(sqp->ring[wc.wr_id &
						(MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload);
						(MLX4_NUM_WIRE_BUFS - 1)].addr))->payload);
				grh = &(((struct mlx4_mad_rcv_buf *)
						(sqp->ring[wc.wr_id &
						(MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh);
						(MLX4_NUM_WIRE_BUFS - 1)].addr))->grh);
				mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
				if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
							   (MLX4_NUM_TUNNEL_BUFS - 1)))
							   (MLX4_NUM_WIRE_BUFS - 1)))
					pr_err("Failed reposting SQP "
					       "buf:%lld\n", wc.wr_id);
				break;
@@ -1923,8 +1926,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
				 ctx->slave, wc.status, wc.wr_id);
			if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
				kfree(sqp->tx_ring[wc.wr_id &
				      (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
				sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
				      (MLX4_NUM_WIRE_BUFS - 1)].ah);
				sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah
					= NULL;
				spin_lock(&sqp->tx_lock);
				sqp->tx_ix_tail++;
@@ -1964,6 +1967,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
{
	int ret, cq_size;
	struct ib_cq_init_attr cq_attr = {};
	const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;

	if (ctx->state != DEMUX_PV_STATE_DOWN)
		return -EEXIST;
@@ -1988,7 +1992,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
		goto err_out_qp0;
	}

	cq_size = 2 * MLX4_NUM_TUNNEL_BUFS;
	cq_size = 2 * nmbr_bufs;
	if (ctx->has_smi)
		cq_size *= 2;

+2 −1
Original line number Diff line number Diff line
@@ -233,7 +233,8 @@ enum mlx4_ib_mad_ifc_flags {
};

enum {
	MLX4_NUM_TUNNEL_BUFS		= 256,
	MLX4_NUM_TUNNEL_BUFS		= 512,
	MLX4_NUM_WIRE_BUFS		= 2048,
};

struct mlx4_ib_tunnel_header {