Commit 7263d72b authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'net-smc-preparations-for-SMC-R-link-failover'



Karsten Graul says:

====================
net/smc: preparations for SMC-R link failover

This patch series prepares the SMC code for the implementation of SMC-R link
failover capabilities which are still missing to reach full compliance with
RFC 7609.
The code changes are separated into 65 patches which together form the new
functionality. I tried to create meaningful patches which allow to follow the
implementation.

Question: how to handle the remaining 52 patches? All of them are needed for
link failover to work and should make it into the same merge window.
Can I send them all together?

The SMC-R implementation will transparently make use of the link failover
feature when matching RoCE devices are available, no special setup is required.
All RoCE devices with the same PNET ID as the TCP device (hardware-defined or
user-defined via the smc_pnet tool) are candidates to get used to form a link
in a link group. When at least 2 RoCE devices are available on both
communication endpoints then a symmetric link group is formed, meaning the link
group has 2 independent links. If one RoCE device goes down then all connections
on this link are moved to the surviving link. Upon recovery of the failing
device or availability of a new one, the symmetric link group will be restored.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 21615efa 00a049cf
Loading
Loading
Loading
Loading
+46 −33
Original line number Diff line number Diff line
@@ -338,36 +338,53 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
}

/* register a new rmb, send confirm_rkey msg to register with peer */
static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
		       bool conf_rkey)
static int smcr_link_reg_rmb(struct smc_link *link,
			     struct smc_buf_desc *rmb_desc, bool conf_rkey)
{
	if (!rmb_desc->wr_reg) {
	if (!rmb_desc->is_reg_mr[link->link_idx]) {
		/* register memory region for new rmb */
		if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
			rmb_desc->regerr = 1;
		if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
			rmb_desc->is_reg_err = true;
			return -EFAULT;
		}
		rmb_desc->wr_reg = 1;
		rmb_desc->is_reg_mr[link->link_idx] = true;
	}
	if (!conf_rkey)
		return 0;

	/* exchange confirm_rkey msg with peer */
	if (!rmb_desc->is_conf_rkey) {
		if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
		rmb_desc->regerr = 1;
			rmb_desc->is_reg_err = true;
			return -EFAULT;
		}
		rmb_desc->is_conf_rkey = true;
	}
	return 0;
}

static int smc_clnt_conf_first_link(struct smc_sock *smc)
/* register the new rmb on all links */
static int smcr_lgr_reg_rmbs(struct smc_link_group *lgr,
			     struct smc_buf_desc *rmb_desc)
{
	struct net *net = sock_net(smc->clcsock->sk);
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int i, rc;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
			continue;
		rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc, true);
		if (rc)
			return rc;
	}
	return 0;
}

static int smcr_clnt_conf_first_link(struct smc_sock *smc)
{
	struct smc_link *link = smc->conn.lnk;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];
	/* receive CONFIRM LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm,
@@ -389,7 +406,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)

	smc_wr_remember_qp_attr(link);

	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* send CONFIRM LINK response over RoCE fabric */
@@ -415,7 +432,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_AL;

	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
	smc_llc_link_active(link);

	return 0;
}
@@ -610,7 +627,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
		mutex_unlock(&smc_client_lgr_pending);
		return reason_code;
	}
	link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
	link = smc->conn.lnk;

	smc_conn_save_peer_info(smc, aclc);

@@ -622,7 +639,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
	if (ini->cln_first_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, aclc);

	if (smc_rmb_rtoken_handling(&smc->conn, aclc))
	if (smc_rmb_rtoken_handling(&smc->conn, link, aclc))
		return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
					 ini->cln_first_contact);

@@ -634,7 +651,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
						 ini->cln_first_contact);
	} else {
		if (smc_reg_rmb(link, smc->conn.rmb_desc, true))
		if (smcr_lgr_reg_rmbs(smc->conn.lgr, smc->conn.rmb_desc))
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
						 ini->cln_first_contact);
	}
@@ -649,7 +666,7 @@ static int smc_connect_rdma(struct smc_sock *smc,

	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		/* QP confirmation over RoCE fabric */
		reason_code = smc_clnt_conf_first_link(smc);
		reason_code = smcr_clnt_conf_first_link(smc);
		if (reason_code)
			return smc_connect_abort(smc, reason_code,
						 ini->cln_first_contact);
@@ -999,17 +1016,13 @@ void smc_close_non_accepted(struct sock *sk)
	sock_put(sk); /* final sock_put */
}

static int smc_serv_conf_first_link(struct smc_sock *smc)
static int smcr_serv_conf_first_link(struct smc_sock *smc)
{
	struct net *net = sock_net(smc->clcsock->sk);
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	struct smc_link *link = smc->conn.lnk;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];

	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* send CONFIRM LINK request to client over the RoCE fabric */
@@ -1050,7 +1063,7 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
	}

	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
	smc_llc_link_active(link);

	return 0;
}
@@ -1194,10 +1207,10 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
{
	struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
	struct smc_connection *conn = &new_smc->conn;

	if (local_contact != SMC_FIRST_CONTACT) {
		if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
		if (smcr_lgr_reg_rmbs(conn->lgr, conn->rmb_desc))
			return SMC_CLC_DECL_ERR_REGRMB;
	}
	smc_rmb_sync_sg_for_device(&new_smc->conn);
@@ -1210,13 +1223,13 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc,
				  struct smc_clc_msg_accept_confirm *cclc,
				  int local_contact)
{
	struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
	struct smc_link *link = new_smc->conn.lnk;
	int reason_code = 0;

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, cclc);

	if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
	if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) {
		reason_code = SMC_CLC_DECL_ERR_RTOK;
		goto decline;
	}
@@ -1227,7 +1240,7 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc,
			goto decline;
		}
		/* QP confirmation over RoCE fabric */
		reason_code = smc_serv_conf_first_link(new_smc);
		reason_code = smcr_serv_conf_first_link(new_smc);
		if (reason_code)
			goto decline;
	}
+1 −0
Original line number Diff line number Diff line
@@ -121,6 +121,7 @@ enum smc_urg_state {
struct smc_connection {
	struct rb_node		alert_node;
	struct smc_link_group	*lgr;		/* link group of connection */
	struct smc_link		*lnk;		/* assigned SMC-R link */
	u32			alert_token_local; /* unique conn. id */
	u8			peer_rmbe_idx;	/* from tcp handshake */
	int			peer_rmbe_size;	/* size of peer rx buffer */
+3 −5
Original line number Diff line number Diff line
@@ -57,7 +57,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
			  struct smc_rdma_wr **wr_rdma_buf,
			  struct smc_cdc_tx_pend **pend)
{
	struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
	struct smc_link *link = conn->lnk;
	int rc;

	rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
@@ -91,12 +91,10 @@ int smc_cdc_msg_send(struct smc_connection *conn,
		     struct smc_wr_buf *wr_buf,
		     struct smc_cdc_tx_pend *pend)
{
	struct smc_link *link = conn->lnk;
	union smc_host_cursor cfed;
	struct smc_link *link;
	int rc;

	link = &conn->lgr->lnk[SMC_SINGLE_LINK];

	smc_cdc_add_pending_send(conn, pend);

	conn->tx_cdc_seq++;
@@ -165,7 +163,7 @@ static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend)

void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
{
	struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
	struct smc_link *link = conn->lnk;

	smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE,
				smc_cdc_tx_filter, smc_cdc_tx_dismisser,
+6 −6
Original line number Diff line number Diff line
@@ -496,7 +496,7 @@ int smc_clc_send_confirm(struct smc_sock *smc)
		       sizeof(SMCD_EYECATCHER));
	} else {
		/* SMC-R specific settings */
		link = &conn->lgr->lnk[SMC_SINGLE_LINK];
		link = conn->lnk;
		memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER,
		       sizeof(SMC_EYECATCHER));
		cclc.hdr.path = SMC_TYPE_R;
@@ -508,13 +508,13 @@ int smc_clc_send_confirm(struct smc_sock *smc)
		       ETH_ALEN);
		hton24(cclc.qpn, link->roce_qp->qp_num);
		cclc.rmb_rkey =
			htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
			htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
		cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
		cclc.rmbe_alert_token = htonl(conn->alert_token_local);
		cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
		cclc.rmbe_size = conn->rmbe_size_short;
		cclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
				(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
				(conn->rmb_desc->sgt[link->link_idx].sgl));
		hton24(cclc.psn, link->psn_initial);
		memcpy(cclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
		       sizeof(SMC_EYECATCHER));
@@ -572,7 +572,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
		memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER,
		       sizeof(SMC_EYECATCHER));
		aclc.hdr.path = SMC_TYPE_R;
		link = &conn->lgr->lnk[SMC_SINGLE_LINK];
		link = conn->lnk;
		memcpy(aclc.lcl.id_for_peer, local_systemid,
		       sizeof(local_systemid));
		memcpy(&aclc.lcl.gid, link->gid, SMC_GID_SIZE);
@@ -580,13 +580,13 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
		       ETH_ALEN);
		hton24(aclc.qpn, link->roce_qp->qp_num);
		aclc.rmb_rkey =
			htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
			htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
		aclc.rmbe_idx = 1;		/* as long as 1 RMB = 1 RMBE */
		aclc.rmbe_alert_token = htonl(conn->alert_token_local);
		aclc.qp_mtu = link->path_mtu;
		aclc.rmbe_size = conn->rmbe_size_short,
		aclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
				(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
				(conn->rmb_desc->sgt[link->link_idx].sgl));
		hton24(aclc.psn, link->psn_initial);
		memcpy(aclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
		       sizeof(SMC_EYECATCHER));
+1 −0
Original line number Diff line number Diff line
@@ -44,6 +44,7 @@
#define SMC_CLC_DECL_DIFFPREFIX	0x03070000  /* IP prefix / subnet mismatch    */
#define SMC_CLC_DECL_GETVLANERR	0x03080000  /* err to get vlan id of ip device*/
#define SMC_CLC_DECL_ISMVLANERR	0x03090000  /* err to reg vlan id on ism dev  */
#define SMC_CLC_DECL_NOACTLINK	0x030a0000  /* no active smc-r link in lgr    */
#define SMC_CLC_DECL_SYNCERR	0x04000000  /* synchronization error          */
#define SMC_CLC_DECL_PEERDECL	0x05000000  /* peer declined during handshake */
#define SMC_CLC_DECL_INTERR	0x09990000  /* internal error		      */
Loading