Commit dbec982c authored by David S. Miller
Browse files

Merge branch 'net-smc-cleanups'



Ursula Braun says:

====================
net/smc: cleanups 2018-05-18

here are SMC patches for net-next providing restructuring and cleanup
in different areas.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents d6830519 3b2dec26
Loading
Loading
Loading
Loading
+325 −262
Original line number Diff line number Diff line
@@ -46,11 +46,6 @@ static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group
						 * creation
						 */

/* Global list of link groups together with the spinlock guarding it.
 * Both members are initialized statically, so the list is usable without
 * any runtime setup.
 */
struct smc_lgr_list smc_lgr_list = {		/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
};

static void smc_tcp_listen_work(struct work_struct *);

static void smc_set_keepalive(struct sock *sk, int val)
@@ -382,10 +377,13 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
static void smc_conn_save_peer_info(struct smc_sock *smc,
				    struct smc_clc_msg_accept_confirm *clc)
{
	smc->conn.peer_conn_idx = clc->conn_idx;
	int bufsize = smc_uncompress_bufsize(clc->rmbe_size);

	smc->conn.peer_rmbe_idx = clc->rmbe_idx;
	smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
	smc->conn.peer_rmbe_size = smc_uncompress_bufsize(clc->rmbe_size);
	smc->conn.peer_rmbe_size = bufsize;
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}

static void smc_link_save_peer_info(struct smc_link *link,
@@ -398,165 +396,186 @@ static void smc_link_save_peer_info(struct smc_link *link,
	link->peer_mtu = clc->qp_mtu;
}

/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc)
{
	struct smc_clc_msg_accept_confirm aclc;
	int local_contact = SMC_FIRST_CONTACT;
	struct smc_ib_device *smcibdev;
	struct smc_link *link;
	u8 srv_first_contact;
	int reason_code = 0;
	int rc = 0;
	u8 ibport;

	sock_hold(&smc->sk); /* sock put in passive closing */
	smc->use_fallback = true;
	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;
	return 0;
}

	if (smc->use_fallback)
		goto out_connected;
/* decline and fall back during connect */
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
{
	int rc;

	if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
		/* peer has not signalled SMC-capability */
		smc->use_fallback = true;
		goto out_connected;
	if (reason_code < 0) /* error, fallback is not possible */
		return reason_code;
	if (reason_code != SMC_CLC_DECL_REPLY) {
		rc = smc_clc_send_decline(smc, reason_code);
		if (rc < 0)
			return rc;
	}
	return smc_connect_fallback(smc);
}

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(smc)) {
		reason_code = SMC_CLC_DECL_IPSEC;
		goto decline_rdma;
/* abort connecting: undo the connection setup and hand back the reason code.
 * Releases smc_create_lgr_pending, so the caller has to hold that mutex.
 * @reason_code is returned unchanged; a negative value is handled as an
 * error, for which the socket reference is dropped as long as the socket
 * is still in state SMC_INIT.
 */
static int smc_connect_abort(struct smc_sock *smc, int reason_code,
			     int local_contact)
{
	/* local_contact is SMC_FIRST_CONTACT: forget the link group */
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_forget(smc->conn.lgr);
	mutex_unlock(&smc_create_lgr_pending);
	smc_conn_free(&smc->conn);
	if (reason_code < 0 && smc->sk.sk_state == SMC_INIT)
		sock_put(&smc->sk); /* passive closing */
	return reason_code;
}

/* check if there is a rdma device available for this connection. */
/* called for connect and listen */
static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
			  u8 *ibport)
{
	int reason_code = 0;

	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(smc->clcsock->sk, &smcibdev, &ibport);
	if (!smcibdev) {
	smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport);
	if (!(*ibdev))
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;

	return reason_code;
}

/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smc_ib_device *ibdev, u8 ibport)
{
	int rc = 0;

	/* do inband token exchange */
	reason_code = smc_clc_send_proposal(smc, smcibdev, ibport);
	if (reason_code < 0) {
		rc = reason_code;
		goto out_err;
	}
	if (reason_code > 0) /* configuration error */
		goto decline_rdma;
	rc = smc_clc_send_proposal(smc, ibdev, ibport);
	if (rc)
		return rc;
	/* receive SMC Accept CLC message */
	reason_code = smc_clc_wait_msg(smc, &aclc, sizeof(aclc),
				       SMC_CLC_ACCEPT);
	if (reason_code < 0) {
		rc = reason_code;
		goto out_err;
	return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT);
}
	if (reason_code > 0)
		goto decline_rdma;

	srv_first_contact = aclc.hdr.flag;
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc,
			    struct smc_clc_msg_accept_confirm *aclc,
			    struct smc_ib_device *ibdev, u8 ibport)
{
	int local_contact = SMC_FIRST_CONTACT;
	struct smc_link *link;
	int reason_code = 0;

	mutex_lock(&smc_create_lgr_pending);
	local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl,
					srv_first_contact);
	local_contact = smc_conn_create(smc, ibdev, ibport, &aclc->lcl,
					aclc->hdr.flag);
	if (local_contact < 0) {
		rc = local_contact;
		if (rc == -ENOMEM)
		if (local_contact == -ENOMEM)
			reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
		else if (rc == -ENOLINK)
		else if (local_contact == -ENOLINK)
			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
		else
			reason_code = SMC_CLC_DECL_INTERR; /* other error */
		goto decline_rdma_unlock;
		return smc_connect_abort(smc, reason_code, 0);
	}
	link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	smc_conn_save_peer_info(smc, &aclc);
	smc_conn_save_peer_info(smc, aclc);

	/* create send buffer and rmb */
	rc = smc_buf_create(smc);
	if (rc) {
		reason_code = SMC_CLC_DECL_MEM;
		goto decline_rdma_unlock;
	}
	if (smc_buf_create(smc))
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, &aclc);
		smc_link_save_peer_info(link, aclc);

	rc = smc_rmb_rtoken_handling(&smc->conn, &aclc);
	if (rc) {
		reason_code = SMC_CLC_DECL_INTERR;
		goto decline_rdma_unlock;
	}
	if (smc_rmb_rtoken_handling(&smc->conn, aclc))
		return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
					 local_contact);

	smc_close_init(smc);
	smc_rx_init(smc);

	if (local_contact == SMC_FIRST_CONTACT) {
		rc = smc_ib_ready_link(link);
		if (rc) {
			reason_code = SMC_CLC_DECL_INTERR;
			goto decline_rdma_unlock;
		}
		if (smc_ib_ready_link(link))
			return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
						 local_contact);
	} else {
		if (!smc->conn.rmb_desc->reused) {
			if (smc_reg_rmb(link, smc->conn.rmb_desc, true)) {
				reason_code = SMC_CLC_DECL_INTERR;
				goto decline_rdma_unlock;
			}
		}
		if (!smc->conn.rmb_desc->reused &&
		    smc_reg_rmb(link, smc->conn.rmb_desc, true))
			return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
						 local_contact);
	}
	smc_rmb_sync_sg_for_device(&smc->conn);

	rc = smc_clc_send_confirm(smc);
	if (rc)
		goto out_err_unlock;
	reason_code = smc_clc_send_confirm(smc);
	if (reason_code)
		return smc_connect_abort(smc, reason_code, local_contact);

	smc_tx_init(smc);

	if (local_contact == SMC_FIRST_CONTACT) {
		/* QP confirmation over RoCE fabric */
		reason_code = smc_clnt_conf_first_link(smc);
		if (reason_code < 0) {
			rc = reason_code;
			goto out_err_unlock;
		}
		if (reason_code > 0)
			goto decline_rdma_unlock;
		if (reason_code)
			return smc_connect_abort(smc, reason_code,
						 local_contact);
	}

	mutex_unlock(&smc_create_lgr_pending);
	smc_tx_init(smc);

out_connected:
	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return rc ? rc : local_contact;

decline_rdma_unlock:
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_forget(smc->conn.lgr);
	mutex_unlock(&smc_create_lgr_pending);
	smc_conn_free(&smc->conn);
decline_rdma:
	/* RDMA setup failed, switch back to TCP */
	smc->use_fallback = true;
	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
		rc = smc_clc_send_decline(smc, reason_code);
		if (rc < 0)
			goto out_err;
	return 0;
}
	goto out_connected;

out_err_unlock:
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_forget(smc->conn.lgr);
	mutex_unlock(&smc_create_lgr_pending);
	smc_conn_free(&smc->conn);
out_err:
	if (smc->sk.sk_state == SMC_INIT)
		sock_put(&smc->sk); /* passive closing */
	return rc;
/* perform steps before actually connecting: run the checks and the CLC
 * handshake in order, then set up RDMA; any step that fails is routed to a
 * fallback helper.
 * Returns 0 when connected (including fallback), otherwise the value
 * delivered by smc_connect_decline_fallback().
 */
static int __smc_connect(struct smc_sock *smc)
{
	struct smc_clc_msg_accept_confirm aclc;
	struct smc_ib_device *ibdev;
	u8 ibport;
	int rc;

	sock_hold(&smc->sk); /* sock put in passive closing */

	/* fallback already requested, or peer has not signalled
	 * SMC-capability: fall back without sending a decline
	 */
	if (smc->use_fallback || !tcp_sk(smc->clcsock->sk)->syn_smc)
		return smc_connect_fallback(smc);

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(smc))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);

	/* no RDMA device available for this connection: decline, fall back */
	if (smc_check_rdma(smc, &ibdev, &ibport))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);

	/* CLC handshake first; connect using rdma only if it succeeded */
	rc = smc_connect_clc(smc, &aclc, ibdev, ibport);
	if (!rc)
		rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);

	return rc ? smc_connect_decline_fallback(smc, rc) : 0;
}

static int smc_connect(struct socket *sock, struct sockaddr *addr,
@@ -592,8 +611,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
	if (rc)
		goto out;

	/* setup RDMA connection */
	rc = smc_connect_rdma(smc);
	rc = __smc_connect(smc);
	if (rc < 0)
		goto out;
	else
@@ -791,182 +809,239 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
	return 0;
}

/* setup for RDMA connection of server */
static void smc_listen_work(struct work_struct *work)
/* listen worker: finish */
static void smc_listen_out(struct smc_sock *new_smc)
{
	struct smc_sock *new_smc = container_of(work, struct smc_sock,
						smc_listen_work);
	struct smc_clc_msg_proposal_prefix *pclc_prfx;
	struct socket *newclcsock = new_smc->clcsock;
	struct smc_sock *lsmc = new_smc->listen_smc;
	struct smc_clc_msg_accept_confirm cclc;
	int local_contact = SMC_REUSE_CONTACT;
	struct sock *newsmcsk = &new_smc->sk;
	struct smc_clc_msg_proposal *pclc;
	struct smc_ib_device *smcibdev;
	u8 buf[SMC_CLC_MAX_LEN];
	struct smc_link *link;
	int reason_code = 0;
	int rc = 0;
	u8 ibport;

	if (new_smc->use_fallback)
		goto out_connected;
	lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
	if (lsmc->sk.sk_state == SMC_LISTEN) {
		smc_accept_enqueue(&lsmc->sk, newsmcsk);
	} else { /* no longer listening */
		smc_close_non_accepted(newsmcsk);
	}
	release_sock(&lsmc->sk);

	/* check if peer is smc capable */
	if (!tcp_sk(newclcsock->sk)->syn_smc) {
		new_smc->use_fallback = true;
		goto out_connected;
	/* Wake up accept */
	lsmc->sk.sk_data_ready(&lsmc->sk);
	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
}

	/* do inband token exchange -
	 *wait for and receive SMC Proposal CLC message
	 */
	reason_code = smc_clc_wait_msg(new_smc, &buf, sizeof(buf),
				       SMC_CLC_PROPOSAL);
	if (reason_code < 0)
		goto out_err;
	if (reason_code > 0)
		goto decline_rdma;
/* listen worker: finish in state connected */
static void smc_listen_out_connected(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(new_smc)) {
		reason_code = SMC_CLC_DECL_IPSEC;
		goto decline_rdma;
	sk_refcnt_debug_inc(newsmcsk);
	if (newsmcsk->sk_state == SMC_INIT)
		newsmcsk->sk_state = SMC_ACTIVE;

	smc_listen_out(new_smc);
}

	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(newclcsock->sk, &smcibdev, &ibport);
	if (!smcibdev) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
/* listen worker: finish in error state.
 * Marks the new socket SMC_CLOSED, frees its connection and hands it to
 * smc_listen_out().
 */
static void smc_listen_out_err(struct smc_sock *new_smc)
{
	/* socket still in SMC_INIT: drop the reference (passive closing) */
	if (new_smc->sk.sk_state == SMC_INIT)
		sock_put(&new_smc->sk);
	new_smc->sk.sk_state = SMC_CLOSED;
	smc_conn_free(&new_smc->conn);

	smc_listen_out(new_smc);
}

	pclc = (struct smc_clc_msg_proposal *)&buf;
/* listen worker: decline and fall back if possible.
 * A negative @reason_code, or a failure to send the decline message, ends
 * the worker in error state; otherwise the socket continues in fallback mode.
 */
static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
			       int local_contact)
{
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_forget(new_smc->conn.lgr);

	/* error, no fallback possible */
	if (reason_code < 0)
		goto out_err;

	/* RDMA setup failed, switch back to TCP */
	smc_conn_free(&new_smc->conn);
	new_smc->use_fallback = true;

	/* no decline is sent for reason code 0 or SMC_CLC_DECL_REPLY */
	if (reason_code && reason_code != SMC_CLC_DECL_REPLY &&
	    smc_clc_send_decline(new_smc, reason_code) < 0)
		goto out_err;

	smc_listen_out_connected(new_smc);
	return;

out_err:
	smc_listen_out_err(new_smc);
}

/* listen worker: check prefixes */
static int smc_listen_rdma_check(struct smc_sock *new_smc,
				 struct smc_clc_msg_proposal *pclc)
{
	struct smc_clc_msg_proposal_prefix *pclc_prfx;
	struct socket *newclcsock = new_smc->clcsock;

	pclc_prfx = smc_clc_proposal_get_prefix(pclc);
	if (smc_clc_prfx_match(newclcsock, pclc_prfx))
		return SMC_CLC_DECL_CNFERR;

	rc = smc_clc_prfx_match(newclcsock, pclc_prfx);
	if (rc) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	return 0;
}

/* listen worker: initialize connection and buffers */
static int smc_listen_rdma_init(struct smc_sock *new_smc,
				struct smc_clc_msg_proposal *pclc,
				struct smc_ib_device *ibdev, u8 ibport,
				int *local_contact)
{
	/* allocate connection / link group */
	mutex_lock(&smc_create_lgr_pending);
	local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl,
					0);
	if (local_contact < 0) {
		rc = local_contact;
		if (rc == -ENOMEM)
			reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
		goto decline_rdma_unlock;
	*local_contact = smc_conn_create(new_smc, ibdev, ibport, &pclc->lcl, 0);
	if (*local_contact < 0) {
		if (*local_contact == -ENOMEM)
			return SMC_CLC_DECL_MEM;/* insufficient memory*/
		return SMC_CLC_DECL_INTERR; /* other error */
	}
	link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	/* create send buffer and rmb */
	rc = smc_buf_create(new_smc);
	if (rc) {
		reason_code = SMC_CLC_DECL_MEM;
		goto decline_rdma_unlock;
	if (smc_buf_create(new_smc))
		return SMC_CLC_DECL_MEM;

	return 0;
}

	smc_close_init(new_smc);
	smc_rx_init(new_smc);
/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
{
	struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	if (local_contact != SMC_FIRST_CONTACT) {
		if (!new_smc->conn.rmb_desc->reused) {
			if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true)) {
				reason_code = SMC_CLC_DECL_INTERR;
				goto decline_rdma_unlock;
			}
			if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
				return SMC_CLC_DECL_INTERR;
		}
	}
	smc_rmb_sync_sg_for_device(&new_smc->conn);

	rc = smc_clc_send_accept(new_smc, local_contact);
	if (rc)
		goto out_err_unlock;
	return 0;
}

/* listen worker: finish RDMA setup */
static void smc_listen_rdma_finish(struct smc_sock *new_smc,
				   struct smc_clc_msg_accept_confirm *cclc,
				   int local_contact)
{
	struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
	int reason_code = 0;

	/* receive SMC Confirm CLC message */
	reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
				       SMC_CLC_CONFIRM);
	if (reason_code < 0)
		goto out_err_unlock;
	if (reason_code > 0)
		goto decline_rdma_unlock;
	smc_conn_save_peer_info(new_smc, &cclc);
	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, &cclc);
		smc_link_save_peer_info(link, cclc);

	rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc);
	if (rc) {
	if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
		reason_code = SMC_CLC_DECL_INTERR;
		goto decline_rdma_unlock;
		goto decline;
	}

	if (local_contact == SMC_FIRST_CONTACT) {
		rc = smc_ib_ready_link(link);
		if (rc) {
		if (smc_ib_ready_link(link)) {
			reason_code = SMC_CLC_DECL_INTERR;
			goto decline_rdma_unlock;
			goto decline;
		}
		/* QP confirmation over RoCE fabric */
		reason_code = smc_serv_conf_first_link(new_smc);
		if (reason_code < 0)
			/* peer is not aware of a problem */
			goto out_err_unlock;
		if (reason_code > 0)
			goto decline_rdma_unlock;
		if (reason_code)
			goto decline;
	}
	return;

	smc_tx_init(new_smc);
decline:
	mutex_unlock(&smc_create_lgr_pending);
	smc_listen_decline(new_smc, reason_code, local_contact);
}

out_connected:
	sk_refcnt_debug_inc(newsmcsk);
	if (newsmcsk->sk_state == SMC_INIT)
		newsmcsk->sk_state = SMC_ACTIVE;
enqueue:
	lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
	if (lsmc->sk.sk_state == SMC_LISTEN) {
		smc_accept_enqueue(&lsmc->sk, newsmcsk);
	} else { /* no longer listening */
		smc_close_non_accepted(newsmcsk);
/* setup for RDMA connection of server */
static void smc_listen_work(struct work_struct *work)
{
	struct smc_sock *new_smc = container_of(work, struct smc_sock,
						smc_listen_work);
	struct socket *newclcsock = new_smc->clcsock;
	struct smc_clc_msg_accept_confirm cclc;
	struct smc_clc_msg_proposal *pclc;
	struct smc_ib_device *ibdev;
	u8 buf[SMC_CLC_MAX_LEN];
	int local_contact = 0;
	int reason_code = 0;
	int rc = 0;
	u8 ibport;

	if (new_smc->use_fallback) {
		smc_listen_out_connected(new_smc);
		return;
	}
	release_sock(&lsmc->sk);

	/* Wake up accept */
	lsmc->sk.sk_data_ready(&lsmc->sk);
	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
	/* check if peer is smc capable */
	if (!tcp_sk(newclcsock->sk)->syn_smc) {
		new_smc->use_fallback = true;
		smc_listen_out_connected(new_smc);
		return;
	}

decline_rdma_unlock:
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_forget(new_smc->conn.lgr);
	/* do inband token exchange -
	 * wait for and receive SMC Proposal CLC message
	 */
	pclc = (struct smc_clc_msg_proposal *)&buf;
	reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
				       SMC_CLC_PROPOSAL);
	if (reason_code) {
		smc_listen_decline(new_smc, reason_code, 0);
		return;
	}

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(new_smc)) {
		smc_listen_decline(new_smc, SMC_CLC_DECL_IPSEC, 0);
		return;
	}

	mutex_lock(&smc_create_lgr_pending);
	smc_close_init(new_smc);
	smc_rx_init(new_smc);
	smc_tx_init(new_smc);

	/* check if RDMA is available */
	if (smc_check_rdma(new_smc, &ibdev, &ibport) ||
	    smc_listen_rdma_check(new_smc, pclc) ||
	    smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
				 &local_contact) ||
	    smc_listen_rdma_reg(new_smc, local_contact)) {
		/* SMC not supported, decline */
		mutex_unlock(&smc_create_lgr_pending);
decline_rdma:
	/* RDMA setup failed, switch back to TCP */
	smc_conn_free(&new_smc->conn);
	new_smc->use_fallback = true;
	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
		if (smc_clc_send_decline(new_smc, reason_code) < 0)
			goto out_err;
		smc_listen_decline(new_smc, SMC_CLC_DECL_CNFERR, local_contact);
		return;
	}
	goto out_connected;

out_err_unlock:
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_forget(new_smc->conn.lgr);
	/* send SMC Accept CLC message */
	rc = smc_clc_send_accept(new_smc, local_contact);
	if (rc) {
		mutex_unlock(&smc_create_lgr_pending);
out_err:
	if (newsmcsk->sk_state == SMC_INIT)
		sock_put(&new_smc->sk); /* passive closing */
	newsmcsk->sk_state = SMC_CLOSED;
	smc_conn_free(&new_smc->conn);
	goto enqueue; /* queue new sock with sk_err set */
		smc_listen_decline(new_smc, rc, local_contact);
		return;
	}

	/* receive SMC Confirm CLC message */
	reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
				       SMC_CLC_CONFIRM);
	if (reason_code) {
		mutex_unlock(&smc_create_lgr_pending);
		smc_listen_decline(new_smc, reason_code, local_contact);
		return;
	}

	/* finish worker */
	smc_listen_rdma_finish(new_smc, &cclc, local_contact);
	smc_conn_save_peer_info(new_smc, &cclc);
	mutex_unlock(&smc_create_lgr_pending);
	smc_listen_out_connected(new_smc);
}

static void smc_tcp_listen_work(struct work_struct *work)
@@ -1227,7 +1302,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
			if (sk->sk_state == SMC_INIT &&
			    mask & EPOLLOUT &&
			    smc->clcsock->sk->sk_state != TCP_CLOSE) {
				rc = smc_connect_rdma(smc);
				rc = __smc_connect(smc);
				if (rc < 0)
					mask |= EPOLLERR;
				/* success cases including fallback */
@@ -1421,7 +1496,7 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
		/* output queue size (not send + not acked) */
		if (smc->sk.sk_state == SMC_LISTEN)
			return -EINVAL;
		answ = smc->conn.sndbuf_size -
		answ = smc->conn.sndbuf_desc->len -
					atomic_read(&smc->conn.sndbuf_space);
		break;
	case SIOCOUTQNSD:
@@ -1637,19 +1712,7 @@ out_pnet:

static void __exit smc_exit(void)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_freeing_list);

	spin_lock_bh(&smc_lgr_list.lock);
	if (!list_empty(&smc_lgr_list.list))
		list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
	spin_unlock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
		list_del_init(&lgr->list);
		smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
		cancel_delayed_work_sync(&lgr->free_work);
		smc_lgr_free(lgr); /* free link group */
	}
	smc_core_exit();
	static_branch_disable(&tcp_have_smc);
	smc_ib_unregister_client();
	sock_unregister(PF_SMC);
+2 −44
Original line number Diff line number Diff line
@@ -118,7 +118,7 @@ struct smc_connection {
	struct rb_node		alert_node;
	struct smc_link_group	*lgr;		/* link group of connection */
	u32			alert_token_local; /* unique conn. id */
	u8			peer_conn_idx;	/* from tcp handshake */
	u8			peer_rmbe_idx;	/* from tcp handshake */
	int			peer_rmbe_size;	/* size of peer rx buffer */
	atomic_t		peer_rmbe_space;/* remaining free bytes in peer
						 * rmbe
@@ -126,9 +126,7 @@ struct smc_connection {
	int			rtoken_idx;	/* idx to peer RMB rkey/addr */

	struct smc_buf_desc	*sndbuf_desc;	/* send buffer descriptor */
	int			sndbuf_size;	/* sndbuf size <== sock wmem */
	struct smc_buf_desc	*rmb_desc;	/* RMBE descriptor */
	int			rmbe_size;	/* RMBE size <== sock rmem */
	int			rmbe_size_short;/* compressed notation */
	int			rmbe_update_limit;
						/* lower limit for consumer
@@ -153,6 +151,7 @@ struct smc_connection {
	u16			tx_cdc_seq;	/* sequence # for CDC send */
	spinlock_t		send_lock;	/* protect wr_sends */
	struct delayed_work	tx_work;	/* retry of smc_cdc_msg_send */
	u32			tx_off;		/* base offset in peer rmb */

	struct smc_host_cdc_msg	local_rx_ctrl;	/* filled during event_handl.
						 * .prod cf. TCP rcv_nxt
@@ -221,41 +220,6 @@ static inline u32 ntoh24(u8 *net)
	return be32_to_cpu(t);
}

#define SMC_BUF_MIN_SIZE 16384		/* minimum size of an RMB */

#define SMC_RMBE_SIZES	16	/* number of distinct sizes for an RMBE */
/* theoretically, the RFC states that largest size would be 512K,
 * i.e. compressed 5 and thus 6 sizes (0..5), despite
 * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
 */

/* Map a buffer size in bytes to the compressed notation: 0 stands for the
 * 16K minimum, each increment doubles the size. Sizes are rounded up to the
 * next power of 2 (unlike plain ilog2), so the socket application gets at
 * least its desired sndbuf / rcvbuf size. The result is capped at
 * SMC_RMBE_SIZES - 1.
 */
static inline u8 smc_compress_bufsize(int size)
{
	int exponent;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;

	/* number of 16K units, less one so exact powers of 2 do not round up */
	exponent = ilog2((size - 1) >> 14) + 1;

	return exponent < SMC_RMBE_SIZES ? exponent : SMC_RMBE_SIZES - 1;
}

/* expand the compressed notation into a size in bytes: 2^(compressed + 14) */
static inline int smc_uncompress_bufsize(u8 compressed)
{
	int shift = (int)compressed + 14;

	return (int)(u32)(0x00000001 << shift);
}

#ifdef CONFIG_XFRM
static inline bool using_ipsec(struct smc_sock *smc)
{
@@ -269,12 +233,6 @@ static inline bool using_ipsec(struct smc_sock *smc)
}
#endif

struct smc_clc_msg_local;

void smc_conn_free(struct smc_connection *conn);
int smc_conn_create(struct smc_sock *smc,
		    struct smc_ib_device *smcibdev, u8 ibport,
		    struct smc_clc_msg_local *lcl, int srv_first_contact);
struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
void smc_close_non_accepted(struct sock *sk);

+26 −29

File changed.

Preview size limit exceeded, changes collapsed.

+2 −2
Original line number Diff line number Diff line
@@ -442,7 +442,7 @@ int smc_clc_send_confirm(struct smc_sock *smc)
	hton24(cclc.qpn, link->roce_qp->qp_num);
	cclc.rmb_rkey =
		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
	cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
	cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
	cclc.rmbe_alert_token = htonl(conn->alert_token_local);
	cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
	cclc.rmbe_size = conn->rmbe_size_short;
@@ -494,7 +494,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
	hton24(aclc.qpn, link->roce_qp->qp_num);
	aclc.rmb_rkey =
		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
	aclc.conn_idx = 1;			/* as long as 1 RMB = 1 RMBE */
	aclc.rmbe_idx = 1;			/* as long as 1 RMB = 1 RMBE */
	aclc.rmbe_alert_token = htonl(conn->alert_token_local);
	aclc.qp_mtu = link->path_mtu;
	aclc.rmbe_size = conn->rmbe_size_short,
+1 −1
Original line number Diff line number Diff line
@@ -97,7 +97,7 @@ struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
	struct smc_clc_msg_local lcl;
	u8 qpn[3];		/* QP number */
	__be32 rmb_rkey;	/* RMB rkey */
	u8 conn_idx;		/* Connection index, which RMBE in RMB */
	u8 rmbe_idx;		/* Index of RMBE in RMB */
	__be32 rmbe_alert_token;/* unique connection id */
#if defined(__BIG_ENDIAN_BITFIELD)
	u8 rmbe_size : 4,	/* RMBE buf size (compressed notation) */
Loading