Commit fe4f961e authored by David S. Miller
Browse files

Merge branch 'net-smc-add-event-based-framework-for-LLC-msgs'



Karsten Graul says:

====================
net/smc: add event-based framework for LLC msgs

These patches are the next step towards SMC-R link failover support. They add
a new framework to handle Link Layer Control (LLC) messages and adapt the
existing code to use the new framework.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 1569a3c4 41a211d8
Loading
Loading
Loading
Loading
+60 −48
Original line number Diff line number Diff line
@@ -382,22 +382,24 @@ static int smcr_lgr_reg_rmbs(struct smc_link_group *lgr,
static int smcr_clnt_conf_first_link(struct smc_sock *smc)
{
	struct smc_link *link = smc->conn.lnk;
	int rest;
	struct smc_llc_qentry *qentry;
	int rc;

	link->lgr->type = SMC_LGR_SINGLE;

	/* receive CONFIRM LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
	qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
			      SMC_LLC_CONFIRM_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}

	if (link->llc_confirm_rc)
	rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
	smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
	if (rc)
		return SMC_CLC_DECL_RMBE_EC;

	rc = smc_ib_modify_qp_rts(link);
@@ -409,31 +411,30 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* confirm_rkey is implicit on 1st contact */
	smc->conn.rmb_desc->is_conf_rkey = true;

	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	/* receive ADD LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(&link->llc_add,
							 SMC_LLC_WAIT_TIME);
	if (rest <= 0) {
	smc_llc_link_active(link);

	/* optional 2nd link, receive ADD LINK request from server */
	qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
			      SMC_LLC_ADD_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
		if (rc == -EAGAIN)
			rc = 0; /* no DECLINE received, go with one link */
		return rc;
	}

	/* send add link reject message, only one link supported for now */
	rc = smc_llc_send_add_link(link,
				   link->smcibdev->mac[link->ibport - 1],
				   link->gid, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_AL;

	smc_llc_link_active(link);

	smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
	/* tbd: call smc_llc_cli_add_link(link, qentry); */
	return 0;
}

@@ -613,8 +614,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
			    struct smc_clc_msg_accept_confirm *aclc,
			    struct smc_init_info *ini)
{
	int i, reason_code = 0;
	struct smc_link *link;
	int reason_code = 0;

	ini->is_smcd = false;
	ini->ib_lcl = &aclc->lcl;
@@ -627,10 +628,28 @@ static int smc_connect_rdma(struct smc_sock *smc,
		mutex_unlock(&smc_client_lgr_pending);
		return reason_code;
	}
	link = smc->conn.lnk;

	smc_conn_save_peer_info(smc, aclc);

	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		link = smc->conn.lnk;
	} else {
		/* set link that was assigned by server */
		link = NULL;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *l = &smc->conn.lgr->lnk[i];

			if (l->peer_qpn == ntoh24(aclc->qpn)) {
				link = l;
				break;
			}
		}
		if (!link)
			return smc_connect_abort(smc, SMC_CLC_DECL_NOSRVLINK,
						 ini->cln_first_contact);
		smc->conn.lnk = link;
	}

	/* create send buffer and rmb */
	if (smc_buf_create(smc, false))
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
@@ -666,7 +685,9 @@ static int smc_connect_rdma(struct smc_sock *smc,

	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		/* QP confirmation over RoCE fabric */
		smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
		reason_code = smcr_clnt_conf_first_link(smc);
		smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
		if (reason_code)
			return smc_connect_abort(smc, reason_code,
						 ini->cln_first_contact);
@@ -1019,9 +1040,11 @@ void smc_close_non_accepted(struct sock *sk)
static int smcr_serv_conf_first_link(struct smc_sock *smc)
{
	struct smc_link *link = smc->conn.lnk;
	int rest;
	struct smc_llc_qentry *qentry;
	int rc;

	link->lgr->type = SMC_LGR_SINGLE;

	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false))
		return SMC_CLC_DECL_ERR_REGRMB;

@@ -1031,40 +1054,27 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
		return SMC_CLC_DECL_TIMEOUT_CL;

	/* receive CONFIRM LINK response from client over the RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm_resp,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
	qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME,
			      SMC_LLC_CONFIRM_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}

	if (link->llc_confirm_resp_rc)
	rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP);
	smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
	if (rc)
		return SMC_CLC_DECL_RMBE_EC;

	/* send ADD LINK request to client over the RoCE fabric */
	rc = smc_llc_send_add_link(link,
				   link->smcibdev->mac[link->ibport - 1],
				   link->gid, SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_AL;

	/* receive ADD LINK response from client over the RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
							 SMC_LLC_WAIT_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
	}
	/* confirm_rkey is implicit on 1st contact */
	smc->conn.rmb_desc->is_conf_rkey = true;

	smc_llc_link_active(link);

	/* initial contact - try to establish second link */
	/* tbd: call smc_llc_srv_add_link(link); */
	return 0;
}

@@ -1240,7 +1250,9 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc,
			goto decline;
		}
		/* QP confirmation over RoCE fabric */
		smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
		reason_code = smcr_serv_conf_first_link(new_smc);
		smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
		if (reason_code)
			goto decline;
	}
+1 −0
Original line number Diff line number Diff line
@@ -45,6 +45,7 @@
#define SMC_CLC_DECL_GETVLANERR	0x03080000  /* err to get vlan id of ip device*/
#define SMC_CLC_DECL_ISMVLANERR	0x03090000  /* err to reg vlan id on ism dev  */
#define SMC_CLC_DECL_NOACTLINK	0x030a0000  /* no active smc-r link in lgr    */
#define SMC_CLC_DECL_NOSRVLINK	0x030b0000  /* SMC-R link from srv not found  */
#define SMC_CLC_DECL_SYNCERR	0x04000000  /* synchronization error          */
#define SMC_CLC_DECL_PEERDECL	0x05000000  /* peer declined during handshake */
#define SMC_CLC_DECL_INTERR	0x09990000  /* internal error		      */
+54 −9
Original line number Diff line number Diff line
@@ -200,7 +200,6 @@ static int smcr_link_send_delete(struct smc_link *lnk, bool orderly)
{
	if (lnk->state == SMC_LNK_ACTIVE &&
	    !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, orderly)) {
		smc_llc_link_deleting(lnk);
		return 0;
	}
	return -ENOTCONN;
@@ -263,6 +262,7 @@ static void smc_lgr_free_work(struct work_struct *work)
			if (smc_link_usable(lnk))
				lnk->state = SMC_LNK_INACTIVE;
		}
		wake_up_interruptible_all(&lgr->llc_waiter);
	}
	smc_lgr_free(lgr);
}
@@ -445,13 +445,11 @@ out:
}

static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
			   struct smc_link *lnk)
			   struct smc_link_group *lgr)
{
	struct smc_link_group *lgr = lnk->lgr;

	if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
		/* unregister rmb with peer */
		smc_llc_do_delete_rkey(lnk, rmb_desc);
		smc_llc_do_delete_rkey(lgr, rmb_desc);
		rmb_desc->is_conf_rkey = false;
	}
	if (rmb_desc->is_reg_err) {
@@ -474,7 +472,7 @@ static void smc_buf_unuse(struct smc_connection *conn,
	if (conn->rmb_desc && lgr->is_smcd)
		conn->rmb_desc->used = 0;
	else if (conn->rmb_desc)
		smcr_buf_unuse(conn->rmb_desc, conn->lnk);
		smcr_buf_unuse(conn->rmb_desc, lgr);
}

/* remove a finished connection from its link group */
@@ -696,6 +694,7 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr)
			if (smc_link_usable(lnk))
				lnk->state = SMC_LNK_INACTIVE;
		}
		wake_up_interruptible_all(&lgr->llc_waiter);
	}
}

@@ -767,8 +766,7 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
			continue;
		/* tbd - terminate only when no more links are active */
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (!smc_link_usable(&lgr->lnk[i]) ||
			    lgr->lnk[i].state == SMC_LNK_DELETING)
			if (!smc_link_usable(&lgr->lnk[i]))
				continue;
			if (lgr->lnk[i].smcibdev == smcibdev &&
			    lgr->lnk[i].ibport == ibport) {
@@ -1167,7 +1165,6 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
		if (!smc_link_usable(lnk))
			continue;
		if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
			smcr_buf_unuse(buf_desc, lnk);
			rc = -ENOMEM;
			goto out;
		}
@@ -1273,6 +1270,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)

	if (!is_smcd) {
		if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
			smcr_buf_unuse(buf_desc, lgr);
			return -ENOMEM;
		}
	}
@@ -1368,6 +1366,53 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
	return -ENOSPC;
}

/* find the rtoken slot that holds the given rkey for one link
 *
 * Walks all SMC_RMBS_PER_LGR_MAX slots of the link group, skipping slots
 * whose bit is not set in rtokens_used_mask, and compares the rkey stored
 * for link index @lnk_idx against @rkey (host byte order value).
 *
 * Returns the index of the first matching slot, or -ENOENT if none matches.
 */
static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
				   u32 rkey)
{
	int slot;

	for (slot = 0; slot < SMC_RMBS_PER_LGR_MAX; slot++) {
		/* unused slots may hold stale keys, never match them */
		if (!test_bit(slot, lgr->rtokens_used_mask))
			continue;
		if (lgr->rtokens[slot][lnk_idx].rkey != rkey)
			continue;
		return slot;
	}
	return -ENOENT;
}

/* set rtoken for a new link to an existing rmb
 *
 * The rmb is identified by the rkey already known for link index @link_idx
 * (@nw_rkey_known, network byte order). If a used rtoken slot with that rkey
 * exists, the entry of the same slot for @link_idx_new is filled with
 * @nw_rkey and @nw_vaddr, both converted to host byte order.
 * If no slot matches, nothing is changed and no error is reported.
 */
void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
		    __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
{
	int slot;

	slot = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
	if (slot != -ENOENT) {
		lgr->rtokens[slot][link_idx_new].rkey = ntohl(nw_rkey);
		lgr->rtokens[slot][link_idx_new].dma_addr =
			be64_to_cpu(nw_vaddr);
	}
}

/* set rtoken for a new link whose link_id is given
 *
 * Searches the link array of the link group for the first link whose
 * link_id equals @link_id and stores @nw_rkey and @nw_vaddr (converted to
 * host byte order) in rtoken slot @rtok_idx for that link's index.
 * If no link carries @link_id, nothing is changed and no error is reported.
 * @rtok_idx is used as given, it is not range checked here.
 */
void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
		     __be64 nw_vaddr, __be32 nw_rkey)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (lgr->lnk[i].link_id != link_id)
			continue;
		/* first link with a matching id wins */
		lgr->rtokens[rtok_idx][i].rkey = ntohl(nw_rkey);
		lgr->rtokens[rtok_idx][i].dma_addr = be64_to_cpu(nw_vaddr);
		return;
	}
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
{
+38 −12
Original line number Diff line number Diff line
@@ -36,7 +36,6 @@ enum smc_link_state { /* possible states of a link */
	SMC_LNK_INACTIVE,	/* link is inactive */
	SMC_LNK_ACTIVATING,	/* link is being activated */
	SMC_LNK_ACTIVE,		/* link is active */
	SMC_LNK_DELETING,	/* link is being deleted */
};

#define SMC_WR_BUF_SIZE		48	/* size of work request buffer */
@@ -120,20 +119,9 @@ struct smc_link {
	struct smc_link_group	*lgr;		/* parent link group */

	enum smc_link_state	state;		/* state of link */
	struct completion	llc_confirm;	/* wait for rx of conf link */
	struct completion	llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
	int			llc_confirm_rc; /* rc from confirm link msg */
	int			llc_confirm_resp_rc; /* rc from conf_resp msg */
	struct completion	llc_add;	/* wait for rx of add link */
	struct completion	llc_add_resp;	/* wait for rx of add link rsp*/
	struct delayed_work	llc_testlink_wrk; /* testlink worker */
	struct completion	llc_testlink_resp; /* wait for rx of testlink */
	int			llc_testlink_time; /* testlink interval */
	struct completion	llc_confirm_rkey_resp; /* w4 rx of cnf rkey */
	int			llc_confirm_rkey_resp_rc; /* rc from cnf rkey */
	struct completion	llc_delete_rkey_resp; /* w4 rx of del rkey */
	int			llc_delete_rkey_resp_rc; /* rc from del rkey */
	struct mutex		llc_delete_rkey_mutex; /* serialize usage */
};

/* For now we just allow one parallel link per link group. The SMC protocol
@@ -197,6 +185,28 @@ struct smc_rtoken { /* address/key of remote RMB */

struct smcd_dev;

/* Redundancy state of an SMC-R link group, stored in the 'type' member of
 * struct smc_link_group. Each value describes how many active RNICs the
 * local side and the peer contribute to the link group.
 */
enum smc_lgr_type {				/* redundancy state of lgr */
	SMC_LGR_NONE,			/* no active links, lgr to be deleted */
	SMC_LGR_SINGLE,			/* 1 active RNIC on each peer */
	SMC_LGR_SYMMETRIC,		/* 2 active RNICs on each peer */
	SMC_LGR_ASYMMETRIC_PEER,	/* local has 2, peer 1 active RNICs */
	SMC_LGR_ASYMMETRIC_LOCAL,	/* local has 1, peer 2 active RNICs */
};

/* Kind of LLC flow recorded in struct smc_llc_flow.
 * NOTE(review): the explicit values 2/4/6 presumably mirror the LLC message
 * type codes of the messages that start the respective flow - confirm
 * against the LLC message type definitions before changing them.
 */
enum smc_llc_flowtype {
	SMC_LLC_FLOW_NONE	= 0,	/* no flow type set */
	SMC_LLC_FLOW_ADD_LINK	= 2,	/* add link flow */
	SMC_LLC_FLOW_DEL_LINK	= 4,	/* delete link flow */
	SMC_LLC_FLOW_RKEY	= 6,	/* rkey flow */
};

struct smc_llc_qentry;

/* State of one LLC flow. struct smc_link_group embeds two instances,
 * llc_flow_lcl and llc_flow_rmt.
 */
struct smc_llc_flow {
	enum smc_llc_flowtype type;	/* kind of flow, see smc_llc_flowtype */
	/* queue entry attached to this flow; presumably the LLC message that
	 * is currently being processed - TODO confirm in the LLC code
	 */
	struct smc_llc_qentry *qentry;
};

struct smc_link_group {
	struct list_head	list;
	struct rb_root		conns_all;	/* connection tree */
@@ -232,12 +242,24 @@ struct smc_link_group {
			DECLARE_BITMAP(rtokens_used_mask, SMC_RMBS_PER_LGR_MAX);
						/* used rtoken elements */
			u8			next_link_id;
			enum smc_lgr_type	type;
						/* redundancy state */
			struct list_head	llc_event_q;
						/* queue for llc events */
			spinlock_t		llc_event_q_lock;
						/* protects llc_event_q */
			struct work_struct	llc_event_work;
						/* llc event worker */
			wait_queue_head_t	llc_waiter;
						/* w4 next llc event */
			struct smc_llc_flow	llc_flow_lcl;
						/* llc local control field */
			struct smc_llc_flow	llc_flow_rmt;
						/* llc remote control field */
			struct smc_llc_qentry	*delayed_event;
						/* arrived when flow active */
			spinlock_t		llc_flow_lock;
						/* protects llc flow */
			int			llc_testlink_time;
						/* link keep alive time */
		};
@@ -329,6 +351,10 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link,
			    struct smc_clc_msg_accept_confirm *clc);
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey);
int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey);
void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
		    __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey);
void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
		     __be64 nw_vaddr, __be32 nw_rkey);
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
+360 −175

File changed.

Preview size limit exceeded, changes collapsed.

Loading