Commit 75874b3d authored by Chuck Lever's avatar Chuck Lever Committed by Jason Gunthorpe
Browse files

RDMA/cm: Replace pr_debug() call sites with tracepoints

In the interest of converging on a common instrumentation infrastructure,
modernize the pr_debug() call sites added by commit 119bf817 ("IB/cm:
Add debug prints to ib_cm"). The new tracepoints appear in a new "ib_cma"
subsystem.

The conversion is somewhat mechanical. Someone more familiar with the
semantics of the recorded information might suggest additional data
capture.

Some benefits include:

- Tracepoints enable "always on" reporting of these errors
- The error records are structured and compact
- Tracepoints provide hooks for eBPF scripts

Sample output:

            nfsd-1954  [003]    62.017901: icm_dreq_skipped:     local_id=1998890974 remote_id=1129750393 state=DREQ_RCVD lap_state=LAP_UNINIT

Link: https://lore.kernel.org/r/159767239665.2968.10613294222688696646.stgit@klimt.1015granger.net


Signed-off-by: default avatarChuck Lever <chuck.lever@oracle.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@nvidia.com>
parent b3d03daa
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -17,7 +17,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o

ib_cm-y :=			cm.o
ib_cm-y :=			cm.o cm_trace.o

iw_cm-y :=			iwcm.o iwpm_util.o iwpm_msg.o

+26 −54
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@
#include <rdma/ib_cm.h>
#include "cm_msgs.h"
#include "core_priv.h"
#include "cm_trace.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
@@ -2123,8 +2124,7 @@ static int cm_req_handler(struct cm_work *work)

	listen_cm_id_priv = cm_match_req(work, cm_id_priv);
	if (!listen_cm_id_priv) {
		pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
			 be32_to_cpu(cm_id_priv->id.local_id));
		trace_icm_no_listener_err(&cm_id_priv->id);
		cm_id_priv->id.state = IB_CM_IDLE;
		ret = -EINVAL;
		goto destroy;
@@ -2273,8 +2273,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id->state != IB_CM_REQ_RCVD &&
	    cm_id->state != IB_CM_MRA_REQ_SENT) {
		pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__,
			 be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
		trace_icm_send_rep_err(cm_id_priv->id.local_id, cm_id->state);
		ret = -EINVAL;
		goto out;
	}
@@ -2347,8 +2346,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id->state != IB_CM_REP_RCVD &&
	    cm_id->state != IB_CM_MRA_REP_SENT) {
		pr_debug("%s: local_id %d, cm_id->state %d\n", __func__,
			 be32_to_cpu(cm_id->local_id), cm_id->state);
		trace_icm_send_cm_rtu_err(cm_id);
		ret = -EINVAL;
		goto error;
	}
@@ -2464,7 +2462,7 @@ static int cm_rep_handler(struct cm_work *work)
		cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0);
	if (!cm_id_priv) {
		cm_dup_rep_handler(work);
		pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
		trace_icm_remote_no_priv_err(
			 IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
		return -EINVAL;
	}
@@ -2478,11 +2476,10 @@ static int cm_rep_handler(struct cm_work *work)
		break;
	default:
		ret = -EINVAL;
		pr_debug(
			"%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
			__func__, cm_id_priv->id.state,
		trace_icm_rep_unknown_err(
			IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
			IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
			IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg),
			cm_id_priv->id.state);
		spin_unlock_irq(&cm_id_priv->lock);
		goto error;
	}
@@ -2499,7 +2496,7 @@ static int cm_rep_handler(struct cm_work *work)
		spin_unlock(&cm.lock);
		spin_unlock_irq(&cm_id_priv->lock);
		ret = -EINVAL;
		pr_debug("%s: Failed to insert remote id %d\n", __func__,
		trace_icm_insert_failed_err(
			 IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
		goto error;
	}
@@ -2516,9 +2513,8 @@ static int cm_rep_handler(struct cm_work *work)
			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
			     NULL, 0);
		ret = -EINVAL;
		pr_debug(
			"%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
			__func__, IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
		trace_icm_staleconn_err(
			IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
			IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));

		if (cur_cm_id_priv) {
@@ -2645,9 +2641,7 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
		return -EINVAL;

	if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
		pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
			 be32_to_cpu(cm_id_priv->id.local_id),
			 cm_id_priv->id.state);
		trace_icm_dreq_skipped(&cm_id_priv->id);
		return -EINVAL;
	}

@@ -2721,10 +2715,7 @@ static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
		return -EINVAL;

	if (cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
		pr_debug(
			"%s: local_id %d, cm_idcm_id->state(%d) != IB_CM_DREQ_RCVD\n",
			__func__, be32_to_cpu(cm_id_priv->id.local_id),
			cm_id_priv->id.state);
		trace_icm_send_drep_err(&cm_id_priv->id);
		kfree(private_data);
		return -EINVAL;
	}
@@ -2809,9 +2800,8 @@ static int cm_dreq_handler(struct cm_work *work)
		atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
				counter[CM_DREQ_COUNTER]);
		cm_issue_drep(work->port, work->mad_recv_wc);
		pr_debug(
			"%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
			__func__, IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
		trace_icm_no_priv_err(
			IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
			IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
		return -EINVAL;
	}
@@ -2857,9 +2847,7 @@ static int cm_dreq_handler(struct cm_work *work)
				counter[CM_DREQ_COUNTER]);
		goto unlock;
	default:
		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
			 cm_id_priv->id.state);
		trace_icm_dreq_unknown_err(&cm_id_priv->id);
		goto unlock;
	}
	cm_id_priv->id.state = IB_CM_DREQ_RCVD;
@@ -2944,9 +2932,7 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
			      state);
		break;
	default:
		pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
			 be32_to_cpu(cm_id_priv->id.local_id),
			 cm_id_priv->id.state);
		trace_icm_send_unknown_rej_err(&cm_id_priv->id);
		return -EINVAL;
	}

@@ -3059,9 +3045,7 @@ static int cm_rej_handler(struct cm_work *work)
		}
		/* fall through */
	default:
		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
			 cm_id_priv->id.state);
		trace_icm_rej_unknown_err(&cm_id_priv->id);
		spin_unlock_irq(&cm_id_priv->lock);
		goto out;
	}
@@ -3117,9 +3101,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
		}
		/* fall through */
	default:
		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
			 cm_id_priv->id.state);
		trace_icm_send_mra_unknown_err(&cm_id_priv->id);
		ret = -EINVAL;
		goto error1;
	}
@@ -3228,9 +3210,7 @@ static int cm_mra_handler(struct cm_work *work)
				counter[CM_MRA_COUNTER]);
		/* fall through */
	default:
		pr_debug("%s local_id %d, cm_id_priv->id.state: %d\n",
			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
			 cm_id_priv->id.state);
		trace_icm_mra_unknown_err(&cm_id_priv->id);
		goto out;
	}

@@ -3766,8 +3746,7 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
	if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
		goto discard;

	pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
			     state, ib_wc_status_msg(wc_status));
	trace_icm_mad_send_err(state, wc_status);
	switch (state) {
	case IB_CM_REQ_SENT:
	case IB_CM_MRA_REQ_RCVD:
@@ -3890,7 +3869,7 @@ static void cm_work_handler(struct work_struct *_work)
		ret = cm_timewait_handler(work);
		break;
	default:
		pr_debug("cm_event.event: 0x%x\n", work->cm_event.event);
		trace_icm_handler_err(work->cm_event.event);
		ret = -EINVAL;
		break;
	}
@@ -3926,8 +3905,7 @@ static int cm_establish(struct ib_cm_id *cm_id)
		ret = -EISCONN;
		break;
	default:
		pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
			 be32_to_cpu(cm_id->local_id), cm_id->state);
		trace_icm_establish_err(cm_id);
		ret = -EINVAL;
		break;
	}
@@ -4124,9 +4102,7 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
		ret = 0;
		break;
	default:
		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
			 cm_id_priv->id.state);
		trace_icm_qp_init_err(&cm_id_priv->id);
		ret = -EINVAL;
		break;
	}
@@ -4174,9 +4150,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
		ret = 0;
		break;
	default:
		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
			 cm_id_priv->id.state);
		trace_icm_qp_rtr_err(&cm_id_priv->id);
		ret = -EINVAL;
		break;
	}
@@ -4236,9 +4210,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
		ret = 0;
		break;
	default:
		pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
			 __func__, be32_to_cpu(cm_id_priv->id.local_id),
			 cm_id_priv->id.state);
		trace_icm_qp_rts_err(&cm_id_priv->id);
		ret = -EINVAL;
		break;
	}
+15 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Trace points for the IB Connection Manager.
 *
 * Author: Chuck Lever <chuck.lever@oracle.com>
 *
 * Copyright (c) 2020, Oracle and/or its affiliates.
 */

#include <rdma/rdma_cm.h>
#include "cma_priv.h"

#define CREATE_TRACE_POINTS

#include "cm_trace.h"
+309 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Trace point definitions for the RDMA Connect Manager.
 *
 * Author: Chuck Lever <chuck.lever@oracle.com>
 *
 * Copyright (c) 2020 Oracle and/or its affiliates.
 */

#undef TRACE_SYSTEM
#define TRACE_SYSTEM ib_cma

#if !defined(_TRACE_IB_CMA_H) || defined(TRACE_HEADER_MULTI_READ)

#define _TRACE_IB_CMA_H

#include <linux/tracepoint.h>
#include <rdma/ib_cm.h>
#include <trace/events/rdma.h>

/*
 * enum ib_cm_state, from include/rdma/ib_cm.h
 */
#define IB_CM_STATE_LIST					\
	ib_cm_state(IDLE)					\
	ib_cm_state(LISTEN)					\
	ib_cm_state(REQ_SENT)					\
	ib_cm_state(REQ_RCVD)					\
	ib_cm_state(MRA_REQ_SENT)				\
	ib_cm_state(MRA_REQ_RCVD)				\
	ib_cm_state(REP_SENT)					\
	ib_cm_state(REP_RCVD)					\
	ib_cm_state(MRA_REP_SENT)				\
	ib_cm_state(MRA_REP_RCVD)				\
	ib_cm_state(ESTABLISHED)				\
	ib_cm_state(DREQ_SENT)					\
	ib_cm_state(DREQ_RCVD)					\
	ib_cm_state(TIMEWAIT)					\
	ib_cm_state(SIDR_REQ_SENT)				\
	ib_cm_state_end(SIDR_REQ_RCVD)

#undef  ib_cm_state
#undef  ib_cm_state_end
#define ib_cm_state(x)		TRACE_DEFINE_ENUM(IB_CM_##x);
#define ib_cm_state_end(x)	TRACE_DEFINE_ENUM(IB_CM_##x);

IB_CM_STATE_LIST

#undef  ib_cm_state
#undef  ib_cm_state_end
#define ib_cm_state(x)		{ IB_CM_##x, #x },
#define ib_cm_state_end(x)	{ IB_CM_##x, #x }

#define show_ib_cm_state(x) \
		__print_symbolic(x, IB_CM_STATE_LIST)

/*
 * enum ib_cm_lap_state, from include/rdma/ib_cm.h
 */
#define IB_CM_LAP_STATE_LIST					\
	ib_cm_lap_state(LAP_UNINIT)				\
	ib_cm_lap_state(LAP_IDLE)				\
	ib_cm_lap_state(LAP_SENT)				\
	ib_cm_lap_state(LAP_RCVD)				\
	ib_cm_lap_state(MRA_LAP_SENT)				\
	ib_cm_lap_state_end(MRA_LAP_RCVD)

#undef  ib_cm_lap_state
#undef  ib_cm_lap_state_end
#define ib_cm_lap_state(x)	TRACE_DEFINE_ENUM(IB_CM_##x);
#define ib_cm_lap_state_end(x)	TRACE_DEFINE_ENUM(IB_CM_##x);

IB_CM_LAP_STATE_LIST

#undef  ib_cm_lap_state
#undef  ib_cm_lap_state_end
#define ib_cm_lap_state(x)	{ IB_CM_##x, #x },
#define ib_cm_lap_state_end(x)	{ IB_CM_##x, #x }

#define show_ib_cm_lap_state(x) \
		__print_symbolic(x, IB_CM_LAP_STATE_LIST)


DECLARE_EVENT_CLASS(icm_id_class,
	TP_PROTO(
		const struct ib_cm_id *cm_id
	),

	TP_ARGS(cm_id),

	TP_STRUCT__entry(
		__field(const void *, cm_id)	/* for eBPF scripts */
		__field(unsigned int, local_id)
		__field(unsigned int, remote_id)
		__field(unsigned long, state)
		__field(unsigned long, lap_state)
	),

	TP_fast_assign(
		__entry->cm_id = cm_id;
		__entry->local_id = be32_to_cpu(cm_id->local_id);
		__entry->remote_id = be32_to_cpu(cm_id->remote_id);
		__entry->state = cm_id->state;
		__entry->lap_state = cm_id->lap_state;
	),

	TP_printk("local_id=%u remote_id=%u state=%s lap_state=%s",
		__entry->local_id, __entry->remote_id,
		show_ib_cm_state(__entry->state),
		show_ib_cm_lap_state(__entry->lap_state)
	)
);

#define DEFINE_CM_ERR_EVENT(name)					\
		DEFINE_EVENT(icm_id_class,				\
				icm_##name##_err,			\
				TP_PROTO(				\
					const struct ib_cm_id *cm_id	\
				),					\
				TP_ARGS(cm_id))

DEFINE_CM_ERR_EVENT(send_cm_rtu);
DEFINE_CM_ERR_EVENT(establish);
DEFINE_CM_ERR_EVENT(no_listener);
DEFINE_CM_ERR_EVENT(send_drep);
DEFINE_CM_ERR_EVENT(dreq_unknown);
DEFINE_CM_ERR_EVENT(send_unknown_rej);
DEFINE_CM_ERR_EVENT(rej_unknown);
DEFINE_CM_ERR_EVENT(send_mra_unknown);
DEFINE_CM_ERR_EVENT(mra_unknown);
DEFINE_CM_ERR_EVENT(qp_init);
DEFINE_CM_ERR_EVENT(qp_rtr);
DEFINE_CM_ERR_EVENT(qp_rts);

DEFINE_EVENT(icm_id_class,						\
	icm_dreq_skipped,						\
	TP_PROTO(							\
		const struct ib_cm_id *cm_id				\
	),								\
	TP_ARGS(cm_id)							\
);

DECLARE_EVENT_CLASS(icm_local_class,
	TP_PROTO(
		unsigned int local_id,
		unsigned int remote_id
	),

	TP_ARGS(local_id, remote_id),

	TP_STRUCT__entry(
		__field(unsigned int, local_id)
		__field(unsigned int, remote_id)
	),

	TP_fast_assign(
		__entry->local_id = local_id;
		__entry->remote_id = remote_id;
	),

	TP_printk("local_id=%u remote_id=%u",
		__entry->local_id, __entry->remote_id
	)
);

#define DEFINE_CM_LOCAL_EVENT(name)					\
		DEFINE_EVENT(icm_local_class,				\
				icm_##name,				\
				TP_PROTO(				\
					unsigned int local_id,			\
					unsigned int remote_id			\
				),					\
				TP_ARGS(local_id, remote_id))

DEFINE_CM_LOCAL_EVENT(staleconn_err);
DEFINE_CM_LOCAL_EVENT(no_priv_err);

DECLARE_EVENT_CLASS(icm_remote_class,
	TP_PROTO(
		u32 remote_id
	),

	TP_ARGS(remote_id),

	TP_STRUCT__entry(
		__field(u32, remote_id)
	),

	TP_fast_assign(
		__entry->remote_id = remote_id;
	),

	TP_printk("remote_id=%u",
		__entry->remote_id
	)
);

#define DEFINE_CM_REMOTE_EVENT(name)					\
		DEFINE_EVENT(icm_remote_class,				\
				icm_##name,				\
				TP_PROTO(				\
					u32 remote_id			\
				),					\
				TP_ARGS(remote_id))

DEFINE_CM_REMOTE_EVENT(remote_no_priv_err);
DEFINE_CM_REMOTE_EVENT(insert_failed_err);

TRACE_EVENT(icm_send_rep_err,
	TP_PROTO(
		__be32 local_id,
		enum ib_cm_state state
	),

	TP_ARGS(local_id, state),

	TP_STRUCT__entry(
		__field(unsigned int, local_id)
		__field(unsigned long, state)
	),

	TP_fast_assign(
		__entry->local_id = be32_to_cpu(local_id);
		__entry->state = state;
	),

	TP_printk("local_id=%u state=%s",
		__entry->local_id, show_ib_cm_state(__entry->state)
	)
);

TRACE_EVENT(icm_rep_unknown_err,
	TP_PROTO(
		unsigned int local_id,
		unsigned int remote_id,
		enum ib_cm_state state
	),

	TP_ARGS(local_id, remote_id, state),

	TP_STRUCT__entry(
		__field(unsigned int, local_id)
		__field(unsigned int, remote_id)
		__field(unsigned long, state)
	),

	TP_fast_assign(
		__entry->local_id = local_id;
		__entry->remote_id = remote_id;
		__entry->state = state;
	),

	TP_printk("local_id=%u remote_id=%u state=%s",
		__entry->local_id, __entry->remote_id,
		show_ib_cm_state(__entry->state)
	)
);

TRACE_EVENT(icm_handler_err,
	TP_PROTO(
		enum ib_cm_event_type event
	),

	TP_ARGS(event),

	TP_STRUCT__entry(
		__field(unsigned long, event)
	),

	TP_fast_assign(
		__entry->event = event;
	),

	TP_printk("unhandled event=%s",
		rdma_show_ib_cm_event(__entry->event)
	)
);

TRACE_EVENT(icm_mad_send_err,
	TP_PROTO(
		enum ib_cm_state state,
		enum ib_wc_status wc_status
	),

	TP_ARGS(state, wc_status),

	TP_STRUCT__entry(
		__field(unsigned long, state)
		__field(unsigned long, wc_status)
	),

	TP_fast_assign(
		__entry->state = state;
		__entry->wc_status = wc_status;
	),

	TP_printk("state=%s completion status=%s",
		show_ib_cm_state(__entry->state),
		rdma_show_wc_status(__entry->wc_status)
	)
);

#endif /* _TRACE_IB_CMA_H */

#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../drivers/infiniband/core
#define TRACE_INCLUDE_FILE cm_trace

#include <trace/define_trace.h>