Commit 3e5901cb authored by Chuck Lever, committed by Jason Gunthorpe
Browse files

RDMA/core: Trace points for diagnosing completion queue issues

Sample trace events:

   kworker/u29:0-300   [007]   120.042217: cq_alloc:             cq.id=4 nr_cqe=161 comp_vector=2 poll_ctx=WORKQUEUE
          <idle>-0     [002]   120.056292: cq_schedule:          cq.id=4
    kworker/2:1H-482   [002]   120.056402: cq_process:           cq.id=4 wake-up took 109 [us] from interrupt
    kworker/2:1H-482   [002]   120.056407: cq_poll:              cq.id=4 requested 16, returned 1
          <idle>-0     [002]   120.067503: cq_schedule:          cq.id=4
    kworker/2:1H-482   [002]   120.067537: cq_process:           cq.id=4 wake-up took 34 [us] from interrupt
    kworker/2:1H-482   [002]   120.067541: cq_poll:              cq.id=4 requested 16, returned 1
          <idle>-0     [002]   120.067657: cq_schedule:          cq.id=4
    kworker/2:1H-482   [002]   120.067672: cq_process:           cq.id=4 wake-up took 15 [us] from interrupt
    kworker/2:1H-482   [002]   120.067674: cq_poll:              cq.id=4 requested 16, returned 1

 ...

         systemd-1     [002]   122.392653: cq_schedule:          cq.id=4
    kworker/2:1H-482   [002]   122.392688: cq_process:           cq.id=4 wake-up took 35 [us] from interrupt
    kworker/2:1H-482   [002]   122.392693: cq_poll:              cq.id=4 requested 16, returned 16
    kworker/2:1H-482   [002]   122.392836: cq_poll:              cq.id=4 requested 16, returned 16
    kworker/2:1H-482   [002]   122.392970: cq_poll:              cq.id=4 requested 16, returned 16
    kworker/2:1H-482   [002]   122.393083: cq_poll:              cq.id=4 requested 16, returned 16
    kworker/2:1H-482   [002]   122.393195: cq_poll:              cq.id=4 requested 16, returned 3

Several features to note in this output:
 - The WCE count and context type are reported at allocation time
 - The CPU and kworker for each CQ are evident
 - The CQ's restracker ID is tagged on each trace event
 - CQ poll scheduling latency is measured
 - Details about how often single completions occur versus multiple
   completions are evident
 - The cost of the ULP's completion handler is recorded

Link: https://lore.kernel.org/r/20191218201815.30584.3481.stgit@manet.1015granger.net


Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent ed999f82
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -11,7 +11,8 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
				device.o fmr_pool.o cache.o netlink.o \
				roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
				multicast.o mad.o smi.o agent.o mad_rmpp.o \
				nldev.o restrack.o counters.o ib_core_uverbs.o
				nldev.o restrack.o counters.o ib_core_uverbs.o \
				trace.o

ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
+24 −3
Original line number Diff line number Diff line
@@ -7,6 +7,8 @@
#include <linux/slab.h>
#include <rdma/ib_verbs.h>

#include <trace/events/rdma_core.h>

/* # of WCs to poll for with a single call to ib_poll_cq */
#define IB_POLL_BATCH			16
#define IB_POLL_BATCH_DIRECT		8
@@ -41,6 +43,7 @@ static void ib_cq_rdma_dim_work(struct work_struct *w)

	dim->state = DIM_START_MEASURE;

	trace_cq_modify(cq, comps, usec);
	cq->device->ops.modify_cq(cq, comps, usec);
}

@@ -65,17 +68,28 @@ static void rdma_dim_init(struct ib_cq *cq)
	INIT_WORK(&dim->work, ib_cq_rdma_dim_work);
}

/*
 * Poll @cq for up to @num_entries work completions into @wc, then emit
 * the cq_poll trace event carrying the requested count and the value
 * ib_poll_cq() returned.
 *
 * Returns the result of ib_poll_cq() unchanged.
 */
static int __poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
{
	const int polled = ib_poll_cq(cq, num_entries, wc);

	trace_cq_poll(cq, num_entries, polled);
	return polled;
}

static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
			   int batch)
{
	int i, n, completed = 0;

	trace_cq_process(cq);

	/*
	 * budget might be (-1) if the caller does not
	 * want to bound this call, thus we need unsigned
	 * minimum here.
	 */
	while ((n = ib_poll_cq(cq, min_t(u32, batch,
	while ((n = __poll_cq(cq, min_t(u32, batch,
					budget - completed), wcs)) > 0) {
		for (i = 0; i < n; i++) {
			struct ib_wc *wc = &wcs[i];
@@ -131,9 +145,11 @@ static int ib_poll_handler(struct irq_poll *iop, int budget)
	completed = __ib_process_cq(cq, budget, cq->wc, IB_POLL_BATCH);
	if (completed < budget) {
		irq_poll_complete(&cq->iop);
		if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
		if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) {
			trace_cq_reschedule(cq);
			irq_poll_sched(&cq->iop);
		}
	}

	if (dim)
		rdma_dim(dim, completed);
@@ -143,6 +159,7 @@ static int ib_poll_handler(struct irq_poll *iop, int budget)

/*
 * Completion callback for a CQ serviced through irq_poll: emits the
 * cq_schedule trace event and then schedules the CQ's irq_poll instance.
 * @private is not used here.
 */
static void ib_cq_completion_softirq(struct ib_cq *cq, void *private)
{
	/* NOTE(review): traced before scheduling, presumably so the event marks the start of the wake-up interval - confirm */
	trace_cq_schedule(cq);
	irq_poll_sched(&cq->iop);
}

@@ -162,6 +179,7 @@ static void ib_cq_poll_work(struct work_struct *work)

/*
 * Completion callback for a CQ serviced from a workqueue: emits the
 * cq_schedule trace event and then queues cq->work on cq->comp_wq.
 * @private is not used here.
 */
static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
{
	/* NOTE(review): traced before queueing, presumably so the event marks the start of the wake-up interval - confirm */
	trace_cq_schedule(cq);
	queue_work(cq->comp_wq, &cq->work);
}

@@ -239,6 +257,7 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
		goto out_destroy_cq;
	}

	trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx);
	return cq;

out_destroy_cq:
@@ -248,6 +267,7 @@ out_free_wc:
	kfree(cq->wc);
out_free_cq:
	kfree(cq);
	trace_cq_alloc_error(nr_cqe, comp_vector, poll_ctx, ret);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL(__ib_alloc_cq_user);
@@ -304,6 +324,7 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
		WARN_ON_ONCE(1);
	}

	trace_cq_free(cq);
	rdma_restrack_del(&cq->res);
	cq->device->ops.destroy_cq(cq, udata);
	if (cq->dim)
+14 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Trace points for core RDMA functions.
 *
 * Author: Chuck Lever <chuck.lever@oracle.com>
 *
 * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * NOTE(review): per the kernel tracepoint convention, CREATE_TRACE_POINTS
 * is defined before including the trace header so that this file is the
 * one that instantiates the rdma_core trace points; other users include
 * the header without the define - confirm against the tracepoint docs.
 */
#define CREATE_TRACE_POINTS

#include <rdma/ib_verbs.h>

#include <trace/events/rdma_core.h>
+4 −0
Original line number Diff line number Diff line
@@ -53,6 +53,8 @@

#include "core_priv.h"

#include <trace/events/rdma_core.h>

static int ib_resolve_eth_dmac(struct ib_device *device,
			       struct rdma_ah_attr *ah_attr);

@@ -2744,6 +2746,7 @@ void ib_drain_sq(struct ib_qp *qp)
		qp->device->ops.drain_sq(qp);
	else
		__ib_drain_sq(qp);
	trace_cq_drain_complete(qp->send_cq);
}
EXPORT_SYMBOL(ib_drain_sq);

@@ -2772,6 +2775,7 @@ void ib_drain_rq(struct ib_qp *qp)
		qp->device->ops.drain_rq(qp);
	else
		__ib_drain_rq(qp);
	trace_cq_drain_complete(qp->recv_cq);
}
EXPORT_SYMBOL(ib_drain_rq);

+5 −0
Original line number Diff line number Diff line
@@ -1558,6 +1558,11 @@ struct ib_cq {
	};
	struct workqueue_struct *comp_wq;
	struct dim *dim;

	/* updated only by trace points */
	ktime_t timestamp;
	bool interrupt;

	/*
	 * Implementation details of the RDMA core, don't use in drivers:
	 */
Loading