Commit 3745dc24 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'writeable-bpf-tracepoints'



Matt Mullins says:

====================
This adds an opt-in interface for tracepoints to expose a writable context to
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE programs that are attached, while
supporting read-only access from existing BPF_PROG_TYPE_RAW_TRACEPOINT
programs, as well as from non-BPF-based tracepoints.

The initial motivation is to support tracing that can be observed from the
remote end of an NBD socket, e.g. by adding flags to the struct nbd_request
header.  Earlier attempts included adding an NBD-specific tracepoint fd, but in
code review, I was recommended to implement it more generically -- as a result,
this patchset is far simpler than my initial try.

v4->v5:
  * rebased onto bpf-next/master and fixed merge conflicts
  * "tools: sync bpf.h" also syncs comments that have previously changed
    in bpf-next

v3->v4:
  * fixed a silly copy/paste typo in include/trace/events/bpf_test_run.h
    (_TRACE_NBD_H -> _TRACE_BPF_TEST_RUN_H)
  * fixed incorrect/misleading wording in patch 1's commit message,
    since the pointer cannot be directly dereferenced in a
    BPF_PROG_TYPE_RAW_TRACEPOINT
  * cleaned up the error message wording if the prog_tests fail
  * Addressed feedback from Yonghong
    * reject non-pointer-sized accesses to the buffer pointer
    * use sizeof(struct nbd_request) as one-byte-past-the-end in
      raw_tp_writable_reject_nbd_invalid.c
    * use BPF_MOV64_IMM instead of BPF_LD_IMM64

v2->v3:
  * Andrew addressed Josef's comments:
    * C-style commenting in nbd.c
    * Collapsed identical events into a single DECLARE_EVENT_CLASS.
      This saves about 2kB of kernel text

v1->v2:
  * add selftests
    * sync tools/include/uapi/linux/bpf.h
  * reject variable offset into the buffer
  * add string representation of PTR_TO_TP_BUFFER to reg_type_str
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 34b8ab09 e950e843
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -10741,6 +10741,7 @@ L: linux-block@vger.kernel.org
L:	nbd@other.debian.org
F:	Documentation/blockdev/nbd.txt
F:	drivers/block/nbd.c
F:	include/trace/events/nbd.h
F:	include/uapi/linux/nbd.h

NETWORK DROP MONITOR
+13 −0
Original line number Diff line number Diff line
@@ -44,6 +44,9 @@
#include <linux/nbd-netlink.h>
#include <net/genetlink.h>

#define CREATE_TRACE_POINTS
#include <trace/events/nbd.h>

static DEFINE_IDR(nbd_index_idr);
static DEFINE_MUTEX(nbd_index_mutex);
static int nbd_total_devices = 0;
@@ -510,6 +513,10 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
	if (sent) {
		if (sent >= sizeof(request)) {
			skip = sent - sizeof(request);

			/* initialize handle for tracing purposes */
			handle = nbd_cmd_handle(cmd);

			goto send_pages;
		}
		iov_iter_advance(&from, sent);
@@ -526,11 +533,14 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
	handle = nbd_cmd_handle(cmd);
	memcpy(request.handle, &handle, sizeof(handle));

	trace_nbd_send_request(&request, nbd->index, blk_mq_rq_from_pdu(cmd));

	dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
		req, nbdcmd_to_ascii(type),
		(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
	result = sock_xmit(nbd, index, 1, &from,
			(type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
	trace_nbd_header_sent(req, handle);
	if (result <= 0) {
		if (was_interrupted(result)) {
			/* If we havne't sent anything we can just return BUSY,
@@ -603,6 +613,7 @@ send_pages:
		bio = next;
	}
out:
	trace_nbd_payload_sent(req, handle);
	nsock->pending = NULL;
	nsock->sent = 0;
	return 0;
@@ -650,6 +661,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
			tag, req);
		return ERR_PTR(-ENOENT);
	}
	trace_nbd_header_received(req, handle);
	cmd = blk_mq_rq_to_pdu(req);

	mutex_lock(&cmd->lock);
@@ -703,6 +715,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
		}
	}
out:
	trace_nbd_payload_received(req, handle);
	mutex_unlock(&cmd->lock);
	return ret ? ERR_PTR(ret) : cmd;
}
+2 −0
Original line number Diff line number Diff line
@@ -272,6 +272,7 @@ enum bpf_reg_type {
	PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
	PTR_TO_TCP_SOCK,	 /* reg points to struct tcp_sock */
	PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
	PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */
};

/* The information passed from prog-specific *_is_valid_access
@@ -361,6 +362,7 @@ struct bpf_prog_aux {
	u32 used_map_cnt;
	u32 max_ctx_offset;
	u32 max_pkt_offset;
	u32 max_tp_access;
	u32 stack_depth;
	u32 id;
	u32 func_cnt; /* used by non-func prog as the number of func progs */
+1 −0
Original line number Diff line number Diff line
@@ -25,6 +25,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
#endif
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
+1 −0
Original line number Diff line number Diff line
@@ -45,6 +45,7 @@ struct bpf_raw_event_map {
	struct tracepoint	*tp;
	void			*bpf_func;
	u32			num_args;
	u32			writable_size;
} __aligned(32);

#endif
Loading