Commit 358fdb45 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'bpf_probe_read_user'



Daniel Borkmann says:

====================
This set adds probe_read_{user,kernel}(), probe_read_str_{user,kernel}()
helpers, fixes probe_write_user() helper and selftests. For details please
see individual patches.

Thanks!

v2 -> v3:
  - noticed two more things that are fixed in here:
   - bpf uapi helper description used 'int size' for *_str helpers, now u32
   - we need TASK_SIZE_MAX + guard page on x86-64 in patch 2 otherwise
     we'll trigger the 00c42373 warn as well, so full range covered now
v1 -> v2:
  - standardize unsafe_ptr terminology in uapi header comment (Andrii)
  - probe_read_{user,kernel}[_str] naming scheme (Andrii)
  - use global data in last test case, remove relaxed_maps (Andrii)
  - add strict non-pagefault kernel read funcs to avoid warning in
    kernel probe read helpers (Alexei)
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents e1cb7d2d fa553d9b
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@ CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
endif

obj-y	:=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
	    pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
	    pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o

# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)

arch/x86/mm/maccess.c

0 → 100644
+43 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only

#include <linux/uaccess.h>
#include <linux/kernel.h>

#ifdef CONFIG_X86_64
static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits)
{
	return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
}

static __always_inline bool invalid_probe_range(u64 vaddr)
{
	/*
	 * Range covering the highest possible canonical userspace address
	 * as well as non-canonical address range. For the canonical range
	 * we also need to include the userspace guard page.
	 */
	return vaddr < TASK_SIZE_MAX + PAGE_SIZE ||
	       canonical_address(vaddr, boot_cpu_data.x86_virt_bits) != vaddr;
}
#else
static __always_inline bool invalid_probe_range(u64 vaddr)
{
	return vaddr < TASK_SIZE_MAX;
}
#endif

long probe_kernel_read_strict(void *dst, const void *src, size_t size)
{
	if (unlikely(invalid_probe_range((unsigned long)src)))
		return -EFAULT;

	return __probe_kernel_read(dst, src, size);
}

long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, long count)
{
	if (unlikely(invalid_probe_range((unsigned long)unsafe_addr)))
		return -EFAULT;

	return __strncpy_from_unsafe(dst, unsafe_addr, count);
}
+16 −0
Original line number Diff line number Diff line
@@ -311,6 +311,7 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src,
 * happens, handle that and return -EFAULT.
 */
extern long probe_kernel_read(void *dst, const void *src, size_t size);
extern long probe_kernel_read_strict(void *dst, const void *src, size_t size);
extern long __probe_kernel_read(void *dst, const void *src, size_t size);

/*
@@ -337,7 +338,22 @@ extern long __probe_user_read(void *dst, const void __user *src, size_t size);
extern long notrace probe_kernel_write(void *dst, const void *src, size_t size);
extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size);

/*
 * probe_user_write(): safely attempt to write to a location in user space
 * @dst: address to write to
 * @src: pointer to the data that shall be written
 * @size: size of the data chunk
 *
 * Safely write to address @dst from the buffer at @src.  If a kernel fault
 * happens, handle that and return -EFAULT.
 */
extern long notrace probe_user_write(void __user *dst, const void *src, size_t size);
extern long notrace __probe_user_write(void __user *dst, const void *src, size_t size);

extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
extern long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr,
				       long count);
extern long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr,
				     long count);
extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count);
+82 −40
Original line number Diff line number Diff line
@@ -563,10 +563,13 @@ union bpf_attr {
 * 	Return
 * 		0 on success, or a negative error in case of failure.
 *
 * int bpf_probe_read(void *dst, u32 size, const void *src)
 * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
 * 	Description
 * 		For tracing programs, safely attempt to read *size* bytes from
 * 		address *src* and store the data in *dst*.
 * 		kernel space address *unsafe_ptr* and store the data in *dst*.
 *
 * 		Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
 * 		instead.
 * 	Return
 * 		0 on success, or a negative error in case of failure.
 *
@@ -1428,45 +1431,14 @@ union bpf_attr {
 * 	Return
 * 		0 on success, or a negative error in case of failure.
 *
 * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
 * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
 * 	Description
 * 		Copy a NUL terminated string from an unsafe address
 * 		*unsafe_ptr* to *dst*. The *size* should include the
 * 		terminating NUL byte. In case the string length is smaller than
 * 		*size*, the target is not padded with further NUL bytes. If the
 * 		string length is larger than *size*, just *size*-1 bytes are
 * 		copied and the last byte is set to NUL.
 *
 * 		On success, the length of the copied string is returned. This
 * 		makes this helper useful in tracing programs for reading
 * 		strings, and more importantly to get its length at runtime. See
 * 		the following snippet:
 * 		Copy a NUL terminated string from an unsafe kernel address
 * 		*unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
 * 		more details.
 *
 * 		::
 *
 * 			SEC("kprobe/sys_open")
 * 			void bpf_sys_open(struct pt_regs *ctx)
 * 			{
 * 			        char buf[PATHLEN]; // PATHLEN is defined to 256
 * 			        int res = bpf_probe_read_str(buf, sizeof(buf),
 * 				                             ctx->di);
 *
 * 				// Consume buf, for example push it to
 * 				// userspace via bpf_perf_event_output(); we
 * 				// can use res (the string length) as event
 * 				// size, after checking its boundaries.
 * 			}
 *
 * 		In comparison, using **bpf_probe_read()** helper here instead
 * 		to read the string would require to estimate the length at
 * 		compile time, and would often result in copying more memory
 * 		than necessary.
 *
 * 		Another useful use case is when parsing individual process
 * 		arguments or individual environment variables navigating
 * 		*current*\ **->mm->arg_start** and *current*\
 * 		**->mm->env_start**: using this helper and the return value,
 * 		one can quickly iterate at the right offset of the memory area.
 * 		Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
 * 		instead.
 * 	Return
 * 		On success, the strictly positive length of the string,
 * 		including the trailing NUL character. On error, a negative
@@ -2777,6 +2749,72 @@ union bpf_attr {
 * 		restricted to raw_tracepoint bpf programs.
 * 	Return
 * 		0 on success, or a negative error in case of failure.
 *
 * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
 * 	Description
 * 		Safely attempt to read *size* bytes from user space address
 * 		*unsafe_ptr* and store the data in *dst*.
 * 	Return
 * 		0 on success, or a negative error in case of failure.
 *
 * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
 * 	Description
 * 		Safely attempt to read *size* bytes from kernel space address
 * 		*unsafe_ptr* and store the data in *dst*.
 * 	Return
 * 		0 on success, or a negative error in case of failure.
 *
 * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
 * 	Description
 * 		Copy a NUL terminated string from an unsafe user address
 * 		*unsafe_ptr* to *dst*. The *size* should include the
 * 		terminating NUL byte. In case the string length is smaller than
 * 		*size*, the target is not padded with further NUL bytes. If the
 * 		string length is larger than *size*, just *size*-1 bytes are
 * 		copied and the last byte is set to NUL.
 *
 * 		On success, the length of the copied string is returned. This
 * 		makes this helper useful in tracing programs for reading
 * 		strings, and more importantly to get its length at runtime. See
 * 		the following snippet:
 *
 * 		::
 *
 * 			SEC("kprobe/sys_open")
 * 			void bpf_sys_open(struct pt_regs *ctx)
 * 			{
 * 			        char buf[PATHLEN]; // PATHLEN is defined to 256
 * 			        int res = bpf_probe_read_user_str(buf, sizeof(buf),
 * 				                                  ctx->di);
 *
 * 				// Consume buf, for example push it to
 * 				// userspace via bpf_perf_event_output(); we
 * 				// can use res (the string length) as event
 * 				// size, after checking its boundaries.
 * 			}
 *
 * 		In comparison, using **bpf_probe_read_user()** helper here
 * 		instead to read the string would require to estimate the length
 * 		at compile time, and would often result in copying more memory
 * 		than necessary.
 *
 * 		Another useful use case is when parsing individual process
 * 		arguments or individual environment variables navigating
 * 		*current*\ **->mm->arg_start** and *current*\
 * 		**->mm->env_start**: using this helper and the return value,
 * 		one can quickly iterate at the right offset of the memory area.
 * 	Return
 * 		On success, the strictly positive length of the string,
 * 		including the trailing NUL character. On error, a negative
 * 		value.
 *
 * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
 * 	Description
 * 		Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
 * 		to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
 * 	Return
 * 		On success, the strictly positive length of the string,	including
 * 		the trailing NUL character. On error, a negative value.
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
@@ -2890,7 +2928,11 @@ union bpf_attr {
	FN(sk_storage_delete),		\
	FN(send_signal),		\
	FN(tcp_gen_syncookie),		\
	FN(skb_output),
	FN(skb_output),			\
	FN(probe_read_user),		\
	FN(probe_read_kernel),		\
	FN(probe_read_user_str),	\
	FN(probe_read_kernel_str),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
+5 −4
Original line number Diff line number Diff line
@@ -1306,11 +1306,12 @@ bool bpf_opcode_in_insntable(u8 code)
}

#ifndef CONFIG_BPF_JIT_ALWAYS_ON
u64 __weak bpf_probe_read(void * dst, u32 size, const void * unsafe_ptr)
u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
{
	memset(dst, 0, size);
	return -EFAULT;
}

/**
 *	__bpf_prog_run - run eBPF program on a given context
 *	@regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
@@ -1568,7 +1569,7 @@ out:
#undef LDST
#define LDX_PROBE(SIZEOP, SIZE)							\
	LDX_PROBE_MEM_##SIZEOP:							\
		bpf_probe_read(&DST, SIZE, (const void *)(long) SRC);	\
		bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) SRC);	\
		CONT;
	LDX_PROBE(B,  1)
	LDX_PROBE(H,  2)
Loading