Commit 7c60c48f authored by Eric W. Biederman's avatar Eric W. Biederman
Browse files

sysctl: Improve the sysctl sanity checks



- Stop validating subdirectories now that we only register leaf tables

- Cleanup and improve the duplicate filename check.
  * Run the duplicate filename check under the sysctl_lock to guarantee
    we never add duplicate names.
  * Reduce the duplicate filename check to nearly O(M*N) where M is the
    number of entries in tthe table we are registering and N is the
    number of entries in the directory before we got there.

- Move the duplicate filename check into it's own function and call
  it directtly from __register_sysctl_table

- Kill the config option as the sanity checks are now cheap enough
  the config option is unnecessary. The original reason for the config
  option was because we had a huge table used to verify the proc filename
  to binary sysctl mapping.  That table has now evolved into the binary_sysctl
  translation layer and is no longer part of the sysctl_check code.

- Tighten up the permission checks.  Guarnateeing that files only have read
  or write permissions.

- Removed redudant check for parents having a procname as now everything has
  a procname.

- Generalize the backtrace logic so that we print a backtrace from
  any failure of __register_sysctl_table that was not caused by
  a memmory allocation failure.  The backtrace allows us to track
  down who erroneously registered a sysctl table.

Bechmark before (CONFIG_SYSCTL_CHECK=y):
    make-dummies 0 999 -> 12s
    rmmod dummy        -> 0.08s

Bechmark before (CONFIG_SYSCTL_CHECK=n):
    make-dummies 0 999 -> 0.7s
    rmmod dummy        -> 0.06s
    make-dummies 0 99999 -> 1m13s
    rmmod dummy          -> 0.38s

Benchmark after:
    make-dummies 0 999 -> 0.65s
    rmmod dummy        -> 0.055s
    make-dummies 0 9999 -> 1m10s
    rmmod dummy         -> 0.39s

The sysctl sanity checks now impose no measurable cost.

Signed-off-by: default avatarEric W. Biederman <ebiederm@xmission.com>
parent f728019b
Loading
Loading
Loading
Loading
+86 −136
Original line number Diff line number Diff line
@@ -726,129 +726,84 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
	}
}

#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
static int sysctl_depth(struct ctl_table *table)
static int sysctl_check_table_dups(const char *path, struct ctl_table *old,
	struct ctl_table *table)
{
	struct ctl_table *tmp;
	int depth;

	depth = 0;
	for (tmp = table; tmp->parent; tmp = tmp->parent)
		depth++;
	struct ctl_table *entry, *test;
	int error = 0;

	return depth;
	for (entry = old; entry->procname; entry++) {
		for (test = table; test->procname; test++) {
			if (strcmp(entry->procname, test->procname) == 0) {
				printk(KERN_ERR "sysctl duplicate entry: %s/%s\n",
					path, test->procname);
				error = -EEXIST;
			}
		}
	}
	return error;
}

static struct ctl_table *sysctl_parent(struct ctl_table *table, int n)
static int sysctl_check_dups(struct nsproxy *namespaces,
	struct ctl_table_header *header,
	const char *path, struct ctl_table *table)
{
	int i;
	struct ctl_table_root *root;
	struct ctl_table_set *set;
	struct ctl_table_header *dir_head, *head;
	struct ctl_table *dir_table;
	int error = 0;

	for (i = 0; table && i < n; i++)
		table = table->parent;
	/* No dups if we are the only member of our directory */
	if (header->attached_by != table)
		return 0;

	return table;
}
	dir_head = header->parent;
	dir_table = header->attached_to;

	error = sysctl_check_table_dups(path, dir_table, table);

static void sysctl_print_path(struct ctl_table *table)
{
	struct ctl_table *tmp;
	int depth, i;
	depth = sysctl_depth(table);
	if (table->procname) {
		for (i = depth; i >= 0; i--) {
			tmp = sysctl_parent(table, i);
			printk("/%s", tmp->procname?tmp->procname:"");
		}
	}
	printk(" ");
}
	root = &sysctl_table_root;
	do {
		set = lookup_header_set(root, namespaces);

static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces,
						struct ctl_table *table)
{
	struct ctl_table_header *head;
	struct ctl_table *ref, *test;
	int depth, cur_depth;

	depth = sysctl_depth(table);

	for (head = __sysctl_head_next(namespaces, NULL); head;
	     head = __sysctl_head_next(namespaces, head)) {
		cur_depth = depth;
		ref = head->ctl_table;
repeat:
		test = sysctl_parent(table, cur_depth);
		for (; ref->procname; ref++) {
			int match = 0;
			if (cur_depth && !ref->child)
		list_for_each_entry(head, &set->list, ctl_entry) {
			if (head->unregistering)
				continue;

			if (test->procname && ref->procname &&
			    (strcmp(test->procname, ref->procname) == 0))
					match++;

			if (match) {
				if (cur_depth != 0) {
					cur_depth--;
					ref = ref->child;
					goto repeat;
				}
				goto out;
			}
		}
			if (head->attached_to != dir_table)
				continue;
			error = sysctl_check_table_dups(path, head->attached_by,
							table);
		}
	ref = NULL;
out:
	sysctl_head_finish(head);
	return ref;
		root = list_entry(root->root_list.next,
				  struct ctl_table_root, root_list);
	} while (root != &sysctl_table_root);
	return error;
}

static void set_fail(const char **fail, struct ctl_table *table, const char *str)
static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
{
	if (*fail) {
		printk(KERN_ERR "sysctl table check failed: ");
		sysctl_print_path(table);
		printk(" %s\n", *fail);
		dump_stack();
	}
	*fail = str;
}
	struct va_format vaf;
	va_list args;

static void sysctl_check_leaf(struct nsproxy *namespaces,
				struct ctl_table *table, const char **fail)
{
	struct ctl_table *ref;
	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;

	printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n",
		path, table->procname, &vaf);

	ref = sysctl_check_lookup(namespaces, table);
	if (ref && (ref != table))
		set_fail(fail, table, "Sysctl already exists");
	va_end(args);
	return -EINVAL;
}

static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
static int sysctl_check_table(const char *path, struct ctl_table *table)
{
	int error = 0;
	int err = 0;
	for (; table->procname; table++) {
		const char *fail = NULL;

		if (table->parent) {
			if (!table->parent->procname)
				set_fail(&fail, table, "Parent without procname");
		}
		if (table->child) {
			if (table->data)
				set_fail(&fail, table, "Directory with data?");
			if (table->maxlen)
				set_fail(&fail, table, "Directory with maxlen?");
			if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode)
				set_fail(&fail, table, "Writable sysctl directory");
			if (table->proc_handler)
				set_fail(&fail, table, "Directory with proc_handler");
			if (table->extra1)
				set_fail(&fail, table, "Directory with extra1");
			if (table->extra2)
				set_fail(&fail, table, "Directory with extra2");
		} else {
		if (table->child)
			err = sysctl_err(path, table, "Not a file");

		if ((table->proc_handler == proc_dostring) ||
		    (table->proc_handler == proc_dointvec) ||
		    (table->proc_handler == proc_dointvec_minmax) ||
@@ -858,28 +813,19 @@ static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *tabl
		    (table->proc_handler == proc_doulongvec_minmax) ||
		    (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
			if (!table->data)
					set_fail(&fail, table, "No data");
				err = sysctl_err(path, table, "No data");
			if (!table->maxlen)
					set_fail(&fail, table, "No maxlen");
				err = sysctl_err(path, table, "No maxlen");
		}
#ifdef CONFIG_PROC_SYSCTL
		if (!table->proc_handler)
				set_fail(&fail, table, "No proc_handler");
#endif
			sysctl_check_leaf(namespaces, table, &fail);
		}
		if (table->mode > 0777)
			set_fail(&fail, table, "bogus .mode");
		if (fail) {
			set_fail(&fail, table, NULL);
			error = -EINVAL;
		}
		if (table->child)
			error |= sysctl_check_table(namespaces, table->child);
			err = sysctl_err(path, table, "No proc_handler");

		if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
			err = sysctl_err(path, table, "bogus .mode 0%o",
				table->mode);
	}
	return error;
	return err;
}
#endif /* CONFIG_SYSCTL_SYSCALL_CHECK */

/**
 * __register_sysctl_table - register a leaf sysctl table
@@ -1003,12 +949,8 @@ struct ctl_table_header *__register_sysctl_table(
	header->root = root;
	sysctl_set_parent(NULL, header->ctl_table);
	header->count = 1;
#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
	if (sysctl_check_table(namespaces, header->ctl_table)) {
		kfree(header);
		return NULL;
	}
#endif
	if (sysctl_check_table(path, table))
		goto fail;
	spin_lock(&sysctl_lock);
	header->set = lookup_header_set(root, namespaces);
	header->attached_by = header->ctl_table;
@@ -1029,11 +971,19 @@ struct ctl_table_header *__register_sysctl_table(
				  struct ctl_table_root, root_list);
		set = lookup_header_set(root, namespaces);
	}
	if (sysctl_check_dups(namespaces, header, path, table))
		goto fail_locked;
	header->parent->count++;
	list_add_tail(&header->ctl_entry, &header->set->list);
	spin_unlock(&sysctl_lock);

	return header;
fail_locked:
	spin_unlock(&sysctl_lock);
fail:
	kfree(header);
	dump_stack();
	return NULL;
}

static char *append_path(const char *path, char *pos, const char *name)
+0 −8
Original line number Diff line number Diff line
@@ -1113,14 +1113,6 @@ config LATENCYTOP
	  Enable this option if you want to use the LatencyTOP tool
	  to find out which userspace is blocking on what kernel operations.

config SYSCTL_SYSCALL_CHECK
	bool "Sysctl checks"
	depends on SYSCTL
	---help---
	  sys_sysctl uses binary paths that have been found challenging
	  to properly maintain and use. This enables checks that help
	  you to keep things correct.

source mm/Kconfig.debug
source kernel/trace/Kconfig