Commit 40091325 authored by Linus Torvalds
Browse files

Merge branch 'work.mount-syscalls' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull mount ABI updates from Al Viro:
 "The syscalls themselves, finally.

  That's not all there is to that stuff, but switching individual
  filesystems to new methods is fortunately independent from everything
  else, so e.g. NFS series can go through NFS tree, etc.

  As those conversions get done, we'll be finally able to get rid of a
  bunch of duplication in fs/super.c introduced in the beginning of the
  entire thing. I expect that to be finished in the next window..."

* 'work.mount-syscalls' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  vfs: Add a sample program for the new mount API
  vfs: syscall: Add fspick() to select a superblock for reconfiguration
  vfs: syscall: Add fsmount() to create a mount for a superblock
  vfs: syscall: Add fsconfig() for configuring and managing a context
  vfs: Implement logging through fs_context
  vfs: syscall: Add fsopen() to prepare for superblock creation
  Make anon_inodes unconditional
  teach move_mount(2) to work with OPEN_TREE_CLONE
  vfs: syscall: Add move_mount(2) to move mounts around
  vfs: syscall: Add open_tree(2) to reference or clone a mount
parents d27fb65b f1b5618e
Loading
Loading
Loading
Loading
+6 −1
Original line number Original line Diff line number Diff line
@@ -398,7 +398,12 @@
384	i386	arch_prctl		sys_arch_prctl			__ia32_compat_sys_arch_prctl
384	i386	arch_prctl		sys_arch_prctl			__ia32_compat_sys_arch_prctl
385	i386	io_pgetevents		sys_io_pgetevents_time32	__ia32_compat_sys_io_pgetevents
385	i386	io_pgetevents		sys_io_pgetevents_time32	__ia32_compat_sys_io_pgetevents
386	i386	rseq			sys_rseq			__ia32_sys_rseq
386	i386	rseq			sys_rseq			__ia32_sys_rseq
# don't use numbers 387 through 392, add new calls at the end
387	i386	open_tree		sys_open_tree			__ia32_sys_open_tree
388	i386	move_mount		sys_move_mount			__ia32_sys_move_mount
389	i386	fsopen			sys_fsopen			__ia32_sys_fsopen
390	i386	fsconfig		sys_fsconfig			__ia32_sys_fsconfig
391	i386	fsmount			sys_fsmount			__ia32_sys_fsmount
392	i386	fspick			sys_fspick			__ia32_sys_fspick
393	i386	semget			sys_semget    			__ia32_sys_semget
393	i386	semget			sys_semget    			__ia32_sys_semget
394	i386	semctl			sys_semctl    			__ia32_compat_sys_semctl
394	i386	semctl			sys_semctl    			__ia32_compat_sys_semctl
395	i386	shmget			sys_shmget    			__ia32_sys_shmget
395	i386	shmget			sys_shmget    			__ia32_sys_shmget
+6 −0
Original line number Original line Diff line number Diff line
@@ -343,6 +343,12 @@
332	common	statx			__x64_sys_statx
332	common	statx			__x64_sys_statx
333	common	io_pgetevents		__x64_sys_io_pgetevents
333	common	io_pgetevents		__x64_sys_io_pgetevents
334	common	rseq			__x64_sys_rseq
334	common	rseq			__x64_sys_rseq
335	common	open_tree		__x64_sys_open_tree
336	common	move_mount		__x64_sys_move_mount
337	common	fsopen			__x64_sys_fsopen
338	common	fsconfig		__x64_sys_fsconfig
339	common	fsmount			__x64_sys_fsmount
340	common	fspick			__x64_sys_fspick
# don't use numbers 387 through 423, add new calls after the last
# don't use numbers 387 through 423, add new calls after the last
# 'common' entry
# 'common' entry
424	common	pidfd_send_signal	__x64_sys_pidfd_send_signal
424	common	pidfd_send_signal	__x64_sys_pidfd_send_signal
+1 −1
Original line number Original line Diff line number Diff line
@@ -13,7 +13,7 @@ obj-y := open.o read_write.o file_table.o super.o \
		seq_file.o xattr.o libfs.o fs-writeback.o \
		seq_file.o xattr.o libfs.o fs-writeback.o \
		pnode.o splice.o sync.o utimes.o d_path.o \
		pnode.o splice.o sync.o utimes.o d_path.o \
		stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
		stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
		fs_types.o fs_context.o fs_parser.o
		fs_types.o fs_context.o fs_parser.o fsopen.o


ifeq ($(CONFIG_BLOCK),y)
ifeq ($(CONFIG_BLOCK),y)
obj-y +=	buffer.o block_dev.o direct-io.o mpage.o
obj-y +=	buffer.o block_dev.o direct-io.o mpage.o
+6 −3
Original line number Original line Diff line number Diff line
@@ -255,6 +255,7 @@ static void __fput(struct file *file)
	struct dentry *dentry = file->f_path.dentry;
	struct dentry *dentry = file->f_path.dentry;
	struct vfsmount *mnt = file->f_path.mnt;
	struct vfsmount *mnt = file->f_path.mnt;
	struct inode *inode = file->f_inode;
	struct inode *inode = file->f_inode;
	fmode_t mode = file->f_mode;


	if (unlikely(!(file->f_mode & FMODE_OPENED)))
	if (unlikely(!(file->f_mode & FMODE_OPENED)))
		goto out;
		goto out;
@@ -277,18 +278,20 @@ static void __fput(struct file *file)
	if (file->f_op->release)
	if (file->f_op->release)
		file->f_op->release(inode, file);
		file->f_op->release(inode, file);
	if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
	if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
		     !(file->f_mode & FMODE_PATH))) {
		     !(mode & FMODE_PATH))) {
		cdev_put(inode->i_cdev);
		cdev_put(inode->i_cdev);
	}
	}
	fops_put(file->f_op);
	fops_put(file->f_op);
	put_pid(file->f_owner.pid);
	put_pid(file->f_owner.pid);
	if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
	if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
		i_readcount_dec(inode);
		i_readcount_dec(inode);
	if (file->f_mode & FMODE_WRITER) {
	if (mode & FMODE_WRITER) {
		put_write_access(inode);
		put_write_access(inode);
		__mnt_drop_write(mnt);
		__mnt_drop_write(mnt);
	}
	}
	dput(dentry);
	dput(dentry);
	if (unlikely(mode & FMODE_NEED_UNMOUNT))
		dissolve_on_fput(mnt);
	mntput(mnt);
	mntput(mnt);
out:
out:
	file_free(file);
	file_free(file);
+146 −14
Original line number Original line Diff line number Diff line
@@ -11,6 +11,7 @@
 */
 */


#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/fs_context.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/fs_parser.h>
#include <linux/fs.h>
#include <linux/fs.h>
@@ -23,6 +24,7 @@
#include <linux/pid_namespace.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <linux/user_namespace.h>
#include <net/net_namespace.h>
#include <net/net_namespace.h>
#include <asm/sections.h>
#include "mount.h"
#include "mount.h"
#include "internal.h"
#include "internal.h"


@@ -271,6 +273,8 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
	fc->cred	= get_current_cred();
	fc->cred	= get_current_cred();
	fc->net_ns	= get_net(current->nsproxy->net_ns);
	fc->net_ns	= get_net(current->nsproxy->net_ns);


	mutex_init(&fc->uapi_mutex);

	switch (purpose) {
	switch (purpose) {
	case FS_CONTEXT_FOR_MOUNT:
	case FS_CONTEXT_FOR_MOUNT:
		fc->user_ns = get_user_ns(fc->cred->user_ns);
		fc->user_ns = get_user_ns(fc->cred->user_ns);
@@ -353,6 +357,8 @@ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
	if (!fc)
	if (!fc)
		return ERR_PTR(-ENOMEM);
		return ERR_PTR(-ENOMEM);


	mutex_init(&fc->uapi_mutex);

	fc->fs_private	= NULL;
	fc->fs_private	= NULL;
	fc->s_fs_info	= NULL;
	fc->s_fs_info	= NULL;
	fc->source	= NULL;
	fc->source	= NULL;
@@ -361,6 +367,8 @@ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
	get_net(fc->net_ns);
	get_net(fc->net_ns);
	get_user_ns(fc->user_ns);
	get_user_ns(fc->user_ns);
	get_cred(fc->cred);
	get_cred(fc->cred);
	if (fc->log)
		refcount_inc(&fc->log->usage);


	/* Can't call put until we've called ->dup */
	/* Can't call put until we've called ->dup */
	ret = fc->ops->dup(fc, src_fc);
	ret = fc->ops->dup(fc, src_fc);
@@ -378,7 +386,6 @@ err_fc:
}
}
EXPORT_SYMBOL(vfs_dup_fs_context);
EXPORT_SYMBOL(vfs_dup_fs_context);


#ifdef CONFIG_PRINTK
/**
/**
 * logfc - Log a message to a filesystem context
 * logfc - Log a message to a filesystem context
 * @fc: The filesystem context to log to.
 * @fc: The filesystem context to log to.
@@ -386,27 +393,100 @@ EXPORT_SYMBOL(vfs_dup_fs_context);
 */
 */
void logfc(struct fs_context *fc, const char *fmt, ...)
void logfc(struct fs_context *fc, const char *fmt, ...)
{
{
	static const char store_failure[] = "OOM: Can't store error string";
	struct fc_log *log = fc ? fc->log : NULL;
	const char *p;
	va_list va;
	va_list va;
	char *q;
	u8 freeable;


	va_start(va, fmt);
	va_start(va, fmt);

	if (!strchr(fmt, '%')) {
		p = fmt;
		goto unformatted_string;
	}
	if (strcmp(fmt, "%s") == 0) {
		p = va_arg(va, const char *);
		goto unformatted_string;
	}

	q = kvasprintf(GFP_KERNEL, fmt, va);
copied_string:
	if (!q)
		goto store_failure;
	freeable = 1;
	goto store_string;

unformatted_string:
	if ((unsigned long)p >= (unsigned long)__start_rodata &&
	    (unsigned long)p <  (unsigned long)__end_rodata)
		goto const_string;
	if (log && within_module_core((unsigned long)p, log->owner))
		goto const_string;
	q = kstrdup(p, GFP_KERNEL);
	goto copied_string;

store_failure:
	p = store_failure;
const_string:
	q = (char *)p;
	freeable = 0;
store_string:
	if (!log) {
		switch (fmt[0]) {
		switch (fmt[0]) {
		case 'w':
		case 'w':
		vprintk_emit(0, LOGLEVEL_WARNING, NULL, 0, fmt, va);
			printk(KERN_WARNING "%s\n", q + 2);
			break;
			break;
		case 'e':
		case 'e':
		vprintk_emit(0, LOGLEVEL_ERR, NULL, 0, fmt, va);
			printk(KERN_ERR "%s\n", q + 2);
			break;
			break;
		default:
		default:
		vprintk_emit(0, LOGLEVEL_NOTICE, NULL, 0, fmt, va);
			printk(KERN_NOTICE "%s\n", q + 2);
			break;
			break;
		}
		}
		if (freeable)
			kfree(q);
	} else {
		unsigned int logsize = ARRAY_SIZE(log->buffer);
		u8 index;

		index = log->head & (logsize - 1);
		BUILD_BUG_ON(sizeof(log->head) != sizeof(u8) ||
			     sizeof(log->tail) != sizeof(u8));
		if ((u8)(log->head - log->tail) == logsize) {
			/* The buffer is full, discard the oldest message */
			if (log->need_free & (1 << index))
				kfree(log->buffer[index]);
			log->tail++;
		}


	pr_cont("\n");
		log->buffer[index] = q;
		log->need_free &= ~(1 << index);
		log->need_free |= freeable << index;
		log->head++;
	}
	va_end(va);
	va_end(va);
}
}
EXPORT_SYMBOL(logfc);
EXPORT_SYMBOL(logfc);
#endif

/*
 * Drop a context's reference on its logging structure.
 *
 * If that was the last reference, the messages still held in the ring are
 * released and the structure itself is freed.  Only slots flagged in
 * ->need_free are passed to kfree(); logfc() stores constant strings
 * without copying them and leaves their bit clear.
 *
 * fc->log is cleared whether or not the log was freed: once the reference
 * has been dropped the context must not keep a pointer to a log that may
 * still be shared with (and later freed by) a duplicated context.
 */
static void put_fc_log(struct fs_context *fc)
{
	struct fc_log *log = fc->log;
	unsigned int i;

	if (!log)
		return;

	fc->log = NULL;
	if (!refcount_dec_and_test(&log->usage))
		return;

	/* Walk the whole ring rather than a hard-coded slot count. */
	for (i = 0; i < ARRAY_SIZE(log->buffer); i++)
		if (log->need_free & (1 << i))
			kfree(log->buffer[i]);
	kfree(log);
}


/**
/**
 * put_fs_context - Dispose of a superblock configuration context.
 * put_fs_context - Dispose of a superblock configuration context.
@@ -431,6 +511,7 @@ void put_fs_context(struct fs_context *fc)
	put_user_ns(fc->user_ns);
	put_user_ns(fc->user_ns);
	put_cred(fc->cred);
	put_cred(fc->cred);
	kfree(fc->subtype);
	kfree(fc->subtype);
	put_fc_log(fc);
	put_filesystem(fc->fs_type);
	put_filesystem(fc->fs_type);
	kfree(fc->source);
	kfree(fc->source);
	kfree(fc);
	kfree(fc);
@@ -640,3 +721,54 @@ int parse_monolithic_mount_data(struct fs_context *fc, void *data)


	return monolithic_mount_data(fc, data);
	return monolithic_mount_data(fc, data);
}
}

/*
 * Strip a context down once an action has been performed on it, so that it
 * can later be used to reconfigure a superblock.
 *
 * Only the steps that cannot fail are done here.  The remainder lives in
 * finish_clean_context() below; between the two, the fs_context sits in
 * FS_CONTEXT_AWAITING_RECONF.  The work is split because success of a mount
 * or remount has to be reported to userland: attempting the full reinit
 * straight away (for the benefit of a possible later remount) and then
 * failing a memory allocation would leave us in a nasty situation.  So the
 * old state is merely discarded here, and reinitialisation is deferred until
 * a reconfiguration is actually attempted.
 */
void vfs_clean_context(struct fs_context *fc)
{
	/* Give the filesystem the chance to tear down its private state. */
	if (fc->need_free) {
		if (fc->ops && fc->ops->free)
			fc->ops->free(fc);
		fc->need_free = false;
	}

	/* Release the security options and the subtype/source strings. */
	security_free_mnt_opts(&fc->security);
	kfree(fc->source);
	kfree(fc->subtype);

	/* Forget everything that described the previous configuration. */
	fc->source = NULL;
	fc->subtype = NULL;
	fc->fs_private = NULL;
	fc->s_fs_info = NULL;
	fc->sb_flags = 0;

	/* Park the context until finish_clean_context() completes the job. */
	fc->phase = FS_CONTEXT_AWAITING_RECONF;
	fc->purpose = FS_CONTEXT_FOR_RECONFIGURE;
}

/*
 * Complete the cleanup of a context that is in the
 * FS_CONTEXT_AWAITING_RECONF phase by running the filesystem's context
 * initialiser again (or the legacy one if the filesystem type provides
 * none).
 *
 * A context in any other phase is left untouched and 0 is returned.
 * On success the context is marked as needing ->free and moves to
 * FS_CONTEXT_RECONF_PARAMS; on failure it moves to FS_CONTEXT_FAILED and
 * the initialiser's error is returned.
 */
int finish_clean_context(struct fs_context *fc)
{
	int ret;

	if (fc->phase != FS_CONTEXT_AWAITING_RECONF)
		return 0;

	ret = fc->fs_type->init_fs_context ?
		fc->fs_type->init_fs_context(fc) :
		legacy_init_fs_context(fc);

	if (unlikely(ret)) {
		fc->phase = FS_CONTEXT_FAILED;
	} else {
		fc->need_free = true;
		fc->phase = FS_CONTEXT_RECONF_PARAMS;
	}
	return ret;
}
Loading