Commit 48751b56 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull overlayfs fixes from Miklos Szeredi:
 "This fixes a regression in the recent file stacking update, reported
  and fixed by Amir Goldstein. The fix is fairly trivial, but involves
  adding a fadvise() f_op and the associated churn in the vfs. As
  discussed on -fsdevel, there are other possible uses for this method,
  than allowing proper stacking for overlays.

  And there's one other fix for a syzkaller detected oops"

* tag 'ovl-fixes-4.19-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs:
  ovl: fix oopses in ovl_fill_super() failure paths
  ovl: add ovl_fadvise()
  vfs: implement readahead(2) using POSIX_FADV_WILLNEED
  vfs: add the fadvise() file operation
  Documentation/filesystems: update documentation of file_operations
  ovl: fix GPF in swapfile_activate of file from overlayfs over xfs
  ovl: respect FIEMAP_FLAG_SYNC flag
parents 4d8d9f54 8c25741a
Loading
Loading
Loading
Loading
+19 −2
Original line number Diff line number Diff line
@@ -848,7 +848,7 @@ struct file_operations
----------------------

This describes how the VFS can manipulate an open file. As of kernel
4.1, the following members are defined:
4.18, the following members are defined:

struct file_operations {
	struct module *owner;
@@ -858,11 +858,11 @@ struct file_operations {
	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
	int (*iterate) (struct file *, struct dir_context *);
	int (*iterate_shared) (struct file *, struct dir_context *);
	__poll_t (*poll) (struct file *, struct poll_table_struct *);
	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
	int (*mmap) (struct file *, struct vm_area_struct *);
	int (*mremap)(struct file *, struct vm_area_struct *);
	int (*open) (struct inode *, struct file *);
	int (*flush) (struct file *, fl_owner_t id);
	int (*release) (struct inode *, struct file *);
@@ -882,6 +882,10 @@ struct file_operations {
#ifndef CONFIG_MMU
	unsigned (*mmap_capabilities)(struct file *);
#endif
	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
	int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
	int (*fadvise)(struct file *, loff_t, loff_t, int);
};

Again, all methods are called without any locks being held, unless
@@ -899,6 +903,9 @@ otherwise noted.

  iterate: called when the VFS needs to read the directory contents

  iterate_shared: called when the VFS needs to read the directory contents
	when filesystem supports concurrent dir iterators

  poll: called by the VFS when a process wants to check if there is
	activity on this file and (optionally) go to sleep until there
	is activity. Called by the select(2) and poll(2) system calls
@@ -951,6 +958,16 @@ otherwise noted.

  fallocate: called by the VFS to preallocate blocks or punch a hole.

  copy_file_range: called by the copy_file_range(2) system call.

  clone_file_range: called by the ioctl(2) system call for FICLONERANGE and
	FICLONE commands.

  dedupe_file_range: called by the ioctl(2) system call for FIDEDUPERANGE
	command.

  fadvise: possibly called by the fadvise64() system call.

Note that the file operations are implemented by the specific
filesystem in which the inode resides. When opening a device node
(character or block special) most filesystems will call special
+20 −3
Original line number Diff line number Diff line
@@ -131,9 +131,6 @@ static int ovl_open(struct inode *inode, struct file *file)
	if (IS_ERR(realfile))
		return PTR_ERR(realfile);

	/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
	file->f_mapping = realfile->f_mapping;

	file->private_data = realfile;

	return 0;
@@ -334,6 +331,25 @@ static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len
	return ret;
}

static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
{
	struct fd real;
	const struct cred *old_cred;
	int ret;

	ret = ovl_real_fdget(file, &real);
	if (ret)
		return ret;

	old_cred = ovl_override_creds(file_inode(file)->i_sb);
	ret = vfs_fadvise(real.file, offset, len, advice);
	revert_creds(old_cred);

	fdput(real);

	return ret;
}

static long ovl_real_ioctl(struct file *file, unsigned int cmd,
			   unsigned long arg)
{
@@ -502,6 +518,7 @@ const struct file_operations ovl_file_operations = {
	.fsync		= ovl_fsync,
	.mmap		= ovl_mmap,
	.fallocate	= ovl_fallocate,
	.fadvise	= ovl_fadvise,
	.unlocked_ioctl	= ovl_ioctl,
	.compat_ioctl	= ovl_compat_ioctl,

+10 −0
Original line number Diff line number Diff line
@@ -467,6 +467,10 @@ static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		return -EOPNOTSUPP;

	old_cred = ovl_override_creds(inode->i_sb);

	if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
		filemap_write_and_wait(realinode->i_mapping);

	err = realinode->i_op->fiemap(realinode, fieinfo, start, len);
	revert_creds(old_cred);

@@ -500,6 +504,11 @@ static const struct inode_operations ovl_special_inode_operations = {
	.update_time	= ovl_update_time,
};

const struct address_space_operations ovl_aops = {
	/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
	.direct_IO		= noop_direct_IO,
};

/*
 * It is possible to stack overlayfs instance on top of another
 * overlayfs instance as lower layer. We need to annonate the
@@ -571,6 +580,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
	case S_IFREG:
		inode->i_op = &ovl_file_inode_operations;
		inode->i_fop = &ovl_file_operations;
		inode->i_mapping->a_ops = &ovl_aops;
		break;

	case S_IFDIR:
+14 −12
Original line number Diff line number Diff line
@@ -982,16 +982,6 @@ static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath)
	if (err)
		goto out;

	err = -EBUSY;
	if (ovl_inuse_trylock(upperpath->dentry)) {
		ofs->upperdir_locked = true;
	} else if (ofs->config.index) {
		pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
		goto out;
	} else {
		pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
	}

	upper_mnt = clone_private_mount(upperpath);
	err = PTR_ERR(upper_mnt);
	if (IS_ERR(upper_mnt)) {
@@ -1002,6 +992,17 @@ static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath)
	/* Don't inherit atime flags */
	upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
	ofs->upper_mnt = upper_mnt;

	err = -EBUSY;
	if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) {
		ofs->upperdir_locked = true;
	} else if (ofs->config.index) {
		pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
		goto out;
	} else {
		pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
	}

	err = 0;
out:
	return err;
@@ -1101,8 +1102,10 @@ static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
		goto out;
	}

	ofs->workbasedir = dget(workpath.dentry);

	err = -EBUSY;
	if (ovl_inuse_trylock(workpath.dentry)) {
	if (ovl_inuse_trylock(ofs->workbasedir)) {
		ofs->workdir_locked = true;
	} else if (ofs->config.index) {
		pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
@@ -1111,7 +1114,6 @@ static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
		pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
	}

	ofs->workbasedir = dget(workpath.dentry);
	err = ovl_make_workdir(ofs, &workpath);
	if (err)
		goto out;
+5 −0
Original line number Diff line number Diff line
@@ -1763,6 +1763,7 @@ struct file_operations {
			u64);
	int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
			u64);
	int (*fadvise)(struct file *, loff_t, loff_t, int);
} __randomize_layout;

struct inode_operations {
@@ -3459,4 +3460,8 @@ static inline bool dir_relax_shared(struct inode *inode)
extern bool path_noexec(const struct path *path);
extern void inode_nohighmem(struct inode *inode);

/* mm/fadvise.c */
extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
		       int advice);

#endif /* _LINUX_FS_H */
Loading