Commit 50b8b3f8 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull ext4 updates from Ted Ts'o:
 "This merge window saw the the following new featuers added to ext4:

   - Direct I/O via iomap (required the iomap-for-next branch from
     Darrick as a prereq).

   - Support for using dioread-nolock where the block size < page size.

   - Support for encryption for file systems where the block size < page
     size.

   - Rework of journal credits handling so a revoke-heavy workload will
     not cause the journal to run out of space.

   - Replace bit-spinlocks with spinlocks in jbd2

  Also included were some bug fixes and cleanups, mostly to clean up
  corner cases from fuzzed file systems and error path handling"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (59 commits)
  ext4: work around deleting a file with i_nlink == 0 safely
  ext4: add more paranoia checking in ext4_expand_extra_isize handling
  jbd2: make jbd2_handle_buffer_credits() handle reserved handles
  ext4: fix a bug in ext4_wait_for_tail_page_commit
  ext4: bio_alloc with __GFP_DIRECT_RECLAIM never fails
  ext4: code cleanup for get_next_id
  ext4: fix leak of quota reservations
  ext4: remove unused variable warning in parse_options()
  ext4: Enable encryption for subpage-sized blocks
  fs/buffer.c: support fscrypt in block_read_full_page()
  ext4: Add error handling for io_end_vec struct allocation
  jbd2: Fine tune estimate of necessary descriptor blocks
  jbd2: Provide trace event for handle restarts
  ext4: Reserve revoke credits for freed blocks
  jbd2: Make credit checking more strict
  jbd2: Rename h_buffer_credits to h_total_credits
  jbd2: Reserve space for revoke descriptor blocks
  jbd2: Drop jbd2_space_needed()
  jbd2: Account descriptor blocks into t_outstanding_credits
  jbd2: Factor out common parts of stopping and restarting a handle
  ...
parents f112a2fd dfdeeb41
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -342,8 +342,8 @@ Contents encryption
-------------------

For file contents, each filesystem block is encrypted independently.
Currently, only the case where the filesystem block size is equal to
the system's page size (usually 4096 bytes) is supported.
Starting from Linux kernel 5.5, encryption of filesystems with block
size less than system's page size is supported.

Each block's IV is set to the logical block number within the file as
a little endian number, except that:
+43 −5
Original line number Diff line number Diff line
@@ -47,6 +47,7 @@
#include <linux/pagevec.h>
#include <linux/sched/mm.h>
#include <trace/events/block.h>
#include <linux/fscrypt.h>

static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
@@ -246,10 +247,6 @@ out:
	return ret;
}

/*
 * I/O completion handler for block_read_full_page() - pages
 * which come unlocked at the end of I/O.
 */
static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
	unsigned long flags;
@@ -307,6 +304,47 @@ still_busy:
	return;
}

struct decrypt_bh_ctx {
	struct work_struct work;
	struct buffer_head *bh;
};

static void decrypt_bh(struct work_struct *work)
{
	struct decrypt_bh_ctx *ctx =
		container_of(work, struct decrypt_bh_ctx, work);
	struct buffer_head *bh = ctx->bh;
	int err;

	err = fscrypt_decrypt_pagecache_blocks(bh->b_page, bh->b_size,
					       bh_offset(bh));
	end_buffer_async_read(bh, err == 0);
	kfree(ctx);
}

/*
 * I/O completion handler for block_read_full_page() - pages
 * which come unlocked at the end of I/O.
 */
static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate)
{
	/* Decrypt if needed */
	if (uptodate && IS_ENABLED(CONFIG_FS_ENCRYPTION) &&
	    IS_ENCRYPTED(bh->b_page->mapping->host) &&
	    S_ISREG(bh->b_page->mapping->host->i_mode)) {
		struct decrypt_bh_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);

		if (ctx) {
			INIT_WORK(&ctx->work, decrypt_bh);
			ctx->bh = bh;
			fscrypt_enqueue_decrypt_work(&ctx->work);
			return;
		}
		uptodate = 0;
	}
	end_buffer_async_read(bh, uptodate);
}

/*
 * Completion handler for block_write_full_page() - pages which are unlocked
 * during I/O, and which have PageWriteback cleared upon I/O completion.
@@ -379,7 +417,7 @@ EXPORT_SYMBOL(end_buffer_async_write);
 */
static void mark_buffer_async_read(struct buffer_head *bh)
{
	bh->b_end_io = end_buffer_async_read;
	bh->b_end_io = end_buffer_async_read_io;
	set_buffer_async_read(bh);
}

+16 −6
Original line number Diff line number Diff line
@@ -198,6 +198,12 @@ struct ext4_system_blocks {
 */
#define	EXT4_IO_END_UNWRITTEN	0x0001

struct ext4_io_end_vec {
	struct list_head list;		/* list of io_end_vec */
	loff_t offset;			/* offset in the file */
	ssize_t size;			/* size of the extent */
};

/*
 * For converting unwritten extents on a work queue. 'handle' is used for
 * buffered writeback.
@@ -211,8 +217,7 @@ typedef struct ext4_io_end {
						 * bios covering the extent */
	unsigned int		flag;		/* unwritten or not */
	atomic_t		count;		/* reference counter */
	loff_t			offset;		/* offset in the file */
	ssize_t			size;		/* size of the extent */
	struct list_head	list_vec;	/* list of ext4_io_end_vec */
} ext4_io_end_t;

struct ext4_io_submit {
@@ -1579,7 +1584,6 @@ enum {
	EXT4_STATE_NO_EXPAND,		/* No space for expansion */
	EXT4_STATE_DA_ALLOC_CLOSE,	/* Alloc DA blks on close */
	EXT4_STATE_EXT_MIGRATE,		/* Inode is migrating */
	EXT4_STATE_DIO_UNWRITTEN,	/* need convert on dio done*/
	EXT4_STATE_NEWENTRY,		/* File just added to dir */
	EXT4_STATE_MAY_INLINE_DATA,	/* may have in-inode data */
	EXT4_STATE_EXT_PRECACHED,	/* extents have been precached */
@@ -2562,8 +2566,6 @@ int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
			     struct buffer_head *bh_result, int create);
int ext4_get_block(struct inode *inode, sector_t iblock,
		   struct buffer_head *bh_result, int create);
int ext4_dio_get_block(struct inode *inode, sector_t iblock,
		       struct buffer_head *bh_result, int create);
int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
			   struct buffer_head *bh, int create);
int ext4_walk_page_buffers(handle_t *handle,
@@ -2606,7 +2608,6 @@ extern int ext4_can_truncate(struct inode *inode);
extern int ext4_truncate(struct inode *);
extern int ext4_break_layouts(struct inode *);
extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
extern void ext4_set_inode_flags(struct inode *);
extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
@@ -3266,6 +3267,8 @@ extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
			  loff_t len);
extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
					  loff_t offset, ssize_t len);
extern int ext4_convert_unwritten_io_end_vec(handle_t *handle,
					     ext4_io_end_t *io_end);
extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
			   struct ext4_map_blocks *map, int flags);
extern int ext4_ext_calc_metadata_amount(struct inode *inode,
@@ -3298,6 +3301,10 @@ extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
			     ext4_lblk_t lblk2,  ext4_lblk_t count,
			     int mark_unwritten,int *err);
extern int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu);
extern int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
				       int check_cred, int restart_cred,
				       int revoke_cred);


/* move_extent.c */
extern void ext4_double_down_write_data_sem(struct inode *first,
@@ -3324,6 +3331,8 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
			       int len,
			       struct writeback_control *wbc,
			       bool keep_towrite);
extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end);
extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end);

/* mmp.c */
extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
@@ -3381,6 +3390,7 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
}

extern const struct iomap_ops ext4_iomap_ops;
extern const struct iomap_ops ext4_iomap_report_ops;

static inline int ext4_buffer_uptodate(struct buffer_head *bh)
{
+24 −8
Original line number Diff line number Diff line
@@ -65,12 +65,14 @@ static int ext4_journal_check_start(struct super_block *sb)
}

handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
				  int type, int blocks, int rsv_blocks)
				  int type, int blocks, int rsv_blocks,
				  int revoke_creds)
{
	journal_t *journal;
	int err;

	trace_ext4_journal_start(sb, blocks, rsv_blocks, _RET_IP_);
	trace_ext4_journal_start(sb, blocks, rsv_blocks, revoke_creds,
				 _RET_IP_);
	err = ext4_journal_check_start(sb);
	if (err < 0)
		return ERR_PTR(err);
@@ -78,8 +80,8 @@ handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
	journal = EXT4_SB(sb)->s_journal;
	if (!journal)
		return ext4_get_nojournal();
	return jbd2__journal_start(journal, blocks, rsv_blocks, GFP_NOFS,
				   type, line);
	return jbd2__journal_start(journal, blocks, rsv_blocks, revoke_creds,
				   GFP_NOFS, type, line);
}

int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
@@ -119,8 +121,8 @@ handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
		return ext4_get_nojournal();

	sb = handle->h_journal->j_private;
	trace_ext4_journal_start_reserved(sb, handle->h_buffer_credits,
					  _RET_IP_);
	trace_ext4_journal_start_reserved(sb,
				jbd2_handle_buffer_credits(handle), _RET_IP_);
	err = ext4_journal_check_start(sb);
	if (err < 0) {
		jbd2_journal_free_reserved(handle);
@@ -133,6 +135,19 @@ handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
	return handle;
}

int __ext4_journal_ensure_credits(handle_t *handle, int check_cred,
				  int extend_cred, int revoke_cred)
{
	if (!ext4_handle_valid(handle))
		return 0;
	if (jbd2_handle_buffer_credits(handle) >= check_cred &&
	    handle->h_revoke_credits >= revoke_cred)
		return 0;
	extend_cred = max(0, extend_cred - jbd2_handle_buffer_credits(handle));
	revoke_cred = max(0, revoke_cred - handle->h_revoke_credits);
	return ext4_journal_extend(handle, extend_cred, revoke_cred);
}

static void ext4_journal_abort_handle(const char *caller, unsigned int line,
				      const char *err_fn,
				      struct buffer_head *bh,
@@ -278,7 +293,7 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
				       handle->h_type,
				       handle->h_line_no,
				       handle->h_requested_credits,
				       handle->h_buffer_credits, err);
				       jbd2_handle_buffer_credits(handle), err);
				return err;
			}
			ext4_error_inode(inode, where, line,
@@ -289,7 +304,8 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
					 handle->h_type,
					 handle->h_line_no,
					 handle->h_requested_credits,
					 handle->h_buffer_credits, err);
					 jbd2_handle_buffer_credits(handle),
					 err);
		}
	} else {
		if (inode)
+91 −15
Original line number Diff line number Diff line
@@ -261,7 +261,8 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
	__ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))

handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
				  int type, int blocks, int rsv_blocks);
				  int type, int blocks, int rsv_blocks,
				  int revoke_creds);
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);

#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
@@ -288,28 +289,41 @@ static inline int ext4_handle_is_aborted(handle_t *handle)
	return 0;
}

static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
static inline int ext4_free_metadata_revoke_credits(struct super_block *sb,
						    int blocks)
{
	if (ext4_handle_valid(handle) && handle->h_buffer_credits < needed)
		return 0;
	return 1;
	/* Freeing each metadata block can result in freeing one cluster */
	return blocks * EXT4_SB(sb)->s_cluster_ratio;
}

static inline int ext4_trans_default_revoke_credits(struct super_block *sb)
{
	return ext4_free_metadata_revoke_credits(sb, 8);
}

#define ext4_journal_start_sb(sb, type, nblocks)			\
	__ext4_journal_start_sb((sb), __LINE__, (type), (nblocks), 0)
	__ext4_journal_start_sb((sb), __LINE__, (type), (nblocks), 0,	\
				ext4_trans_default_revoke_credits(sb))

#define ext4_journal_start(inode, type, nblocks)			\
	__ext4_journal_start((inode), __LINE__, (type), (nblocks), 0)
	__ext4_journal_start((inode), __LINE__, (type), (nblocks), 0,	\
			     ext4_trans_default_revoke_credits((inode)->i_sb))

#define ext4_journal_start_with_reserve(inode, type, blocks, rsv_blocks)\
	__ext4_journal_start((inode), __LINE__, (type), (blocks), (rsv_blocks))
	__ext4_journal_start((inode), __LINE__, (type), (blocks), (rsv_blocks),\
			     ext4_trans_default_revoke_credits((inode)->i_sb))

#define ext4_journal_start_with_revoke(inode, type, blocks, revoke_creds) \
	__ext4_journal_start((inode), __LINE__, (type), (blocks), 0,	\
			     (revoke_creds))

static inline handle_t *__ext4_journal_start(struct inode *inode,
					     unsigned int line, int type,
					     int blocks, int rsv_blocks)
					     int blocks, int rsv_blocks,
					     int revoke_creds)
{
	return __ext4_journal_start_sb(inode->i_sb, line, type, blocks,
				       rsv_blocks);
				       rsv_blocks, revoke_creds);
}

#define ext4_journal_stop(handle) \
@@ -332,20 +346,68 @@ static inline handle_t *ext4_journal_current_handle(void)
	return journal_current_handle();
}

static inline int ext4_journal_extend(handle_t *handle, int nblocks)
static inline int ext4_journal_extend(handle_t *handle, int nblocks, int revoke)
{
	if (ext4_handle_valid(handle))
		return jbd2_journal_extend(handle, nblocks);
		return jbd2_journal_extend(handle, nblocks, revoke);
	return 0;
}

static inline int ext4_journal_restart(handle_t *handle, int nblocks)
static inline int ext4_journal_restart(handle_t *handle, int nblocks,
				       int revoke)
{
	if (ext4_handle_valid(handle))
		return jbd2_journal_restart(handle, nblocks);
		return jbd2__journal_restart(handle, nblocks, revoke, GFP_NOFS);
	return 0;
}

int __ext4_journal_ensure_credits(handle_t *handle, int check_cred,
				  int extend_cred, int revoke_cred);


/*
 * Ensure @handle has at least @check_creds credits available. If not,
 * transaction will be extended or restarted to contain at least @extend_cred
 * credits. Before restarting transaction @fn is executed to allow for cleanup
 * before the transaction is restarted.
 *
 * The return value is < 0 in case of error, 0 in case the handle has enough
 * credits or transaction extension succeeded, 1 in case transaction had to be
 * restarted.
 */
#define ext4_journal_ensure_credits_fn(handle, check_cred, extend_cred,	\
				       revoke_cred, fn) \
({									\
	__label__ __ensure_end;						\
	int err = __ext4_journal_ensure_credits((handle), (check_cred),	\
					(extend_cred), (revoke_cred));	\
									\
	if (err <= 0)							\
		goto __ensure_end;					\
	err = (fn);							\
	if (err < 0)							\
		goto __ensure_end;					\
	err = ext4_journal_restart((handle), (extend_cred), (revoke_cred)); \
	if (err == 0)							\
		err = 1;						\
__ensure_end:								\
	err;								\
})

/*
 * Ensure given handle has at least requested amount of credits available,
 * possibly restarting transaction if needed. We also make sure the transaction
 * has space for at least ext4_trans_default_revoke_credits(sb) revoke records
 * as freeing one or two blocks is very common pattern and requesting this is
 * very cheap.
 */
static inline int ext4_journal_ensure_credits(handle_t *handle, int credits,
					      int revoke_creds)
{
	return ext4_journal_ensure_credits_fn(handle, credits, credits,
				revoke_creds, 0);
}

static inline int ext4_journal_blocks_per_page(struct inode *inode)
{
	if (EXT4_JOURNAL(inode) != NULL)
@@ -407,6 +469,7 @@ static inline int ext4_inode_journal_mode(struct inode *inode)
		return EXT4_INODE_WRITEBACK_DATA_MODE;	/* writeback */
	/* We do not support data journalling with delayed allocation */
	if (!S_ISREG(inode->i_mode) ||
	    ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE) ||
	    test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
	    (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
	    !test_opt(inode->i_sb, DELALLOC))) {
@@ -437,6 +500,19 @@ static inline int ext4_should_writeback_data(struct inode *inode)
	return ext4_inode_journal_mode(inode) & EXT4_INODE_WRITEBACK_DATA_MODE;
}

static inline int ext4_free_data_revoke_credits(struct inode *inode, int blocks)
{
	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
		return 0;
	if (!ext4_should_journal_data(inode))
		return 0;
	/*
	 * Data blocks in one extent are contiguous, just account for partial
	 * clusters at extent boundaries
	 */
	return blocks + 2*(EXT4_SB(inode->i_sb)->s_cluster_ratio - 1);
}

/*
 * This function controls whether or not we should try to go down the
 * dioread_nolock code paths, which makes it safe to avoid taking
Loading