Commit aa75f4d3 authored by Harshad Shirwadkar, committed by Theodore Ts'o
Browse files

ext4: main fast-commit commit path



This patch adds main fast commit commit path handlers. The overall
patch can be divided into two inter-related parts:

(A) Metadata updates tracking

    This part consists of helper functions to track changes that need
    to be committed during a commit operation. These updates are
    maintained by Ext4 in different in-memory queues. Following are
    the APIs and their short description that are implemented in this
    patch:

    - ext4_fc_track_link/unlink/create() - Track unlink, link and
      create operations
    - ext4_fc_track_range() - Track changed logical block offsets of
      inodes
    - ext4_fc_track_inode() - Track inodes
    - ext4_fc_mark_ineligible() - Mark the file system as ineligible
      for fast commit
    - ext4_fc_start_update() / ext4_fc_stop_update() /
      ext4_fc_start_ineligible() / ext4_fc_stop_ineligible() - These
      functions are useful for coordinating inode updates with
      commits.

(B) Main commit Path

    This part consists of functions to convert updates tracked in
    in-memory data structures into on-disk commits. Function
    ext4_fc_commit() is the main entry point to commit path.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Link: https://lore.kernel.org/r/20201015203802.3597742-6-harshadshirwadkar@gmail.com


Signed-off-by: Theodore Ts'o <tytso@mit.edu>
parent ff780b91
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -242,6 +242,7 @@ retry:
	handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
	if (IS_ERR(handle))
		return PTR_ERR(handle);
	ext4_fc_start_update(inode);

	if ((type == ACL_TYPE_ACCESS) && acl) {
		error = posix_acl_update_mode(inode, &mode, &acl);
@@ -259,6 +260,7 @@ retry:
	}
out_stop:
	ext4_journal_stop(handle);
	ext4_fc_stop_update(inode);
	if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
		goto retry;
	return error;
+70 −0
Original line number Diff line number Diff line
@@ -1021,6 +1021,31 @@ struct ext4_inode_info {

	struct list_head i_orphan;	/* unlinked but open inodes */

	/* Fast commit related info */

	struct list_head i_fc_list;	/*
					 * inodes that need fast commit
					 * protected by sbi->s_fc_lock.
					 */

	/* Fast commit subtid when this inode was committed */
	unsigned int i_fc_committed_subtid;

	/* Start of lblk range that needs to be committed in this fast commit */
	ext4_lblk_t i_fc_lblk_start;

	/* Length of lblk range to be committed in this fast commit */
	ext4_lblk_t i_fc_lblk_len;

	/* Number of ongoing updates on this inode */
	atomic_t  i_fc_updates;

	/* Fast commit wait queue for this inode */
	wait_queue_head_t i_fc_wait;

	/* Protect concurrent accesses on i_fc_lblk_start, i_fc_lblk_len */
	struct mutex i_fc_lock;

	/*
	 * i_disksize keeps track of what the inode size is ON DISK, not
	 * in memory.  During truncate, i_size is set to the new size by
@@ -1141,6 +1166,10 @@ struct ext4_inode_info {
#define	EXT4_VALID_FS			0x0001	/* Unmounted cleanly */
#define	EXT4_ERROR_FS			0x0002	/* Errors detected */
#define	EXT4_ORPHAN_FS			0x0004	/* Orphans being recovered */
#define EXT4_FC_INELIGIBLE		0x0008	/* Fast commit ineligible */
#define EXT4_FC_COMMITTING		0x0010	/* File system undergoing a fast
						 * commit.
						 */

/*
 * Misc. filesystem flags
@@ -1613,6 +1642,30 @@ struct ext4_sb_info {
	/* Record the errseq of the backing block device */
	errseq_t s_bdev_wb_err;
	spinlock_t s_bdev_wb_lock;

	/* Ext4 fast commit stuff */
	atomic_t s_fc_subtid;
	atomic_t s_fc_ineligible_updates;
	/*
	 * After commit starts, the main queue gets locked, and the further
	 * updates get added in the staging queue.
	 */
#define FC_Q_MAIN	0
#define FC_Q_STAGING	1
	struct list_head s_fc_q[2];	/* Inodes staged for fast commit
					 * that have data changes in them.
					 */
	struct list_head s_fc_dentry_q[2];	/* directory entry updates */
	unsigned int s_fc_bytes;
	/*
	 * Main fast commit lock. This lock protects accesses to the
	 * following fields:
	 * ei->i_fc_list, s_fc_dentry_q, s_fc_q, s_fc_bytes, s_fc_bh.
	 */
	spinlock_t s_fc_lock;
	struct buffer_head *s_fc_bh;
	struct ext4_fc_stats s_fc_stats;
	u64 s_fc_avg_commit_time;
};

static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1723,6 +1776,7 @@ enum {
	EXT4_STATE_EXT_PRECACHED,	/* extents have been precached */
	EXT4_STATE_LUSTRE_EA_INODE,	/* Lustre-style ea_inode */
	EXT4_STATE_VERITY_IN_PROGRESS,	/* building fs-verity Merkle tree */
	EXT4_STATE_FC_COMMITTING,	/* Fast commit ongoing */
};

#define EXT4_INODE_BIT_FNS(name, field, offset)				\
@@ -2682,6 +2736,22 @@ extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
/* fast_commit.c */

void ext4_fc_init(struct super_block *sb, journal_t *journal);
void ext4_fc_init_inode(struct inode *inode);
/*
 * Track a changed range of logical blocks of an inode. Callers pass the
 * last block of the range as 'end' (i.e. start + len - 1).
 */
void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
			 ext4_lblk_t end);
/* Track unlink, link and create operations. */
void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry);
void ext4_fc_track_link(struct inode *inode, struct dentry *dentry);
void ext4_fc_track_create(struct inode *inode, struct dentry *dentry);
/* Track an inode. */
void ext4_fc_track_inode(struct inode *inode);
/*
 * Mark the file system ineligible for fast commit. 'reason' is one of the
 * EXT4_FC_REASON_* ineligibility codes (e.g. EXT4_FC_REASON_FALLOC_RANGE).
 */
void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
/*
 * The start/stop pairs below are used to co-ordinate inode updates with
 * commits; each start call is expected to be matched by its stop call.
 */
void ext4_fc_start_ineligible(struct super_block *sb, int reason);
void ext4_fc_stop_ineligible(struct super_block *sb);
void ext4_fc_start_update(struct inode *inode);
void ext4_fc_stop_update(struct inode *inode);
void ext4_fc_del(struct inode *inode);
/* Main entry point to the fast commit path. */
int ext4_fc_commit(journal_t *journal, tid_t commit_tid);
int __init ext4_fc_init_dentry_cache(void);

/* mballoc.c */
extern const struct seq_operations ext4_mb_seq_groups_ops;
extern long ext4_mb_stats;
+34 −14
Original line number Diff line number Diff line
@@ -3723,6 +3723,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
out:
	ext4_ext_show_leaf(inode, path);
	ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1);
	return err;
}

@@ -3794,6 +3795,7 @@ convert_initialized_extent(handle_t *handle, struct inode *inode,
	if (*allocated > map->m_len)
		*allocated = map->m_len;
	map->m_len = *allocated;
	ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1);
	return 0;
}

@@ -4327,7 +4329,7 @@ got_allocated_blocks:
	map->m_len = ar.len;
	allocated = map->m_len;
	ext4_ext_show_leaf(inode, path);

	ext4_fc_track_range(inode, map->m_lblk, map->m_lblk + map->m_len - 1);
out:
	ext4_ext_drop_refs(path);
	kfree(path);
@@ -4600,7 +4602,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
	ret = ext4_mark_inode_dirty(handle, inode);
	if (unlikely(ret))
		goto out_handle;

	ext4_fc_track_range(inode, offset >> inode->i_sb->s_blocksize_bits,
			(offset + len - 1) >> inode->i_sb->s_blocksize_bits);
	/* Zero out partial block at the edges of the range */
	ret = ext4_zero_partial_blocks(handle, inode, offset, len);
	if (ret >= 0)
@@ -4648,23 +4651,34 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
		     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
		     FALLOC_FL_INSERT_RANGE))
		return -EOPNOTSUPP;
	ext4_fc_track_range(inode, offset >> blkbits,
			(offset + len - 1) >> blkbits);

	if (mode & FALLOC_FL_PUNCH_HOLE)
		return ext4_punch_hole(inode, offset, len);
	ext4_fc_start_update(inode);

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		ret = ext4_punch_hole(inode, offset, len);
		goto exit;
	}

	ret = ext4_convert_inline_data(inode);
	if (ret)
		return ret;
		goto exit;

	if (mode & FALLOC_FL_COLLAPSE_RANGE)
		return ext4_collapse_range(inode, offset, len);

	if (mode & FALLOC_FL_INSERT_RANGE)
		return ext4_insert_range(inode, offset, len);
	if (mode & FALLOC_FL_COLLAPSE_RANGE) {
		ret = ext4_collapse_range(inode, offset, len);
		goto exit;
	}

	if (mode & FALLOC_FL_ZERO_RANGE)
		return ext4_zero_range(file, offset, len, mode);
	if (mode & FALLOC_FL_INSERT_RANGE) {
		ret = ext4_insert_range(inode, offset, len);
		goto exit;
	}

	if (mode & FALLOC_FL_ZERO_RANGE) {
		ret = ext4_zero_range(file, offset, len, mode);
		goto exit;
	}
	trace_ext4_fallocate_enter(inode, offset, len, mode);
	lblk = offset >> blkbits;

@@ -4698,12 +4712,14 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
		goto out;

	if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
		ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
		ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
					EXT4_I(inode)->i_sync_tid);
	}
out:
	inode_unlock(inode);
	trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
exit:
	ext4_fc_stop_update(inode);
	return ret;
}

@@ -5291,6 +5307,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
		ret = PTR_ERR(handle);
		goto out_mmap;
	}
	ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);

	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_discard_preallocations(inode, 0);
@@ -5329,6 +5346,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)

out_stop:
	ext4_journal_stop(handle);
	ext4_fc_stop_ineligible(sb);
out_mmap:
	up_write(&EXT4_I(inode)->i_mmap_sem);
out_mutex:
@@ -5429,6 +5447,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
		ret = PTR_ERR(handle);
		goto out_mmap;
	}
	ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);

	/* Expand file to avoid data loss if there is error while shifting */
	inode->i_size += len;
@@ -5503,6 +5522,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)

out_stop:
	ext4_journal_stop(handle);
	ext4_fc_stop_ineligible(sb);
out_mmap:
	up_write(&EXT4_I(inode)->i_mmap_sem);
out_mutex:
+1183 −0

File changed.

Preview size limit exceeded, changes collapsed.

+110 −0
Original line number Diff line number Diff line
@@ -6,4 +6,114 @@
/* Number of blocks in journal area to allocate for fast commits */
#define EXT4_NUM_FC_BLKS		256

/*
 * Fast commit tags.
 *
 * Each tag names the kind of value stored in an on-disk fast commit TLV;
 * the value structures for HEAD, ADD_RANGE, DEL_RANGE, CREAT/LINK/UNLINK,
 * INODE and TAIL are declared further down in this header.
 * NOTE(review): presumably these are the values carried in
 * ext4_fc_tl.fc_tag - confirm against fast_commit.c.
 */
#define EXT4_FC_TAG_ADD_RANGE		0x0001
#define EXT4_FC_TAG_DEL_RANGE		0x0002
#define EXT4_FC_TAG_CREAT		0x0003
#define EXT4_FC_TAG_LINK		0x0004
#define EXT4_FC_TAG_UNLINK		0x0005
#define EXT4_FC_TAG_INODE		0x0006
#define EXT4_FC_TAG_PAD			0x0007
#define EXT4_FC_TAG_TAIL		0x0008
#define EXT4_FC_TAG_HEAD		0x0009

/*
 * Fast commit feature mask; no feature bits are defined yet (0x0).
 * NOTE(review): presumably checked against ext4_fc_head.fc_features -
 * confirm.
 */
#define EXT4_FC_SUPPORTED_FEATURES	0x0

/* On disk fast commit tlv value structures */

/*
 * Fast commit on disk tag length structure.
 *
 * This is the tag/length part of an on-disk tag-length-value record. Both
 * fields are little-endian 16-bit quantities.
 * NOTE(review): fc_len is presumably the size in bytes of the value that
 * goes with the tag - confirm against the writer in fast_commit.c.
 */
struct ext4_fc_tl {
	__le16 fc_tag;
	__le16 fc_len;
};

/*
 * Value structure for tag EXT4_FC_TAG_HEAD.
 *
 * Both fields are little-endian 32-bit quantities.
 * NOTE(review): fc_features presumably holds feature bits (see
 * EXT4_FC_SUPPORTED_FEATURES) and fc_tid a transaction ID - confirm.
 */
struct ext4_fc_head {
	__le32 fc_features;
	__le32 fc_tid;
};

/*
 * Value structure for EXT4_FC_TAG_ADD_RANGE.
 *
 * fc_ino is a little-endian inode number followed by 12 opaque bytes.
 * NOTE(review): fc_ex presumably holds a raw on-disk extent for the added
 * range - confirm that the 12 matches the extent structure's size.
 */
struct ext4_fc_add_range {
	__le32 fc_ino;
	__u8 fc_ex[12];
};

/*
 * Value structure for tag EXT4_FC_TAG_DEL_RANGE.
 *
 * All fields are little-endian 32-bit quantities.
 * NOTE(review): presumably fc_lblk is the first logical block of the deleted
 * range in inode fc_ino and fc_len its length in blocks - confirm.
 */
struct ext4_fc_del_range {
	__le32 fc_ino;
	__le32 fc_lblk;
	__le32 fc_len;
};

/*
 * This is the value structure for tags EXT4_FC_TAG_CREAT, EXT4_FC_TAG_LINK
 * and EXT4_FC_TAG_UNLINK.
 *
 * fc_dname is a C99 flexible array member (not a zero-length array), so
 * sizeof(struct ext4_fc_dentry_info) covers only the two fixed inode-number
 * fields; the directory entry name bytes trail the structure.
 */
struct ext4_fc_dentry_info {
	__le32 fc_parent_ino;	/* Parent inode number */
	__le32 fc_ino;		/* Inode number */
	u8 fc_dname[];		/* Dirent name bytes */
};

/*
 * Value structure for tag EXT4_FC_TAG_INODE.
 *
 * NOTE(review): this comment used to mention EXT4_FC_TAG_INODE_PARTIAL as
 * well, but no such tag is defined with the other EXT4_FC_TAG_* values.
 *
 * fc_raw_inode is a C99 flexible array member (not a zero-length array), so
 * sizeof(struct ext4_fc_inode) covers only fc_ino; the raw inode bytes trail
 * the structure.
 */
struct ext4_fc_inode {
	__le32 fc_ino;
	__u8 fc_raw_inode[];
};

/*
 * Value structure for tag EXT4_FC_TAG_TAIL.
 *
 * Both fields are little-endian 32-bit quantities.
 * NOTE(review): fc_tid is presumably a transaction ID and fc_crc a checksum
 * over the fast commit contents - confirm what exactly the CRC covers.
 */
struct ext4_fc_tail {
	__le32 fc_tid;
	__le32 fc_crc;
};

/*
 * In memory list of dentry updates that are performed on the file
 * system used by fast commit code.
 *
 * One instance describes a single create / unlink / link operation.
 * NOTE(review): fcd_op presumably holds the matching EXT4_FC_TAG_CREAT /
 * EXT4_FC_TAG_UNLINK / EXT4_FC_TAG_LINK value, and fcd_iname looks like
 * inline storage for names short enough to fit in DNAME_INLINE_LEN bytes -
 * confirm against fast_commit.c.
 */
struct ext4_fc_dentry_update {
	int fcd_op;		/* Type of update create / unlink / link */
	int fcd_parent;		/* Parent inode number */
	int fcd_ino;		/* Inode number */
	struct qstr fcd_name;	/* Dirent name */
	unsigned char fcd_iname[DNAME_INLINE_LEN];	/* Dirent name string */
	struct list_head fcd_list;	/* Linkage into a list of updates */
};

/*
 * Fast commit reason codes
 *
 * Two independent groups of codes share this one anonymous enum, and the
 * second group restarts at 0. Their numeric values therefore overlap (for
 * example EXT4_FC_REASON_OK == EXT4_FC_REASON_XATTR == 0), so a bare value
 * is only meaningful when it is known which group it belongs to.
 */
enum {
	/*
	 * Commit status codes:
	 */
	EXT4_FC_REASON_OK = 0,
	EXT4_FC_REASON_INELIGIBLE,
	EXT4_FC_REASON_ALREADY_COMMITTED,
	EXT4_FC_REASON_FC_START_FAILED,
	EXT4_FC_REASON_FC_FAILED,

	/*
	 * Fast commit ineligibility reasons:
	 */
	EXT4_FC_REASON_XATTR = 0,
	EXT4_FC_REASON_CROSS_RENAME,
	EXT4_FC_REASON_JOURNAL_FLAG_CHANGE,
	EXT4_FC_REASON_MEM,
	EXT4_FC_REASON_SWAP_BOOT,
	EXT4_FC_REASON_RESIZE,
	EXT4_FC_REASON_RENAME_DIR,
	EXT4_FC_REASON_FALLOC_RANGE,
	EXT4_FC_COMMIT_FAILED,
	/* Number of ineligibility reasons above; used as an array size */
	EXT4_FC_REASON_MAX
};

/*
 * Fast commit statistics counters.
 *
 * fc_ineligible_reason_count has one slot per ineligibility reason code
 * (EXT4_FC_REASON_MAX entries).
 * NOTE(review): the remaining counters presumably count completed fast
 * commits, commits that were ineligible for fast commit, and blocks written
 * by fast commits - confirm against the code that updates them.
 */
struct ext4_fc_stats {
	unsigned int fc_ineligible_reason_count[EXT4_FC_REASON_MAX];
	unsigned long fc_num_commits;
	unsigned long fc_ineligible_commits;
	unsigned long fc_numblks;
};

#endif /* __FAST_COMMIT_H__ */
Loading