xfs: log timestamp updates (8a9c9980) · Commits · 戴 / test

fs/xfs/xfs_file.c

+14 −69

Original line number	Diff line number	Diff line
		@@ -163,7 +163,6 @@ xfs_file_fsync(
		struct inode *inode = file->f_mapping->host;
		struct xfs_inode *ip = XFS_I(inode);
		struct xfs_mount *mp = ip->i_mount;
		struct xfs_trans *tp;
		int error = 0;
		int log_flushed = 0;
		xfs_lsn_t lsn = 0;
		@@ -194,75 +193,15 @@ xfs_file_fsync(
		}

		/*
		* We always need to make sure that the required inode state is safe on
		* disk. The inode might be clean but we still might need to force the
		* log because of committed transactions that haven't hit the disk yet.
		* Likewise, there could be unflushed non-transactional changes to the
		* inode core that have to go to disk and this requires us to issue
		* a synchronous transaction to capture these changes correctly.
		*
		* This code relies on the assumption that if the i_update_core field
		* of the inode is clear and the inode is unpinned then it is clean
		* and no action is required.
		* All metadata updates are logged, which means that we just have
		* to flush the log up to the latest LSN that touched the inode.
		*/
		xfs_ilock(ip, XFS_ILOCK_SHARED);

		/*
		* First check if the VFS inode is marked dirty. All the dirtying
		* of non-transactional updates now goes through mark_inode_dirty*,
		* which allows us to distinguish between pure timestamp updates
		* and i_size updates which need to be caught for fdatasync.
		* After that also check for the dirty state in the XFS inode, which
		* might get cleared when the inode gets written out via the AIL
		* or xfs_iflush_cluster.
		*/
		if (((inode->i_state & I_DIRTY_DATASYNC) ||
		((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
		ip->i_update_core) {
		/*
		* Kick off a transaction to log the inode core to get the
		* updates. The sync transaction will also force the log.
		*/
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
		error = xfs_trans_reserve(tp, 0,
		XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
		if (error) {
		xfs_trans_cancel(tp, 0);
		return -error;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);

		/*
		* Note - it's possible that we might have pushed ourselves out
		* of the way during trans_reserve which would flush the inode.
		* But there's no guarantee that the inode buffer has actually
		* gone out yet (it's delwri). Plus the buffer could be pinned
		* anyway if it's part of an inode in another recent
		* transaction. So we play it safe and fire off the
		* transaction anyway.
		*/
		xfs_trans_ijoin(tp, ip, 0);
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		error = xfs_trans_commit(tp, 0);

		lsn = ip->i_itemp->ili_last_lsn;
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		} else {
		/*
		* Timestamps/size haven't changed since last inode flush or
		* inode transaction commit. That means either nothing got
		* written or a transaction committed which caught the updates.
		* If the latter happened and the transaction hasn't hit the
		* disk yet, the inode will still be pinned. If it is,
		* force the log.
		*/
		if (xfs_ipincount(ip))
		lsn = ip->i_itemp->ili_last_lsn;
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		}

		if (!error && lsn)
		if (lsn)
		error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);

		/*
		@@ -659,9 +598,6 @@ restart:
		return error;
		}

		if (likely(!(file->f_mode & FMODE_NOCMTIME)))
		file_update_time(file);

		/*
		* If the offset is beyond the size of the file, we need to zero any
		* blocks that fall between the existing EOF and the start of this
		@@ -684,6 +620,15 @@ restart:
		if (error)
		return error;

		/*
		* Updating the timestamps will grab the ilock again from
		* xfs_fs_dirty_inode, so we have to call it after dropping the
		* lock above. Eventually we should look into a way to avoid
		* the pointless lock roundtrip.
		*/
		if (likely(!(file->f_mode & FMODE_NOCMTIME)))
		file_update_time(file);

		/*
		* If we're writing the file then make sure to clear the setuid and
		* setgid bits if the process is not being run by root. This keeps

fs/xfs/xfs_iget.c

+0 −1

Original line number	Diff line number	Diff line
		@@ -91,7 +91,6 @@ xfs_inode_alloc(
		ip->i_afp = NULL;
		memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
		ip->i_flags = 0;
		ip->i_update_core = 0;
		ip->i_delayed_blks = 0;
		memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));

fs/xfs/xfs_inode.c

+1 −24

Original line number	Diff line number	Diff line
		@@ -1656,7 +1656,6 @@ retry:
		iip = ip->i_itemp;
		if (!iip || xfs_inode_clean(ip)) {
		ASSERT(ip != free_ip);
		ip->i_update_core = 0;
		xfs_ifunlock(ip);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		continue;
		@@ -2451,7 +2450,6 @@ xfs_iflush(
		* to disk, because the log record didn't make it to disk!
		*/
		if (XFS_FORCED_SHUTDOWN(mp)) {
		ip->i_update_core = 0;
		if (iip)
		iip->ili_format.ilf_fields = 0;
		xfs_ifunlock(ip);
		@@ -2533,26 +2531,6 @@ xfs_iflush_int(
		/* set dip = inode's place in the buffer */
		dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);

		/*
		* Clear i_update_core before copying out the data.
		* This is for coordination with our timestamp updates
		* that don't hold the inode lock. They will always
		* update the timestamps BEFORE setting i_update_core,
		* so if we clear i_update_core after they set it we
		* are guaranteed to see their updates to the timestamps.
		* I believe that this depends on strongly ordered memory
		* semantics, but we have that. We use the SYNCHRONIZE
		* macro to make sure that the compiler does not reorder
		* the i_update_core access below the data copy below.
		*/
		ip->i_update_core = 0;
		SYNCHRONIZE();

		/*
		* Make sure to get the latest timestamps from the Linux inode.
		*/
		xfs_synchronize_times(ip);

		if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
		mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
		@@ -2711,8 +2689,7 @@ xfs_iflush_int(
		} else {
		/*
		* We're flushing an inode which is not in the AIL and has
		* not been logged but has i_update_core set. For this
		* case we can use a B_DELWRI flush and immediately drop
		* not been logged. For this case we can immediately drop
		* the inode flush lock because we can avoid the whole
		* AIL state thing. It's OK to drop the flush lock now,
		* because we've already locked the buffer and to do anything

fs/xfs/xfs_inode.h

+0 −5

Original line number	Diff line number	Diff line
		@@ -241,7 +241,6 @@ typedef struct xfs_inode {
		spinlock_t i_flags_lock; /* inode i_flags lock */
		/* Miscellaneous state. */
		unsigned long i_flags; /* see defined flags below */
		unsigned char i_update_core; /* timestamps/size is dirty */
		unsigned int i_delayed_blks; /* count of delay alloc blks */

		xfs_icdinode_t i_d; /* most of ondisk inode */
		@@ -534,10 +533,6 @@ void xfs_promote_inode(struct xfs_inode *);
		void xfs_lock_inodes(xfs_inode_t **, int, uint);
		void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);

		void xfs_synchronize_times(xfs_inode_t *);
		void xfs_mark_inode_dirty(xfs_inode_t *);
		void xfs_mark_inode_dirty_sync(xfs_inode_t *);

		#define IHOLD(ip) \
		do { \
		ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \

fs/xfs/xfs_inode_item.c

+0 −36

Original line number	Diff line number	Diff line
		@@ -254,42 +254,6 @@ xfs_inode_item_format(
		vecp++;
		nvecs = 1;

		/*
		* Clear i_update_core if the timestamps (or any other
		* non-transactional modification) need flushing/logging
		* and we're about to log them with the rest of the core.
		*
		* This is the same logic as xfs_iflush() but this code can't
		* run at the same time as xfs_iflush because we're in commit
		* processing here and so we have the inode lock held in
		* exclusive mode. Although it doesn't really matter
		* for the timestamps if both routines were to grab the
		* timestamps or not. That would be ok.
		*
		* We clear i_update_core before copying out the data.
		* This is for coordination with our timestamp updates
		* that don't hold the inode lock. They will always
		* update the timestamps BEFORE setting i_update_core,
		* so if we clear i_update_core after they set it we
		* are guaranteed to see their updates to the timestamps
		* here. Likewise, if they set it after we clear it
		* here, we'll see it either on the next commit of this
		* inode or the next time the inode gets flushed via
		* xfs_iflush(). This depends on strongly ordered memory
		* semantics, but we have that. We use the SYNCHRONIZE
		* macro to make sure that the compiler does not reorder
		* the i_update_core access below the data copy below.
		*/
		if (ip->i_update_core) {
		ip->i_update_core = 0;
		SYNCHRONIZE();
		}

		/*
		* Make sure to get the latest timestamps from the Linux inode.
		*/
		xfs_synchronize_times(ip);

		vecp->i_addr = &ip->i_d;
		vecp->i_len = sizeof(struct xfs_icdinode);
		vecp->i_type = XLOG_REG_TYPE_ICORE;

Admin message