Commit d748c623 authored by Matthew Wilcox, committed by Niv Sardi
Browse files

[XFS] Convert l_flushsema to a sv_t



The l_flushsema doesn't exactly have completion semantics, nor mutex
semantics. It's used as a list of tasks which are waiting to be notified
that a flush has completed. It was also being used in a way that was
potentially racy, depending on the semaphore implementation.

By using an sv_t instead of a semaphore we avoid the need for a separate
counter, since we know we just need to wake everything on the queue.

Original waitqueue implementation from Matthew Wilcox. Cleanup and
conversion to sv_t by Christoph Hellwig.

SGI-PV: 981507
SGI-Modid: xfs-linux-melb:xfs-kern:31059a

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
parent d729eae8
Loading
Loading
Loading
Loading
+13 −16
Original line number Diff line number Diff line
@@ -1232,7 +1232,7 @@ xlog_alloc_log(xfs_mount_t *mp,

	spin_lock_init(&log->l_icloglock);
	spin_lock_init(&log->l_grant_lock);
	initnsema(&log->l_flushsema, 0, "ic-flush");
	sv_init(&log->l_flush_wait, 0, "flush_wait");

	/* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
	ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1577,7 +1577,6 @@ xlog_dealloc_log(xlog_t *log)
		kmem_free(iclog);
		iclog = next_iclog;
	}
	freesema(&log->l_flushsema);
	spinlock_destroy(&log->l_icloglock);
	spinlock_destroy(&log->l_grant_lock);

@@ -2101,6 +2100,7 @@ xlog_state_do_callback(
	int		   funcdidcallbacks; /* flag: function did callbacks */
	int		   repeats;	/* for issuing console warnings if
					 * looping too many times */
	int		   wake = 0;

	spin_lock(&log->l_icloglock);
	first_iclog = iclog = log->l_iclog;
@@ -2282,15 +2282,13 @@ xlog_state_do_callback(
	}
#endif

	flushcnt = 0;
	if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) {
		flushcnt = log->l_flushcnt;
		log->l_flushcnt = 0;
	}
	if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR))
		wake = 1;
	spin_unlock(&log->l_icloglock);
	while (flushcnt--)
		vsema(&log->l_flushsema);
}	/* xlog_state_do_callback */

	if (wake)
		sv_broadcast(&log->l_flush_wait);
}


/*
@@ -2388,16 +2386,15 @@ restart:
	}

	iclog = log->l_iclog;
	if (! (iclog->ic_state == XLOG_STATE_ACTIVE)) {
		log->l_flushcnt++;
		spin_unlock(&log->l_icloglock);
	if (iclog->ic_state != XLOG_STATE_ACTIVE) {
		xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH);
		XFS_STATS_INC(xs_log_noiclogs);
		/* Ensure that log writes happen */
		psema(&log->l_flushsema, PINOD);

		/* Wait for log writes to have flushed */
		sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0);
		goto restart;
	}
	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);

	head = &iclog->ic_header;

	atomic_inc(&iclog->ic_refcnt);	/* prevents sync */
+2 −4
Original line number Diff line number Diff line
@@ -423,10 +423,8 @@ typedef struct log {
	int			l_logBBsize;    /* size of log in BB chunks */

	/* The following block of fields are changed while holding icloglock */
	sema_t			l_flushsema ____cacheline_aligned_in_smp;
						/* iclog flushing semaphore */
	int			l_flushcnt;	/* # of procs waiting on this
						 * sema */
	sv_t			l_flush_wait ____cacheline_aligned_in_smp;
						/* waiting for iclog flush */
	int			l_covered_state;/* state of "covering disk
						 * log entries" */
	xlog_in_core_t		*l_iclog;       /* head log queue	*/