Commit b17170b2 authored by Matthew Wilcox's avatar Matthew Wilcox Committed by Matthew Wilcox
Browse files

Simplify semaphore implementation



By removing the negative values of 'count' and relying on the wait_list to
indicate whether we have any waiters, we can simplify the implementation
by removing the protection against an unlikely race condition.  Thanks to
David Howells for his suggestions.

Signed-off-by: default avatarMatthew Wilcox <willy@linux.intel.com>
parent f1241c87
Loading
Loading
Loading
Loading
+3 −6
Original line number Diff line number Diff line
@@ -15,15 +15,12 @@

/*
 * The spinlock controls access to the other members of the semaphore.
 * 'count' is decremented by every task which calls down*() and incremented
 * by every call to up().  Thus, if it is positive, it indicates how many
 * more tasks may acquire the lock.  If it is negative, it indicates how
 * many tasks are waiting for the lock.  Tasks waiting for the lock are
 * kept on the wait_list.
 * 'count' represents how many more tasks can acquire this semaphore.
 * Tasks waiting for the lock are kept on the wait_list.
 */
struct semaphore {
	spinlock_t		lock;
	int			count;
	unsigned int		count;
	struct list_head	wait_list;
};

+24 −54
Original line number Diff line number Diff line
@@ -18,18 +18,8 @@
 * down_trylock() and up() can be called from interrupt context.
 * So we have to disable interrupts when taking the lock.
 *
 * The ->count variable, if positive, defines how many more tasks can
 * acquire the semaphore.  If negative, it represents how many tasks are
 * waiting on the semaphore (*).  If zero, no tasks are waiting, and no more
 * tasks can acquire the semaphore.
 *
 * (*) Except for the window between one task calling up() and the task
 * sleeping in a __down_common() waking up.  In order to avoid a third task
 * coming in and stealing the second task's wakeup, we leave the ->count
 * negative.  If we have a more complex situation, the ->count may become
 * zero or negative (eg a semaphore with count = 2, three tasks attempt to
 * acquire it, one sleeps, two finish and call up(), the second task to call
 * up() notices that the list is empty and just increments count).
 * The ->count variable defines how many more tasks can acquire the
 * semaphore.  If it's zero, there may be tasks waiting on the list.
 */

static noinline void __down(struct semaphore *sem);
@@ -43,7 +33,9 @@ void down(struct semaphore *sem)
	unsigned long flags;

	spin_lock_irqsave(&sem->lock, flags);
	if (unlikely(sem->count-- <= 0))
	if (likely(sem->count > 0))
		sem->count--;
	else
		__down(sem);
	spin_unlock_irqrestore(&sem->lock, flags);
}
@@ -55,7 +47,9 @@ int down_interruptible(struct semaphore *sem)
	int result = 0;

	spin_lock_irqsave(&sem->lock, flags);
	if (unlikely(sem->count-- <= 0))
	if (likely(sem->count > 0))
		sem->count--;
	else
		result = __down_interruptible(sem);
	spin_unlock_irqrestore(&sem->lock, flags);

@@ -69,7 +63,9 @@ int down_killable(struct semaphore *sem)
	int result = 0;

	spin_lock_irqsave(&sem->lock, flags);
	if (unlikely(sem->count-- <= 0))
	if (likely(sem->count > 0))
		sem->count--;
	else
		result = __down_killable(sem);
	spin_unlock_irqrestore(&sem->lock, flags);

@@ -111,7 +107,9 @@ int down_timeout(struct semaphore *sem, long jiffies)
	int result = 0;

	spin_lock_irqsave(&sem->lock, flags);
	if (unlikely(sem->count-- <= 0))
	if (likely(sem->count > 0))
		sem->count--;
	else
		result = __down_timeout(sem, jiffies);
	spin_unlock_irqrestore(&sem->lock, flags);

@@ -124,7 +122,7 @@ void up(struct semaphore *sem)
	unsigned long flags;

	spin_lock_irqsave(&sem->lock, flags);
	if (likely(sem->count >= 0))
	if (likely(list_empty(&sem->wait_list)))
		sem->count++;
	else
		__up(sem);
@@ -140,22 +138,6 @@ struct semaphore_waiter {
	int up;
};

/*
 * Wake up a process waiting on a semaphore.  We need to call this from both
 * __up and __down_common as it's possible to race a task into the semaphore
 * if it comes in at just the right time between two tasks calling up() and
 * a third task waking up.  This function assumes the wait_list is already
 * checked for being non-empty.
 */
static noinline void __sched __up_down_common(struct semaphore *sem)
{
	struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
						struct semaphore_waiter, list);
	list_del(&waiter->list);
	waiter->up = 1;
	wake_up_process(waiter->task);
}

/*
 * Because this function is inlined, the 'state' parameter will be
 * constant, and thus optimised away by the compiler.  Likewise the
@@ -164,7 +146,6 @@ static noinline void __sched __up_down_common(struct semaphore *sem)
static inline int __sched __down_common(struct semaphore *sem, long state,
								long timeout)
{
	int result = 0;
	struct task_struct *task = current;
	struct semaphore_waiter waiter;

@@ -184,28 +165,16 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
		timeout = schedule_timeout(timeout);
		spin_lock_irq(&sem->lock);
		if (waiter.up)
			goto woken;
			return 0;
	}

 timed_out:
	list_del(&waiter.list);
	result = -ETIME;
	goto woken;
	return -ETIME;

 interrupted:
	list_del(&waiter.list);
	result = -EINTR;
 woken:
	/*
	 * Account for the process which woke us up.  For the case where
	 * we're interrupted, we need to increment the count on our own
	 * behalf.  I don't believe we can hit the case where the
	 * sem->count hits zero, *and* there's a second task sleeping,
	 * but it doesn't hurt, that's not a commonly exercised path and
	 * it's not a performance path either.
	 */
	if (unlikely((++sem->count >= 0) && !list_empty(&sem->wait_list)))
		__up_down_common(sem);
	return result;
	return -EINTR;
}

static noinline void __sched __down(struct semaphore *sem)
@@ -230,8 +199,9 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies)

static noinline void __sched __up(struct semaphore *sem)
{
	if (unlikely(list_empty(&sem->wait_list)))
		sem->count++;
	else
		__up_down_common(sem);
	struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
						struct semaphore_waiter, list);
	list_del(&waiter->list);
	waiter->up = 1;
	wake_up_process(waiter->task);
}