fs/epoll: make nesting accounting safe for -rt kernel (efcdd350) · Commits · 戴 / test

fs/eventpoll.c

+43 −21

Original line number	Diff line number	Diff line
		@@ -218,13 +218,18 @@ struct eventpoll {
		struct file *file;

		/* used to optimize loop detection check */
		int visited;
		struct list_head visited_list_link;
		int visited;

		#ifdef CONFIG_NET_RX_BUSY_POLL
		/* used to track busy poll napi_id */
		unsigned int napi_id;
		#endif

		#ifdef CONFIG_DEBUG_LOCK_ALLOC
		/* tracks wakeup nests for lockdep validation */
		u8 nests;
		#endif
		};

		/* Wait structure used by the poll hooks */
		@@ -545,30 +550,47 @@ out_unlock:
		*/
		#ifdef CONFIG_DEBUG_LOCK_ALLOC

		static DEFINE_PER_CPU(int, wakeup_nest);

		static void ep_poll_safewake(wait_queue_head_t *wq)
		static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
		{
		struct eventpoll *ep_src;
		unsigned long flags;
		int subclass;

		local_irq_save(flags);
		preempt_disable();
		subclass = __this_cpu_read(wakeup_nest);
		spin_lock_nested(&wq->lock, subclass + 1);
		__this_cpu_inc(wakeup_nest);
		wake_up_locked_poll(wq, POLLIN);
		__this_cpu_dec(wakeup_nest);
		spin_unlock(&wq->lock);
		local_irq_restore(flags);
		preempt_enable();
		u8 nests = 0;

		/*
		* To set the subclass or nesting level for spin_lock_irqsave_nested()
		* it might be natural to create a per-cpu nest count. However, since
		* we can recurse on ep->poll_wait.lock, and a non-raw spinlock can
		* schedule() in the -rt kernel, the per-cpu variables are no longer
		* protected. Thus, we are introducing a per eventpoll nest field.
		* If we are not being called from ep_poll_callback(), epi is NULL and
		* we are at the first level of nesting, 0. Otherwise, we are being
		* called from ep_poll_callback() and if a previous wakeup source is
		* not an epoll file itself, we are at depth 1 since the wakeup source
		* is depth 0. If the wakeup source is a previous epoll file in the
		* wakeup chain then we use its nests value and record ours as
		* nests + 1. The previous epoll file nests value is stable since it's
		* already holding its own poll_wait.lock.
		*/
		if (epi) {
		if ((is_file_epoll(epi->ffd.file))) {
		ep_src = epi->ffd.file->private_data;
		nests = ep_src->nests;
		} else {
		nests = 1;
		}
		}
		spin_lock_irqsave_nested(&ep->poll_wait.lock, flags, nests);
		ep->nests = nests + 1;
		wake_up_locked_poll(&ep->poll_wait, EPOLLIN);
		ep->nests = 0;
		spin_unlock_irqrestore(&ep->poll_wait.lock, flags);
		}

		#else

		static void ep_poll_safewake(wait_queue_head_t *wq)
		static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
		{
		wake_up_poll(wq, EPOLLIN);
		wake_up_poll(&ep->poll_wait, EPOLLIN);
		}

		#endif
		@@ -789,7 +811,7 @@ static void ep_free(struct eventpoll *ep)

		/* We need to release all tasks waiting for these file */
		if (waitqueue_active(&ep->poll_wait))
		ep_poll_safewake(&ep->poll_wait);
		ep_poll_safewake(ep, NULL);

		/*
		* We need to lock this because we could be hit by
		@@ -1258,7 +1280,7 @@ out_unlock:

		/* We have to call this outside the lock */
		if (pwake)
		ep_poll_safewake(&ep->poll_wait);
		ep_poll_safewake(ep, epi);

		if (!(epi->event.events & EPOLLEXCLUSIVE))
		ewake = 1;
		@@ -1562,7 +1584,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,

		/* We have to call this outside the lock */
		if (pwake)
		ep_poll_safewake(&ep->poll_wait);
		ep_poll_safewake(ep, NULL);

		return 0;

		@@ -1666,7 +1688,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,

		/* We have to call this outside the lock */
		if (pwake)
		ep_poll_safewake(&ep->poll_wait);
		ep_poll_safewake(ep, NULL);

		return 0;
		}

Admin message