Commit 4291dc1a authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'net-sched-do-not-drop-root-lock-in-tcf_qevent_handle'



Petr Machata says:

====================
net: sched: Do not drop root lock in tcf_qevent_handle()

Mirred currently does not mix well with blocks executed after the qdisc
root lock is taken. This includes classification blocks (such as in PRIO,
ETS, DRR qdiscs) and qevents. The locking caused by the packet mirrored by
mirred can cause deadlocks: either when the thread of execution attempts to
take the lock a second time, or when two threads end up waiting on each
other's locks.

The qevent patchset attempted to not introduce further badness of this
sort, and dropped the lock before executing the qevent block. However this
lead to too little locking and races between qdisc configuration and packet
enqueue in the RED qdisc.

Before the deadlock issues are solved in a way that can be applied across
many qdiscs reasonably easily, do for qevents what is done for the
classification blocks and just keep holding the root lock.

That is done in patch #1. Patch #2 then drops the now unnecessary root_lock
argument from Qdisc_ops.enqueue.
====================

Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 89e35f66 ac5c66f2
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -568,7 +568,7 @@ void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch);
int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr,
			       struct netlink_ext_ack *extack);
struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
				  spinlock_t *root_lock, struct sk_buff **to_free, int *ret);
				  struct sk_buff **to_free, int *ret);
int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe);
#else
static inline int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
@@ -591,7 +591,7 @@ static inline int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlatt

static inline struct sk_buff *
tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
		  spinlock_t *root_lock, struct sk_buff **to_free, int *ret)
		  struct sk_buff **to_free, int *ret)
{
	return skb;
}
+2 −4
Original line number Diff line number Diff line
@@ -57,7 +57,6 @@ struct qdisc_skb_head {
struct Qdisc {
	int 			(*enqueue)(struct sk_buff *skb,
					   struct Qdisc *sch,
					   spinlock_t *root_lock,
					   struct sk_buff **to_free);
	struct sk_buff *	(*dequeue)(struct Qdisc *sch);
	unsigned int		flags;
@@ -242,7 +241,6 @@ struct Qdisc_ops {

	int 			(*enqueue)(struct sk_buff *skb,
					   struct Qdisc *sch,
					   spinlock_t *root_lock,
					   struct sk_buff **to_free);
	struct sk_buff *	(*dequeue)(struct Qdisc *);
	struct sk_buff *	(*peek)(struct Qdisc *);
@@ -790,11 +788,11 @@ static inline void qdisc_calculate_pkt_len(struct sk_buff *skb,
#endif
}

static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
				struct sk_buff **to_free)
{
	qdisc_calculate_pkt_len(skb, sch);
	return sch->enqueue(skb, sch, root_lock, to_free);
	return sch->enqueue(skb, sch, to_free);
}

static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
+2 −2
Original line number Diff line number Diff line
@@ -3749,7 +3749,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
	qdisc_calculate_pkt_len(skb, q);

	if (q->flags & TCQ_F_NOLOCK) {
		rc = q->enqueue(skb, q, NULL, &to_free) & NET_XMIT_MASK;
		rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
		qdisc_run(q);

		if (unlikely(to_free))
@@ -3792,7 +3792,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
		qdisc_run_end(q);
		rc = NET_XMIT_SUCCESS;
	} else {
		rc = q->enqueue(skb, q, root_lock, &to_free) & NET_XMIT_MASK;
		rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
		if (qdisc_run_begin(q)) {
			if (unlikely(contended)) {
				spin_unlock(&q->busylock);
+1 −7
Original line number Diff line number Diff line
@@ -3822,7 +3822,7 @@ int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index
EXPORT_SYMBOL(tcf_qevent_validate_change);

struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
				  spinlock_t *root_lock, struct sk_buff **to_free, int *ret)
				  struct sk_buff **to_free, int *ret)
{
	struct tcf_result cl_res;
	struct tcf_proto *fl;
@@ -3832,9 +3832,6 @@ struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, stru

	fl = rcu_dereference_bh(qe->filter_chain);

	if (root_lock)
		spin_unlock(root_lock);

	switch (tcf_classify(skb, fl, &cl_res, false)) {
	case TC_ACT_SHOT:
		qdisc_qstats_drop(sch);
@@ -3853,9 +3850,6 @@ struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, stru
		return NULL;
	}

	if (root_lock)
		spin_lock(root_lock);

	return skb;
}
EXPORT_SYMBOL(tcf_qevent_handle);
+2 −2
Original line number Diff line number Diff line
@@ -374,7 +374,7 @@ static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl,

/* --------------------------- Qdisc operations ---------------------------- */

static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			  struct sk_buff **to_free)
{
	struct atm_qdisc_data *p = qdisc_priv(sch);
@@ -432,7 +432,7 @@ done:
#endif
	}

	ret = qdisc_enqueue(skb, flow->q, root_lock, to_free);
	ret = qdisc_enqueue(skb, flow->q, to_free);
	if (ret != NET_XMIT_SUCCESS) {
drop: __maybe_unused
		if (net_xmit_drop_count(ret)) {
Loading