Commit 95278dda authored by Cong Wang's avatar Cong Wang Committed by David S. Miller
Browse files

net_sched: convert idrinfo->lock from spinlock to a mutex



In commit ec3ed293 ("net_sched: change tcf_del_walker() to take idrinfo->lock")
we move fl_hw_destroy_tmplt() to a workqueue to avoid blocking
with the spinlock held. Unfortunately, this causes a lot of
troubles here:

1. tcf_chain_destroy() could be called right after we queue the work
   but before the work runs. This is a use-after-free.

2. The chain refcnt is already 0, we can't even just hold it again.
   We can check refcnt==1 but it is ugly.

3. The chain with refcnt 0 is still visible in its block, which means
   it could be still found and used!

4. The block has a refcnt too, we can't hold it without introducing a
   proper API either.

We can make it working but the end result is ugly. Instead of wasting
time on reviewing it, let's just convert the troubling spinlock to
a mutex, which allows us to use non-atomic allocations too.

Fixes: ec3ed293 ("net_sched: change tcf_del_walker() to take idrinfo->lock")
Reported-by: default avatarIdo Schimmel <idosch@idosch.org>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Vlad Buslov <vladbu@mellanox.com>
Cc: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: default avatarCong Wang <xiyou.wangcong@gmail.com>
Tested-by: default avatarIdo Schimmel <idosch@mellanox.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 6f52f80e
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@
#include <net/netns/generic.h>

struct tcf_idrinfo {
	spinlock_t	lock;
	struct mutex	lock;
	struct idr	action_idr;
};

@@ -117,7 +117,7 @@ int tc_action_net_init(struct tc_action_net *tn,
	if (!tn->idrinfo)
		return -ENOMEM;
	tn->ops = ops;
	spin_lock_init(&tn->idrinfo->lock);
	mutex_init(&tn->idrinfo->lock);
	idr_init(&tn->idrinfo->action_idr);
	return err;
}
+22 −22
Original line number Diff line number Diff line
@@ -104,11 +104,11 @@ static int __tcf_action_put(struct tc_action *p, bool bind)
{
	struct tcf_idrinfo *idrinfo = p->idrinfo;

	if (refcount_dec_and_lock(&p->tcfa_refcnt, &idrinfo->lock)) {
	if (refcount_dec_and_mutex_lock(&p->tcfa_refcnt, &idrinfo->lock)) {
		if (bind)
			atomic_dec(&p->tcfa_bindcnt);
		idr_remove(&idrinfo->action_idr, p->tcfa_index);
		spin_unlock(&idrinfo->lock);
		mutex_unlock(&idrinfo->lock);

		tcf_action_cleanup(p);
		return 1;
@@ -200,7 +200,7 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
	struct tc_action *p;
	unsigned long id = 1;

	spin_lock(&idrinfo->lock);
	mutex_lock(&idrinfo->lock);

	s_i = cb->args[0];

@@ -235,7 +235,7 @@ done:
	if (index >= 0)
		cb->args[0] = index + 1;

	spin_unlock(&idrinfo->lock);
	mutex_unlock(&idrinfo->lock);
	if (n_i) {
		if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
			cb->args[1] = n_i;
@@ -277,18 +277,18 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
	if (nla_put_string(skb, TCA_KIND, ops->kind))
		goto nla_put_failure;

	spin_lock(&idrinfo->lock);
	mutex_lock(&idrinfo->lock);
	idr_for_each_entry_ul(idr, p, id) {
		ret = tcf_idr_release_unsafe(p);
		if (ret == ACT_P_DELETED) {
			module_put(ops->owner);
			n_i++;
		} else if (ret < 0) {
			spin_unlock(&idrinfo->lock);
			mutex_unlock(&idrinfo->lock);
			goto nla_put_failure;
		}
	}
	spin_unlock(&idrinfo->lock);
	mutex_unlock(&idrinfo->lock);

	if (nla_put_u32(skb, TCA_FCNT, n_i))
		goto nla_put_failure;
@@ -324,13 +324,13 @@ int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
	struct tcf_idrinfo *idrinfo = tn->idrinfo;
	struct tc_action *p;

	spin_lock(&idrinfo->lock);
	mutex_lock(&idrinfo->lock);
	p = idr_find(&idrinfo->action_idr, index);
	if (IS_ERR(p))
		p = NULL;
	else if (p)
		refcount_inc(&p->tcfa_refcnt);
	spin_unlock(&idrinfo->lock);
	mutex_unlock(&idrinfo->lock);

	if (p) {
		*a = p;
@@ -345,10 +345,10 @@ static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index)
	struct tc_action *p;
	int ret = 0;

	spin_lock(&idrinfo->lock);
	mutex_lock(&idrinfo->lock);
	p = idr_find(&idrinfo->action_idr, index);
	if (!p) {
		spin_unlock(&idrinfo->lock);
		mutex_unlock(&idrinfo->lock);
		return -ENOENT;
	}

@@ -358,7 +358,7 @@ static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index)

			WARN_ON(p != idr_remove(&idrinfo->action_idr,
						p->tcfa_index));
			spin_unlock(&idrinfo->lock);
			mutex_unlock(&idrinfo->lock);

			tcf_action_cleanup(p);
			module_put(owner);
@@ -369,7 +369,7 @@ static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index)
		ret = -EPERM;
	}

	spin_unlock(&idrinfo->lock);
	mutex_unlock(&idrinfo->lock);
	return ret;
}

@@ -431,10 +431,10 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
{
	struct tcf_idrinfo *idrinfo = tn->idrinfo;

	spin_lock(&idrinfo->lock);
	mutex_lock(&idrinfo->lock);
	/* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
	WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index)));
	spin_unlock(&idrinfo->lock);
	mutex_unlock(&idrinfo->lock);
}
EXPORT_SYMBOL(tcf_idr_insert);

@@ -444,10 +444,10 @@ void tcf_idr_cleanup(struct tc_action_net *tn, u32 index)
{
	struct tcf_idrinfo *idrinfo = tn->idrinfo;

	spin_lock(&idrinfo->lock);
	mutex_lock(&idrinfo->lock);
	/* Remove ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
	WARN_ON(!IS_ERR(idr_remove(&idrinfo->action_idr, index)));
	spin_unlock(&idrinfo->lock);
	mutex_unlock(&idrinfo->lock);
}
EXPORT_SYMBOL(tcf_idr_cleanup);

@@ -465,14 +465,14 @@ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
	int ret;

again:
	spin_lock(&idrinfo->lock);
	mutex_lock(&idrinfo->lock);
	if (*index) {
		p = idr_find(&idrinfo->action_idr, *index);
		if (IS_ERR(p)) {
			/* This means that another process allocated
			 * index but did not assign the pointer yet.
			 */
			spin_unlock(&idrinfo->lock);
			mutex_unlock(&idrinfo->lock);
			goto again;
		}

@@ -485,7 +485,7 @@ again:
		} else {
			*a = NULL;
			ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
					    *index, GFP_ATOMIC);
					    *index, GFP_KERNEL);
			if (!ret)
				idr_replace(&idrinfo->action_idr,
					    ERR_PTR(-EBUSY), *index);
@@ -494,12 +494,12 @@ again:
		*index = 1;
		*a = NULL;
		ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
				    UINT_MAX, GFP_ATOMIC);
				    UINT_MAX, GFP_KERNEL);
		if (!ret)
			idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY),
				    *index);
	}
	spin_unlock(&idrinfo->lock);
	mutex_unlock(&idrinfo->lock);
	return ret;
}
EXPORT_SYMBOL(tcf_idr_check_alloc);
+2 −11
Original line number Diff line number Diff line
@@ -79,7 +79,6 @@ struct fl_flow_tmplt {
	struct fl_flow_key mask;
	struct flow_dissector dissector;
	struct tcf_chain *chain;
	struct rcu_work rwork;
};

struct cls_fl_head {
@@ -1438,20 +1437,12 @@ errout_tb:
	return ERR_PTR(err);
}

static void fl_tmplt_destroy_work(struct work_struct *work)
{
	struct fl_flow_tmplt *tmplt = container_of(to_rcu_work(work),
						 struct fl_flow_tmplt, rwork);

	fl_hw_destroy_tmplt(tmplt->chain, tmplt);
	kfree(tmplt);
}

static void fl_tmplt_destroy(void *tmplt_priv)
{
	struct fl_flow_tmplt *tmplt = tmplt_priv;

	tcf_queue_work(&tmplt->rwork, fl_tmplt_destroy_work);
	fl_hw_destroy_tmplt(tmplt->chain, tmplt);
	kfree(tmplt);
}

static int fl_dump_key_val(struct sk_buff *skb,