Commit 78f07ada authored by David S. Miller
Browse files

Merge branch 'ipv6-Refactor-nexthop-selection-helpers-during-a-fib-lookup'



David Ahern says:

====================
ipv6: Refactor nexthop selection helpers during a fib lookup

IPv6 has a fib6_nh embedded within each fib6_info and a separate
fib6_info for each path in a multipath route. A side effect is that
a fib6_info is passed all the way down the stack when selecting a path
on a fib lookup. Refactor the fib lookup functions and associated
helper functions to take a fib6_nh when appropriate to enable IPv6
to work with nexthop objects where the fib6_nh is not directly part
of a fib entry.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 62720b12 0b34eb00
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -127,6 +127,10 @@ struct rt6_exception {

/*
 * Per-nexthop state for an IPv6 route: the common nexthop data and, when
 * CONFIG_IPV6_ROUTER_PREF is enabled, the timestamp used to rate-limit
 * router reachability probes.
 */
struct fib6_nh {
	struct fib_nh_common	nh_common;

#ifdef CONFIG_IPV6_ROUTER_PREF
	/* jiffies value recorded by rt6_probe() when it queues probe work;
	 * compared against the device's rtr_probe_interval before the next
	 * probe is scheduled.
	 */
	unsigned long		last_probe;
#endif
};

struct fib6_info {
@@ -155,10 +159,6 @@ struct fib6_info {
	struct rt6_info * __percpu	*rt6i_pcpu;
	struct rt6_exception_bucket __rcu *rt6i_exception_bucket;

#ifdef CONFIG_IPV6_ROUTER_PREF
	unsigned long			last_probe;
#endif

	u32				fib6_metric;
	u8				fib6_protocol;
	u8				fib6_type;
+145 −121
Original line number Diff line number Diff line
@@ -102,7 +102,8 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
			   int strict);
static size_t rt6_nlmsg_size(struct fib6_info *rt);
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
			 struct fib6_info *rt, struct dst_entry *dst,
@@ -446,12 +447,13 @@ struct fib6_info *fib6_multipath_select(const struct net *net,

	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
				 fib6_siblings) {
		const struct fib6_nh *nh = &sibling->fib6_nh;
		int nh_upper_bound;

		nh_upper_bound = atomic_read(&sibling->fib6_nh.fib_nh_upper_bound);
		nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
		if (fl6->mp_hash > nh_upper_bound)
			continue;
		if (rt6_score_route(sibling, oif, strict) < 0)
		if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
			break;
		match = sibling;
		break;
@@ -464,12 +466,34 @@ struct fib6_info *fib6_multipath_select(const struct net *net,
 *	Route lookup. rcu_read_lock() should be held.
 */

/*
 * Check whether one nexthop satisfies the device constraints of a lookup.
 *
 * A nexthop flagged RTNH_F_DEAD never matches.  When @oif is non-zero the
 * nexthop device's ifindex must equal it.  When @oif is zero the answer is
 * whatever ipv6_chk_addr() reports for @saddr on the nexthop device, with
 * the RT6_LOOKUP_F_IFACE bit of @flags passed through as its last argument.
 *
 * Returns true on a match, false otherwise.
 */
static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
			       const struct in6_addr *saddr, int oif, int flags)
{
	const struct net_device *nh_dev;

	if (nh->fib_nh_flags & RTNH_F_DEAD)
		return false;

	nh_dev = nh->fib_nh_dev;

	/* An explicit output interface is decided by ifindex alone. */
	if (oif)
		return nh_dev->ifindex == oif;

	return ipv6_chk_addr(net, saddr, nh_dev,
			     flags & RT6_LOOKUP_F_IFACE) != 0;
}

static inline struct fib6_info *rt6_device_match(struct net *net,
						 struct fib6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	const struct fib6_nh *nh;
	struct fib6_info *sprt;

	if (!oif && ipv6_addr_any(saddr) &&
@@ -477,20 +501,10 @@ static inline struct fib6_info *rt6_device_match(struct net *net,
		return rt;

	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
		const struct net_device *dev = sprt->fib6_nh.fib_nh_dev;

		if (sprt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
			continue;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
		nh = &sprt->fib6_nh;
		if (__rt6_device_match(net, nh, saddr, oif, flags))
			return sprt;
	}
	}

	if (oif && flags & RT6_LOOKUP_F_IFACE)
		return net->ipv6.fib6_null_entry;
@@ -517,7 +531,7 @@ static void rt6_probe_deferred(struct work_struct *w)
	kfree(work);
}

static void rt6_probe(struct fib6_info *rt)
static void rt6_probe(struct fib6_nh *fib6_nh)
{
	struct __rt6_probe_work *work = NULL;
	const struct in6_addr *nh_gw;
@@ -533,11 +547,11 @@ static void rt6_probe(struct fib6_info *rt)
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !rt->fib6_nh.fib_nh_gw_family)
	/* Only a nexthop with a gateway has a router to probe; bail out
	 * when no gateway family is set, as the fib6_info-based check did.
	 */
	if (!fib6_nh->fib_nh_gw_family)
		return;

	nh_gw = &rt->fib6_nh.fib_nh_gw6;
	dev = rt->fib6_nh.fib_nh_dev;
	nh_gw = &fib6_nh->fib_nh_gw6;
	dev = fib6_nh->fib_nh_dev;
	rcu_read_lock_bh();
	idev = __in6_dev_get(dev);
	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
@@ -554,13 +568,13 @@ static void rt6_probe(struct fib6_info *rt)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else if (time_after(jiffies, rt->last_probe +
	} else if (time_after(jiffies, fib6_nh->last_probe +
				       idev->cnf.rtr_probe_interval)) {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		rt->last_probe = jiffies;
		fib6_nh->last_probe = jiffies;
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = *nh_gw;
		dev_hold(dev);
@@ -572,7 +586,7 @@ out:
	rcu_read_unlock_bh();
}
#else
static inline void rt6_probe(struct fib6_info *rt)
static inline void rt6_probe(struct fib6_nh *fib6_nh)
{
}
#endif
@@ -580,27 +594,14 @@ static inline void rt6_probe(struct fib6_info *rt)
/*
 * Default Router Selection (RFC 2461 6.3.6)
 */
static inline int rt6_check_dev(struct fib6_info *rt, int oif)
{
	const struct net_device *dev = rt->fib6_nh.fib_nh_dev;

	if (!oif || dev->ifindex == oif)
		return 2;
	return 0;
}

static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
{
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
	struct neighbour *neigh;

	if (rt->fib6_flags & RTF_NONEXTHOP ||
	    !rt->fib6_nh.fib_nh_gw_family)
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.fib_nh_dev,
					  &rt->fib6_nh.fib_nh_gw6);
	neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
					  &fib6_nh->fib_nh_gw6);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
@@ -621,43 +622,44 @@ static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
	return ret;
}

static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
			   int strict)
{
	int m;
	int m = 0;

	if (!oif || nh->fib_nh_dev->ifindex == oif)
		m = 2;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
	if ((strict & RT6_LOOKUP_F_REACHABLE) &&
	    !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
		int n = rt6_check_neigh(nh);
		if (n < 0)
			return n;
	}
	return m;
}

static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
				   int *mpri, struct fib6_info *match,
				   bool *do_rr)
static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
		       int oif, int strict, int *mpri, bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	bool rc = false;
	int m;

	if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
	if (nh->fib_nh_flags & RTNH_F_DEAD)
		goto out;

	if (ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev) &&
	    rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
	if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
	    nh->fib_nh_flags & RTNH_F_LINKDOWN &&
	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
		goto out;

	if (fib6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	m = rt6_score_route(nh, fib6_flags, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
@@ -666,16 +668,42 @@ static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);
		rt6_probe(nh);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
		rc = true;
	}
out:
	return match;
	return rc;
}

/*
 * Score a run of routes and remember the best one seen so far.
 *
 * Walks the fib6_next chain starting at @rt_start until the chain ends or
 * @nomatch is reached.  Each non-expired entry's nexthop is handed to
 * find_match(); whenever find_match() reports a new best score (it updates
 * *@mpri and *@do_rr itself), *@match is pointed at that entry.
 *
 * @cont is optional.  When it is non-NULL, the walk stops at the first
 * entry whose fib6_metric differs from @metric and stores that entry in
 * *@cont.  When it is NULL, the metric is not checked and the walk
 * continues across entries of any metric.
 */
static void __find_rr_leaf(struct fib6_info *rt_start,
			   struct fib6_info *nomatch, u32 metric,
			   struct fib6_info **match, struct fib6_info **cont,
			   int oif, int strict, bool *do_rr, int *mpri)
{
	struct fib6_info *rt;

	for (rt = rt_start;
	     rt && rt != nomatch;
	     rt = rcu_dereference(rt->fib6_next)) {
		struct fib6_nh *nh;

		/* The metric boundary is checked before expiry, so even an
		 * expired entry with a different metric ends the walk.
		 */
		if (cont && rt->fib6_metric != metric) {
			*cont = rt;
			return;
		}

		if (fib6_check_expired(rt))
			continue;

		nh = &rt->fib6_nh;
		if (find_match(nh, rt->fib6_flags, oif, strict, mpri, do_rr))
			*match = rt;
	}
}

static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
@@ -684,35 +712,20 @@ static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
				      u32 metric, int oif, int strict,
				      bool *do_rr)
{
	struct fib6_info *rt, *match, *cont;
	struct fib6_info *match = NULL, *cont = NULL;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
		if (rt->fib6_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	for (rt = leaf; rt && rt != rr_head;
	     rt = rcu_dereference(rt->fib6_next)) {
		if (rt->fib6_metric != metric) {
			cont = rt;
			break;
		}
	__find_rr_leaf(rr_head, NULL, metric, &match, &cont,
		       oif, strict, do_rr, &mpri);

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}
	__find_rr_leaf(leaf, rr_head, metric, &match, &cont,
		       oif, strict, do_rr, &mpri);

	if (match || !cont)
		return match;

	for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	__find_rr_leaf(cont, NULL, metric, &match, NULL,
		       oif, strict, do_rr, &mpri);

	return match;
}
@@ -1061,36 +1074,37 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	f6i = rcu_dereference(fn->leaf);
	if (!f6i) {
	if (!f6i)
		f6i = net->ipv6.fib6_null_entry;
	} else {
	else
		f6i = rt6_device_match(net, f6i, &fl6->saddr,
				      fl6->flowi6_oif, flags);
		if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
			f6i = fib6_multipath_select(net, f6i, fl6,
						    fl6->flowi6_oif, skb,
						    flags);
	}

	if (f6i == net->ipv6.fib6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

	trace_fib6_table_lookup(net, f6i, table, fl6);
		rt = net->ipv6.ip6_null_entry;
		dst_hold(&rt->dst);
		goto out;
	}

	if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
		f6i = fib6_multipath_select(net, f6i, fl6, fl6->flowi6_oif, skb,
					    flags);
	/* Search through exception table */
	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
	if (rt) {
		if (ip6_hold_safe(net, &rt))
			dst_use_noref(&rt->dst, jiffies);
	} else if (f6i == net->ipv6.fib6_null_entry) {
		rt = net->ipv6.ip6_null_entry;
		dst_hold(&rt->dst);
	} else {
		rt = ip6_create_rt_rcu(f6i);
	}

out:
	trace_fib6_table_lookup(net, f6i, table, fl6);

	rcu_read_unlock();

	return rt;
@@ -1841,9 +1855,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
	rcu_read_lock();

	f6i = fib6_table_lookup(net, table, oif, fl6, strict);
	if (f6i->fib6_nsiblings)
		f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);

	if (f6i == net->ipv6.fib6_null_entry) {
		rt = net->ipv6.ip6_null_entry;
		rcu_read_unlock();
@@ -1851,6 +1862,9 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
		return rt;
	}

	if (f6i->fib6_nsiblings)
		f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);

	/*Search through exception table */
	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
	if (rt) {
@@ -2393,6 +2407,35 @@ void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
		      NULL);
}

/*
 * Decide whether @nh is the nexthop a redirect from gateway @gw applies to.
 *
 * The nexthop must not be flagged RTNH_F_DEAD, must have a gateway, and its
 * device must be the flow's output interface.  It matches directly when its
 * gateway equals @gw.  Otherwise the exception table of @f6i is consulted
 * for the flow's addresses, and a cached route whose gateway equals @gw
 * also counts as a match; in that case the cached route is returned
 * through @ret.
 *
 * Returns true on a match, false otherwise.  *@ret is written only for the
 * cached-route match.
 */
static bool ip6_redirect_nh_match(struct fib6_info *f6i,
				  struct fib6_nh *nh,
				  struct flowi6 *fl6,
				  const struct in6_addr *gw,
				  struct rt6_info **ret)
{
	struct rt6_info *cached;

	if (nh->fib_nh_flags & RTNH_F_DEAD)
		return false;
	if (!nh->fib_nh_gw_family)
		return false;
	if (fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
		return false;

	if (ipv6_addr_equal(gw, &nh->fib_nh_gw6))
		return true;

	/* A cached route's gateway can differ from its parent's after an
	 * earlier ip redirect, so look in the exception table before
	 * giving up on this nexthop.
	 */
	cached = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
	if (!cached || !ipv6_addr_equal(gw, &cached->rt6i_gateway))
		return false;

	*ret = cached;
	return true;
}

/* Handle redirects */
struct ip6rd_flowi {
	struct flowi6 fl6;
@@ -2406,7 +2449,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *ret = NULL, *rt_cache;
	struct rt6_info *ret = NULL;
	struct fib6_info *rt;
	struct fib6_node *fn;

@@ -2424,34 +2467,15 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for_each_fib6_node_rt_rcu(fn) {
		if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
			continue;
		if (fib6_check_expired(rt))
			continue;
		if (rt->fib6_flags & RTF_REJECT)
			break;
		if (!rt->fib6_nh.fib_nh_gw_family)
			continue;
		if (fl6->flowi6_oif != rt->fib6_nh.fib_nh_dev->ifindex)
			continue;
		/* rt_cache's gateway might be different from its 'parent'
		 * in the case of an ip redirect.
		 * So we keep searching in the exception table if the gateway
		 * is different.
		 */
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.fib_nh_gw6)) {
			rt_cache = rt6_find_cached_rt(rt,
						      &fl6->daddr,
						      &fl6->saddr);
			if (rt_cache &&
			    ipv6_addr_equal(&rdfl->gateway,
					    &rt_cache->rt6i_gateway)) {
				ret = rt_cache;
				break;
			}
			continue;
		}
		break;
		if (ip6_redirect_nh_match(rt, &rt->fib6_nh, fl6,
					  &rdfl->gateway, &ret))
			goto out;
	}

	if (!rt)