Commit 4001f1f0 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'Support-for-fdb-ECMP-nexthop-groups'

Roopa Prabhu says:

====================
Support for fdb ECMP nexthop groups

This series introduces ecmp nexthops and nexthop groups
for mac fdb entries. In subsequent patches this is used
by the vxlan driver fdb entries. The use case is
E-VPN multihoming [1,2,3] which requires bridged vxlan traffic
to be load balanced to remote switches (vteps) belonging to
the same multi-homed ethernet segment (This is analogous to
a multi-homed LAG but over vxlan).

Changes include new nexthop flag NHA_FDB for nexthops
referenced by fdb entries. These nexthops only have ip.
The patches make sure that routes dont reference such nexthops.

example:
$ip nexthop add id 12 via 172.16.1.2 fdb
$ip nexthop add id 13 via 172.16.1.3 fdb
$ip nexthop add id 102 group 12/13 fdb

$bridge fdb add 02:02:00:00:00:13 dev vxlan1000 nhid 101 self

[1] E-VPN https://tools.ietf.org/html/rfc7432
[2] E-VPN VxLAN: https://tools.ietf.org/html/rfc8365
[3] LPC talk with mention of nexthop groups for L2 ecmp
http://vger.kernel.org/lpc_net2018_talks/scaling_bridge_fdb_database_slidesV3.pdf



v4 -
    - fix error path free_skb in vxlan_xmit_nh
    - fix atomic notifier initialization issue
      (Reported-by: default avatarkernel test robot <rong.a.chen@intel.com&gt;)>
      The reported error was easy to locate and fix, but i was not
      able to re-test with the robot reproducer script due to some
      other issues with running the script on my test system.

v3 - fix wording in selftest print as pointed out by davidA

v2 -
	- dropped nikolays fixes for nexthop multipath null pointer deref
	  (he will send those separately)
	- added negative tests for route add with fdb nexthop + a few more
	- Fixes for a few  fdb replace conditions found during more testing
	- Moved to rcu_dereference_rtnl in vxlan_fdb_info and consolidate rcu
	  dereferences
	- Fixes to build failures Reported-by: default avatarkbuild test robot <lkp@intel.com>
	- DavidA, I am going to send a separate patch for the neighbor code validation
	  for NDA_NH_ID if thats ok.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 7b1b843a 0534c548
Loading
Loading
Loading
Loading
+276 −63
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@
#include <net/netns/generic.h>
#include <net/tun_proto.h>
#include <net/vxlan.h>
#include <net/nexthop.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_tunnel.h>
@@ -78,6 +79,9 @@ struct vxlan_fdb {
	u16		  state;	/* see ndm_state */
	__be32		  vni;
	u16		  flags;	/* see ndm_flags and below */
	struct list_head  nh_list;
	struct nexthop __rcu *nh;
	struct vxlan_dev  *vdev;
};

#define NTF_VXLAN_ADDED_BY_USER 0x100
@@ -174,11 +178,15 @@ static inline struct hlist_head *vs_head(struct net *net, __be16 port)
 */
static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb)
{
	if (rcu_access_pointer(fdb->nh))
		return NULL;
	return list_entry_rcu(fdb->remotes.next, struct vxlan_rdst, list);
}

static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
{
	if (rcu_access_pointer(fdb->nh))
		return NULL;
	return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
}

@@ -251,9 +259,10 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	bool send_ip, send_eth;
	struct nlmsghdr *nlh;
	struct nexthop *nh;
	struct ndmsg *ndm;
	bool send_ip, send_eth;

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
@@ -264,16 +273,21 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,

	send_eth = send_ip = true;

	nh = rcu_dereference_rtnl(fdb->nh);
	if (type == RTM_GETNEIGH) {
		if (rdst) {
			send_ip = !vxlan_addr_any(&rdst->remote_ip);
		send_eth = !is_zero_ether_addr(fdb->eth_addr);
			ndm->ndm_family = send_ip ? rdst->remote_ip.sa.sa_family : AF_INET;
		} else if (nh) {
			ndm->ndm_family = nexthop_get_family(nh);
		}
		send_eth = !is_zero_ether_addr(fdb->eth_addr);
	} else
		ndm->ndm_family	= AF_BRIDGE;
	ndm->ndm_state = fdb->state;
	ndm->ndm_ifindex = vxlan->dev->ifindex;
	ndm->ndm_flags = fdb->flags;
	if (rdst->offloaded)
	if (rdst && rdst->offloaded)
		ndm->ndm_flags |= NTF_OFFLOADED;
	ndm->ndm_type = RTN_UNICAST;

@@ -284,23 +298,30 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,

	if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
		goto nla_put_failure;

	if (send_ip && vxlan_nla_put_addr(skb, NDA_DST, &rdst->remote_ip))
	if (nh) {
		if (nla_put_u32(skb, NDA_NH_ID, nh->id))
			goto nla_put_failure;
	} else if (rdst) {
		if (send_ip && vxlan_nla_put_addr(skb, NDA_DST,
						  &rdst->remote_ip))
			goto nla_put_failure;

	if (rdst->remote_port && rdst->remote_port != vxlan->cfg.dst_port &&
		if (rdst->remote_port &&
		    rdst->remote_port != vxlan->cfg.dst_port &&
		    nla_put_be16(skb, NDA_PORT, rdst->remote_port))
			goto nla_put_failure;
		if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
		    nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
			goto nla_put_failure;
		if (rdst->remote_ifindex &&
		    nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
			goto nla_put_failure;
	}

	if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && fdb->vni &&
	    nla_put_u32(skb, NDA_SRC_VNI,
			be32_to_cpu(fdb->vni)))
		goto nla_put_failure;
	if (rdst->remote_ifindex &&
	    nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
		goto nla_put_failure;

	ci.ndm_used	 = jiffies_to_clock_t(now - fdb->used);
	ci.ndm_confirmed = 0;
@@ -401,7 +422,7 @@ static int vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
{
	int err;

	if (swdev_notify) {
	if (swdev_notify && rd) {
		switch (type) {
		case RTM_NEWNEIGH:
			err = vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
@@ -793,8 +814,9 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
	return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
}

static struct vxlan_fdb *vxlan_fdb_alloc(const u8 *mac, __u16 state,
					 __be32 src_vni, __u16 ndm_flags)
static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan, const u8 *mac,
					 __u16 state, __be32 src_vni,
					 __u16 ndm_flags)
{
	struct vxlan_fdb *f;

@@ -805,6 +827,9 @@ static struct vxlan_fdb *vxlan_fdb_alloc(const u8 *mac, __u16 state,
	f->flags = ndm_flags;
	f->updated = f->used = jiffies;
	f->vni = src_vni;
	f->nh = NULL;
	f->vdev = vxlan;
	INIT_LIST_HEAD(&f->nh_list);
	INIT_LIST_HEAD(&f->remotes);
	memcpy(f->eth_addr, mac, ETH_ALEN);

@@ -819,11 +844,78 @@ static void vxlan_fdb_insert(struct vxlan_dev *vxlan, const u8 *mac,
			   vxlan_fdb_head(vxlan, mac, src_vni));
}

static int vxlan_fdb_nh_update(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
			       u32 nhid, struct netlink_ext_ack *extack)
{
	struct nexthop *old_nh = rtnl_dereference(fdb->nh);
	struct nh_group *nhg;
	struct nexthop *nh;
	int err = -EINVAL;

	if (old_nh && old_nh->id == nhid)
		return 0;

	nh = nexthop_find_by_id(vxlan->net, nhid);
	if (!nh) {
		NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
		goto err_inval;
	}

	if (nh) {
		if (!nexthop_get(nh)) {
			NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
			nh = NULL;
			goto err_inval;
		}
		if (!nh->is_fdb_nh) {
			NL_SET_ERR_MSG(extack, "Nexthop is not a fdb nexthop");
			goto err_inval;
		}

		if (!nh->is_group || !nh->nh_grp->mpath) {
			NL_SET_ERR_MSG(extack, "Nexthop is not a multipath group");
			goto err_inval;
		}

		/* check nexthop group family */
		nhg = rtnl_dereference(nh->nh_grp);
		switch (vxlan->default_dst.remote_ip.sa.sa_family) {
		case AF_INET:
			if (!nhg->has_v4) {
				err = -EAFNOSUPPORT;
				NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
				goto err_inval;
			}
			break;
		case AF_INET6:
			if (nhg->has_v4) {
				err = -EAFNOSUPPORT;
				NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
				goto err_inval;
			}
		}
	}

	if (old_nh) {
		list_del_rcu(&fdb->nh_list);
		nexthop_put(old_nh);
	}
	rcu_assign_pointer(fdb->nh, nh);
	list_add_tail_rcu(&fdb->nh_list, &nh->fdb_list);
	return 1;

err_inval:
	if (nh)
		nexthop_put(nh);
	return err;
}

static int vxlan_fdb_create(struct vxlan_dev *vxlan,
			    const u8 *mac, union vxlan_addr *ip,
			    __u16 state, __be16 port, __be32 src_vni,
			    __be32 vni, __u32 ifindex, __u16 ndm_flags,
			    struct vxlan_fdb **fdb)
			    u32 nhid, struct vxlan_fdb **fdb,
			    struct netlink_ext_ack *extack)
{
	struct vxlan_rdst *rd = NULL;
	struct vxlan_fdb *f;
@@ -834,24 +926,37 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
		return -ENOSPC;

	netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
	f = vxlan_fdb_alloc(mac, state, src_vni, ndm_flags);
	f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags);
	if (!f)
		return -ENOMEM;

	if (nhid)
		rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
	else
		rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
	if (rc < 0) {
		kfree(f);
		return rc;
	}
	if (rc < 0)
		goto errout;

	*fdb = f;

	return 0;

errout:
	kfree(f);
	return rc;
}

static void __vxlan_fdb_free(struct vxlan_fdb *f)
{
	struct vxlan_rdst *rd, *nd;
	struct nexthop *nh;

	nh = rcu_dereference_raw(f->nh);
	if (nh) {
		rcu_assign_pointer(f->nh, NULL);
		list_del_rcu(&f->nh_list);
		nexthop_put(nh);
	}

	list_for_each_entry_safe(rd, nd, &f->remotes, list) {
		dst_cache_destroy(&rd->dst_cache);
@@ -875,12 +980,18 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
	netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr);

	--vxlan->addrcnt;
	if (do_notify)
	if (do_notify) {
		if (rcu_access_pointer(f->nh))
			vxlan_fdb_notify(vxlan, f, NULL, RTM_DELNEIGH,
					 swdev_notify, NULL);
		else
			list_for_each_entry(rd, &f->remotes, list)
				vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH,
						 swdev_notify, NULL);
	}

	hlist_del_rcu(&f->hlist);
	f->vdev = NULL;
	call_rcu(&f->rcu, vxlan_fdb_free);
}

@@ -897,7 +1008,7 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
				     __u16 state, __u16 flags,
				     __be16 port, __be32 vni,
				     __u32 ifindex, __u16 ndm_flags,
				     struct vxlan_fdb *f,
				     struct vxlan_fdb *f, u32 nhid,
				     bool swdev_notify,
				     struct netlink_ext_ack *extack)
{
@@ -908,6 +1019,18 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
	int rc = 0;
	int err;

	if (nhid && !rcu_access_pointer(f->nh)) {
		NL_SET_ERR_MSG(extack,
			       "Cannot replace an existing non nexthop fdb with a nexthop");
		return -EOPNOTSUPP;
	}

	if (nhid && (flags & NLM_F_APPEND)) {
		NL_SET_ERR_MSG(extack,
			       "Cannot append to a nexthop fdb");
		return -EOPNOTSUPP;
	}

	/* Do not allow an externally learned entry to take over an entry added
	 * by the user.
	 */
@@ -929,10 +1052,17 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
		/* Only change unicasts */
		if (!(is_multicast_ether_addr(f->eth_addr) ||
		      is_zero_ether_addr(f->eth_addr))) {
			if (nhid) {
				rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
				if (rc < 0)
					return rc;
			} else {
				rc = vxlan_fdb_replace(f, ip, port, vni,
						       ifindex, &oldrd);
			}
			notify |= rc;
		} else {
			NL_SET_ERR_MSG(extack, "Cannot replace non-unicast fdb entries");
			return -EOPNOTSUPP;
		}
	}
@@ -962,6 +1092,8 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
	return 0;

err_notify:
	if (nhid)
		return err;
	if ((flags & NLM_F_REPLACE) && rc)
		*rd = oldrd;
	else if ((flags & NLM_F_APPEND) && rc) {
@@ -975,7 +1107,7 @@ static int vxlan_fdb_update_create(struct vxlan_dev *vxlan,
				   const u8 *mac, union vxlan_addr *ip,
				   __u16 state, __u16 flags,
				   __be16 port, __be32 src_vni, __be32 vni,
				   __u32 ifindex, __u16 ndm_flags,
				   __u32 ifindex, __u16 ndm_flags, u32 nhid,
				   bool swdev_notify,
				   struct netlink_ext_ack *extack)
{
@@ -990,7 +1122,7 @@ static int vxlan_fdb_update_create(struct vxlan_dev *vxlan,

	netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
	rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
			      vni, ifindex, fdb_flags, &f);
			      vni, ifindex, fdb_flags, nhid, &f, extack);
	if (rc < 0)
		return rc;

@@ -1012,7 +1144,7 @@ static int vxlan_fdb_update(struct vxlan_dev *vxlan,
			    const u8 *mac, union vxlan_addr *ip,
			    __u16 state, __u16 flags,
			    __be16 port, __be32 src_vni, __be32 vni,
			    __u32 ifindex, __u16 ndm_flags,
			    __u32 ifindex, __u16 ndm_flags, u32 nhid,
			    bool swdev_notify,
			    struct netlink_ext_ack *extack)
{
@@ -1028,14 +1160,15 @@ static int vxlan_fdb_update(struct vxlan_dev *vxlan,

		return vxlan_fdb_update_existing(vxlan, ip, state, flags, port,
						 vni, ifindex, ndm_flags, f,
						 swdev_notify, extack);
						 nhid, swdev_notify, extack);
	} else {
		if (!(flags & NLM_F_CREATE))
			return -ENOENT;

		return vxlan_fdb_update_create(vxlan, mac, ip, state, flags,
					       port, src_vni, vni, ifindex,
					       ndm_flags, swdev_notify, extack);
					       ndm_flags, nhid, swdev_notify,
					       extack);
	}
}

@@ -1049,7 +1182,7 @@ static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,

static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
			   union vxlan_addr *ip, __be16 *port, __be32 *src_vni,
			   __be32 *vni, u32 *ifindex)
			   __be32 *vni, u32 *ifindex, u32 *nhid)
{
	struct net *net = dev_net(vxlan->dev);
	int err;
@@ -1109,6 +1242,11 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
		*ifindex = 0;
	}

	if (tb[NDA_NH_ID])
		*nhid = nla_get_u32(tb[NDA_NH_ID]);
	else
		*nhid = 0;

	return 0;
}

@@ -1123,7 +1261,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
	union vxlan_addr ip;
	__be16 port;
	__be32 src_vni, vni;
	u32 ifindex;
	u32 ifindex, nhid;
	u32 hash_index;
	int err;

@@ -1133,10 +1271,11 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
		return -EINVAL;
	}

	if (tb[NDA_DST] == NULL)
	if (!tb || (!tb[NDA_DST] && !tb[NDA_NH_ID]))
		return -EINVAL;

	err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex);
	err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
			      &nhid);
	if (err)
		return err;

@@ -1148,7 +1287,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
	err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
			       port, src_vni, vni, ifindex,
			       ndm->ndm_flags | NTF_VXLAN_ADDED_BY_USER,
			       true, extack);
			       nhid, true, extack);
	spin_unlock_bh(&vxlan->hash_lock[hash_index]);

	return err;
@@ -1159,8 +1298,8 @@ static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
			      __be16 port, __be32 src_vni, __be32 vni,
			      u32 ifindex, bool swdev_notify)
{
	struct vxlan_fdb *f;
	struct vxlan_rdst *rd = NULL;
	struct vxlan_fdb *f;
	int err = -ENOENT;

	f = vxlan_find_mac(vxlan, addr, src_vni);
@@ -1195,12 +1334,13 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
	struct vxlan_dev *vxlan = netdev_priv(dev);
	union vxlan_addr ip;
	__be32 src_vni, vni;
	__be16 port;
	u32 ifindex;
	u32 ifindex, nhid;
	u32 hash_index;
	__be16 port;
	int err;

	err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex);
	err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
			      &nhid);
	if (err)
		return err;

@@ -1228,6 +1368,17 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
		hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
			struct vxlan_rdst *rd;

			if (rcu_access_pointer(f->nh)) {
				err = vxlan_fdb_info(skb, vxlan, f,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGH,
						     NLM_F_MULTI, NULL);
				if (err < 0)
					goto out;
				continue;
			}

			list_for_each_entry_rcu(rd, &f->remotes, list) {
				if (*idx < cb->args[2])
					goto skip;
@@ -1311,6 +1462,10 @@ static bool vxlan_snoop(struct net_device *dev,
		if (f->state & (NUD_PERMANENT | NUD_NOARP))
			return true;

		/* Don't override an fdb with nexthop with a learnt entry */
		if (rcu_access_pointer(f->nh))
			return true;

		if (net_ratelimit())
			netdev_info(dev,
				    "%pM migrated from %pIS to %pIS\n",
@@ -1333,7 +1488,7 @@ static bool vxlan_snoop(struct net_device *dev,
					 vxlan->cfg.dst_port,
					 vni,
					 vxlan->default_dst.remote_vni,
					 ifindex, NTF_SELF, true, NULL);
					 ifindex, NTF_SELF, 0, true, NULL);
		spin_unlock(&vxlan->hash_lock[hash_index]);
	}

@@ -2616,6 +2771,38 @@ tx_error:
	kfree_skb(skb);
}

static void vxlan_xmit_nh(struct sk_buff *skb, struct net_device *dev,
			  struct vxlan_fdb *f, __be32 vni, bool did_rsc)
{
	struct vxlan_rdst nh_rdst;
	struct nexthop *nh;
	bool do_xmit;
	u32 hash;

	memset(&nh_rdst, 0, sizeof(struct vxlan_rdst));
	hash = skb_get_hash(skb);

	rcu_read_lock();
	nh = rcu_dereference(f->nh);
	if (!nh) {
		rcu_read_unlock();
		goto drop;
	}
	do_xmit = vxlan_fdb_nh_path_select(nh, hash, &nh_rdst);
	rcu_read_unlock();

	if (likely(do_xmit))
		vxlan_xmit_one(skb, dev, vni, &nh_rdst, did_rsc);
	else
		goto drop;

	return;

drop:
	dev->stats.tx_dropped++;
	dev_kfree_skb(skb);
}

/* Transmit local packets over Vxlan
 *
 * Outer IP header inherits ECN and DF from inner header.
@@ -2692,6 +2879,10 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
		}
	}

	if (rcu_access_pointer(f->nh)) {
		vxlan_xmit_nh(skb, dev, f,
			      (vni ? : vxlan->default_dst.remote_vni), did_rsc);
	} else {
		list_for_each_entry_rcu(rdst, &f->remotes, list) {
			struct sk_buff *skb1;

@@ -2703,11 +2894,12 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
			if (skb1)
				vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc);
		}

		if (fdst)
			vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
		else
			kfree_skb(skb);
	}

	return NETDEV_TX_OK;
}

@@ -3615,7 +3807,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
				       dst->remote_vni,
				       dst->remote_vni,
				       dst->remote_ifindex,
				       NTF_SELF, &f);
				       NTF_SELF, 0, &f, extack);
		if (err)
			return err;
	}
@@ -4013,7 +4205,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
					       vxlan->cfg.dst_port,
					       conf.vni, conf.vni,
					       conf.remote_ifindex,
					       NTF_SELF, true, extack);
					       NTF_SELF, 0, true, extack);
			if (err) {
				spin_unlock_bh(&vxlan->hash_lock[hash_index]);
				netdev_adjacent_change_abort(dst->remote_dev,
@@ -4335,7 +4527,7 @@ vxlan_fdb_external_learn_add(struct net_device *dev,
			       fdb_info->remote_vni,
			       fdb_info->remote_ifindex,
			       NTF_USE | NTF_SELF | NTF_EXT_LEARNED,
			       false, extack);
			       0, false, extack);
	spin_unlock_bh(&vxlan->hash_lock[hash_index]);

	return err;
@@ -4410,6 +4602,25 @@ static struct notifier_block vxlan_switchdev_notifier_block __read_mostly = {
	.notifier_call = vxlan_switchdev_event,
};

static int vxlan_nexthop_event(struct notifier_block *nb,
			       unsigned long event, void *ptr)
{
	struct nexthop *nh = ptr;
	struct vxlan_fdb *fdb, *tmp;

	if (!nh || event != NEXTHOP_EVENT_DEL)
		return NOTIFY_DONE;

	list_for_each_entry_safe(fdb, tmp, &nh->fdb_list, nh_list)
		vxlan_fdb_destroy(fdb->vdev, fdb, false, false);

	return NOTIFY_DONE;
}

static struct notifier_block vxlan_nexthop_notifier_block __read_mostly = {
	.notifier_call = vxlan_nexthop_event,
};

static __net_init int vxlan_init_net(struct net *net)
{
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
@@ -4421,7 +4632,7 @@ static __net_init int vxlan_init_net(struct net *net)
	for (h = 0; h < PORT_HASH_SIZE; ++h)
		INIT_HLIST_HEAD(&vn->sock_list[h]);

	return 0;
	return register_nexthop_notifier(net, &vxlan_nexthop_notifier_block);
}

static void vxlan_destroy_tunnels(struct net *net, struct list_head *head)
@@ -4453,6 +4664,8 @@ static void __net_exit vxlan_exit_batch_net(struct list_head *net_list)
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list)
		unregister_nexthop_notifier(net, &vxlan_nexthop_notifier_block);
	list_for_each_entry(net, net_list, exit_list)
		vxlan_destroy_tunnels(net, &list);

+1 −0
Original line number Diff line number Diff line
@@ -65,6 +65,7 @@ struct fib6_config {
	struct nl_info	fc_nlinfo;
	struct nlattr	*fc_encap;
	u16		fc_encap_type;
	bool		fc_is_fdb;
};

struct fib6_node {
+1 −0
Original line number Diff line number Diff line
@@ -14,5 +14,6 @@ struct netns_nexthop {

	unsigned int		seq;		/* protected by rtnl_mutex */
	u32			last_id_allocated;
	struct atomic_notifier_head notifier_chain;
};
#endif
+44 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@
#define __LINUX_NEXTHOP_H

#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/route.h>
#include <linux/types.h>
#include <net/ip_fib.h>
@@ -26,6 +27,7 @@ struct nh_config {
	u8		nh_family;
	u8		nh_protocol;
	u8		nh_blackhole;
	u8		nh_fdb;
	u32		nh_flags;

	int		nh_ifindex;
@@ -52,6 +54,7 @@ struct nh_info {

	u8			family;
	bool			reject_nh;
	bool			fdb_nh;

	union {
		struct fib_nh_common	fib_nhc;
@@ -80,6 +83,7 @@ struct nexthop {
	struct rb_node		rb_node;    /* entry on netns rbtree */
	struct list_head	fi_list;    /* v4 entries using nh */
	struct list_head	f6i_list;   /* v6 entries using nh */
	struct list_head        fdb_list;   /* fdb entries using this nh */
	struct list_head	grp_list;   /* nh group entries using this nh */
	struct net		*net;

@@ -88,6 +92,7 @@ struct nexthop {
	u8			protocol;   /* app managing this nh */
	u8			nh_flags;
	bool			is_group;
	bool			is_fdb_nh;

	refcount_t		refcnt;
	struct rcu_head		rcu;
@@ -98,6 +103,17 @@ struct nexthop {
	};
};

enum nexthop_event_type {
	NEXTHOP_EVENT_ADD,
	NEXTHOP_EVENT_DEL
};

int call_nexthop_notifier(struct notifier_block *nb, struct net *net,
			  enum nexthop_event_type event_type,
			  struct nexthop *nh);
int register_nexthop_notifier(struct net *net, struct notifier_block *nb);
int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);

/* caller is holding rcu or rtnl; no reference taken to nexthop */
struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
void nexthop_free_rcu(struct rcu_head *head);
@@ -304,4 +320,32 @@ static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
int nexthop_for_each_fib6_nh(struct nexthop *nh,
			     int (*cb)(struct fib6_nh *nh, void *arg),
			     void *arg);

static inline int nexthop_get_family(struct nexthop *nh)
{
	struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);

	return nhi->family;
}

static inline
struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh)
{
	struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);

	return &nhi->fib_nhc;
}

static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh,
							    int hash)
{
	struct nh_info *nhi;
	struct nexthop *nhp;

	nhp = nexthop_select_path(nh, hash);
	if (unlikely(!nhp))
		return NULL;
	nhi = rcu_dereference(nhp->nh_info);
	return &nhi->fib_nhc;
}
#endif
+25 −0
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@
#include <net/dst_metadata.h>
#include <net/rtnetlink.h>
#include <net/switchdev.h>
#include <net/nexthop.h>

#define IANA_VXLAN_UDP_PORT     4789

@@ -487,4 +488,28 @@ static inline void vxlan_flag_attr_error(int attrtype,
#undef VXLAN_FLAG
}

static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh,
					    int hash,
					    struct vxlan_rdst *rdst)
{
	struct fib_nh_common *nhc;

	nhc = nexthop_path_fdb_result(nh, hash);
	if (unlikely(!nhc))
		return false;

	switch (nhc->nhc_gw_family) {
	case AF_INET:
		rdst->remote_ip.sin.sin_addr.s_addr = nhc->nhc_gw.ipv4;
		rdst->remote_ip.sa.sa_family = AF_INET;
		break;
	case AF_INET6:
		rdst->remote_ip.sin6.sin6_addr = nhc->nhc_gw.ipv6;
		rdst->remote_ip.sa.sa_family = AF_INET6;
		break;
	}

	return true;
}

#endif
Loading