Commit df0651f8 authored by David S. Miller
Browse files

Merge branch 'ip6_tunnel-add-MPLS-support'



Vadim Fedorenko says:

====================
ip6_tunnel: add MPLS support

The support for MPLS-in-IPv4 was added earlier. This patchset adds
support for MPLS-in-IPv6.

Changes in v2:
- Eliminate the IS_ENABLED(CONFIG_MPLS) ifdefs
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 87566b44 1515aa70
Loading
Loading
Loading
Loading
+143 −104
Original line number Diff line number Diff line
@@ -89,6 +89,11 @@ struct ip6_tnl_net {
	struct ip6_tnl __rcu *collect_md_tun;
};

/*
 * Report whether the kernel was built with CONFIG_MPLS.
 *
 * Returns the value of IS_ENABLED(CONFIG_MPLS).  Module init and cleanup
 * use it to decide whether the mplsip6 handler is registered and
 * deregistered, so no #ifdef is needed at those call sites.
 */
static inline int ip6_tnl_mpls_supported(void)
{
	return IS_ENABLED(CONFIG_MPLS);
}

static struct net_device_stats *ip6_get_stats(struct net_device *dev)
{
	struct pcpu_sw_netstats tmp, sum = { 0 };
@@ -718,6 +723,20 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	return 0;
}

/*
 * ICMPv6 error handler for MPLS-in-IPv6 tunnels.
 *
 * Forwards the error to ip6_tnl_err() with IPPROTO_MPLS as the inner
 * protocol and returns its result unchanged.  The relayed_* locals are
 * seeded from the incoming type/code/info and passed by pointer because
 * ip6_tnl_err() takes them that way; whatever it stores there is not
 * used afterwards.
 */
static int
mplsip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	    u8 type, u8 code, int offset, __be32 info)
{
	u8 relayed_type = type;
	u8 relayed_code = code;
	int relayed_msg = 0;
	__u32 relayed_info = ntohl(info);

	return ip6_tnl_err(skb, IPPROTO_MPLS, opt, &relayed_type,
			   &relayed_code, &relayed_msg, &relayed_info,
			   offset);
}

static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
				       const struct ipv6hdr *ipv6h,
				       struct sk_buff *skb)
@@ -740,6 +759,14 @@ static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
	return IP6_ECN_decapsulate(ipv6h, skb);
}

/*
 * DSCP/ECN decapsulation callback for MPLS payloads.
 *
 * Has the same signature as the ip4ip6/ip6ip6 variants so that
 * mplsip6_rcv() can pass it to ipxip6_rcv().  None of the arguments are
 * examined; the function always returns 0.
 */
static inline int mplsip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
					       const struct ipv6hdr *ipv6h,
					       struct sk_buff *skb)
{
	/* ECN is not supported in AF_MPLS */
	return 0;
}

__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
			     const struct in6_addr *laddr,
			     const struct in6_addr *raddr)
@@ -901,6 +928,11 @@ static const struct tnl_ptk_info tpi_v4 = {
	.proto = htons(ETH_P_IP),
};

/*
 * Tunnel packet info handed to ipxip6_rcv() by mplsip6_rcv().  Only the
 * inner protocol is set (ETH_P_MPLS_UC in network byte order); every
 * other member keeps its zero default.
 */
static const struct tnl_ptk_info tpi_mpls = {
	/* no tunnel info required for mplsip6. */
	.proto = htons(ETH_P_MPLS_UC),
};

static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
		      const struct tnl_ptk_info *tpi,
		      int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
@@ -958,6 +990,12 @@ static int ip6ip6_rcv(struct sk_buff *skb)
			  ip6ip6_dscp_ecn_decapsulate);
}

/*
 * Receive handler for MPLS-in-IPv6 packets.
 *
 * Delegates to the shared ipxip6_rcv() path with IPPROTO_MPLS, the MPLS
 * tunnel packet info and the MPLS DSCP/ECN callback, and returns
 * whatever ipxip6_rcv() returns.
 */
static int mplsip6_rcv(struct sk_buff *skb)
{
	return ipxip6_rcv(skb, IPPROTO_MPLS, &tpi_mpls,
			  mplsip6_dscp_ecn_decapsulate);
}

struct ipv6_tel_txoption {
	struct ipv6_txoptions ops;
	__u8 dst_opt[8];
@@ -1232,6 +1270,8 @@ route_lookup:
		ipv6_push_frag_opts(skb, &opt.ops, &proto);
	}

	skb_set_inner_ipproto(skb, proto);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	ipv6h = ipv6_hdr(skb);
@@ -1253,22 +1293,22 @@ tx_err_dst_release:
EXPORT_SYMBOL(ip6_tnl_xmit);

static inline int
ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev,
		u8 protocol)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct ipv6hdr *ipv6h;
	const struct iphdr  *iph;
	int encap_limit = -1;
	__u16 offset;
	struct flowi6 fl6;
	__u8 dsfield;
	__u8 dsfield, orig_dsfield;
	__u32 mtu;
	u8 tproto;
	int err;

	iph = ip_hdr(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	tproto = READ_ONCE(t->parms.proto);
	if (tproto != IPPROTO_IPIP && tproto != 0)
	if (tproto != protocol && tproto != 0)
		return -1;

	if (t->parms.collect_md) {
@@ -1281,87 +1321,33 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
			return -1;
		key = &tun_info->key;
		memset(&fl6, 0, sizeof(fl6));
		fl6.flowi6_proto = IPPROTO_IPIP;
		fl6.flowi6_proto = protocol;
		fl6.saddr = key->u.ipv6.src;
		fl6.daddr = key->u.ipv6.dst;
		fl6.flowlabel = key->label;
		dsfield =  key->tos;
		switch (protocol) {
		case IPPROTO_IPIP:
			iph = ip_hdr(skb);
			orig_dsfield = ipv4_get_dsfield(iph);
			break;
		case IPPROTO_IPV6:
			ipv6h = ipv6_hdr(skb);
			orig_dsfield = ipv6_get_dsfield(ipv6h);
			break;
		default:
			orig_dsfield = dsfield;
			break;
		}
	} else {
		if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
			encap_limit = t->parms.encap_limit;

		memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
		fl6.flowi6_proto = IPPROTO_IPIP;

		if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
			dsfield = ipv4_get_dsfield(iph);
		else
			dsfield = ip6_tclass(t->parms.flowinfo);
		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
			fl6.flowi6_mark = skb->mark;
		else
			fl6.flowi6_mark = t->parms.fwmark;
	}

	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
	dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));

	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
		return -1;

	skb_set_inner_ipproto(skb, IPPROTO_IPIP);

	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
			   IPPROTO_IPIP);
	if (err != 0) {
		/* XXX: send ICMP error even if DF is not set. */
		if (err == -EMSGSIZE)
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
				  htonl(mtu));
		return -1;
	}

	return 0;
}

static inline int
ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct ipv6hdr *ipv6h;
	int encap_limit = -1;
	__u16 offset;
	struct flowi6 fl6;
	__u8 dsfield;
	__u32 mtu;
	u8 tproto;
	int err;

	ipv6h = ipv6_hdr(skb);
	tproto = READ_ONCE(t->parms.proto);
	if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
	    ip6_tnl_addr_conflict(t, ipv6h))
		return -1;

	if (t->parms.collect_md) {
		struct ip_tunnel_info *tun_info;
		const struct ip_tunnel_key *key;

		tun_info = skb_tunnel_info(skb);
		if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
			     ip_tunnel_info_af(tun_info) != AF_INET6))
			return -1;
		key = &tun_info->key;
		memset(&fl6, 0, sizeof(fl6));
		fl6.flowi6_proto = IPPROTO_IPV6;
		fl6.saddr = key->u.ipv6.src;
		fl6.daddr = key->u.ipv6.dst;
		fl6.flowlabel = key->label;
		dsfield = key->tos;
	} else {
		offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
		/* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
		ipv6h = ipv6_hdr(skb);
		if (protocol == IPPROTO_IPV6) {
			offset = ip6_tnl_parse_tlv_enc_lim(skb,
						skb_network_header(skb));
			/* ip6_tnl_parse_tlv_enc_lim() might have
			 * reallocated skb->head
			 */
			if (offset > 0) {
				struct ipv6_tlv_tnl_enc_lim *tel;

@@ -1372,38 +1358,63 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
					return -1;
				}
				encap_limit = tel->encap_limit - 1;
		} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
			encap_limit = t->parms.encap_limit;
			}
		}

		memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
		fl6.flowi6_proto = IPPROTO_IPV6;
		fl6.flowi6_proto = protocol;

		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
			fl6.flowi6_mark = skb->mark;
		else
			fl6.flowi6_mark = t->parms.fwmark;
		switch (protocol) {
		case IPPROTO_IPIP:
			iph = ip_hdr(skb);
			orig_dsfield = ipv4_get_dsfield(iph);
			if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
			dsfield = ipv6_get_dsfield(ipv6h);
				dsfield = orig_dsfield;
			else
				dsfield = ip6_tclass(t->parms.flowinfo);
			break;
		case IPPROTO_IPV6:
			ipv6h = ipv6_hdr(skb);
			orig_dsfield = ipv6_get_dsfield(ipv6h);
			if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
				dsfield = orig_dsfield;
			else
				dsfield = ip6_tclass(t->parms.flowinfo);
			if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
				fl6.flowlabel |= ip6_flowlabel(ipv6h);
		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
			fl6.flowi6_mark = skb->mark;
		else
			fl6.flowi6_mark = t->parms.fwmark;
			break;
		default:
			orig_dsfield = dsfield = ip6_tclass(t->parms.flowinfo);
			break;
		}
	}

	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
	dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
	dsfield = INET_ECN_encapsulate(dsfield, orig_dsfield);

	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
		return -1;

	skb_set_inner_ipproto(skb, IPPROTO_IPV6);

	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
			   IPPROTO_IPV6);
			   protocol);
	if (err != 0) {
		/* XXX: send ICMP error even if DF is not set. */
		if (err == -EMSGSIZE)
			switch (protocol) {
			case IPPROTO_IPIP:
				icmp_send(skb, ICMP_DEST_UNREACH,
					  ICMP_FRAG_NEEDED, htonl(mtu));
				break;
			case IPPROTO_IPV6:
				icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
				break;
			default:
				break;
			}
		return -1;
	}

@@ -1415,6 +1426,7 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net_device_stats *stats = &t->dev->stats;
	u8 ipproto;
	int ret;

	if (!pskb_inet_may_pull(skb))
@@ -1422,15 +1434,21 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		ret = ip4ip6_tnl_xmit(skb, dev);
		ipproto = IPPROTO_IPIP;
		break;
	case htons(ETH_P_IPV6):
		ret = ip6ip6_tnl_xmit(skb, dev);
		if (ip6_tnl_addr_conflict(t, ipv6_hdr(skb)))
			goto tx_err;
		ipproto = IPPROTO_IPV6;
		break;
	case htons(ETH_P_MPLS_UC):
		ipproto = IPPROTO_MPLS;
		break;
	default:
		goto tx_err;
	}

	ret = ipxip6_tnl_xmit(skb, dev, ipproto);
	if (ret < 0)
		goto tx_err;

@@ -2218,6 +2236,12 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
	.priority	=	1,
};

/*
 * Receive and error callbacks for MPLS-in-IPv6.  Module init registers
 * this with xfrm6_tunnel_register() under AF_MPLS when MPLS support is
 * enabled.  It uses priority 1, the same value as ip6ip6_handler.
 */
static struct xfrm6_tunnel mplsip6_handler __read_mostly = {
	.handler	= mplsip6_rcv,
	.err_handler	= mplsip6_err,
	.priority	=	1,
};

static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list)
{
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
@@ -2332,6 +2356,15 @@ static int __init ip6_tunnel_init(void)
		pr_err("%s: can't register ip6ip6\n", __func__);
		goto out_ip6ip6;
	}

	if (ip6_tnl_mpls_supported()) {
		err = xfrm6_tunnel_register(&mplsip6_handler, AF_MPLS);
		if (err < 0) {
			pr_err("%s: can't register mplsip6\n", __func__);
			goto out_mplsip6;
		}
	}

	err = rtnl_link_register(&ip6_link_ops);
	if (err < 0)
		goto rtnl_link_failed;
@@ -2339,6 +2372,9 @@ static int __init ip6_tunnel_init(void)
	return 0;

rtnl_link_failed:
	if (ip6_tnl_mpls_supported())
		xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS);
out_mplsip6:
	xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
out_ip6ip6:
	xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
@@ -2361,6 +2397,9 @@ static void __exit ip6_tunnel_cleanup(void)
	if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
		pr_info("%s: can't deregister ip6ip6\n", __func__);

	if (ip6_tnl_mpls_supported() &&
	    xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS))
		pr_info("%s: can't deregister mplsip6\n", __func__);
	unregister_pernet_device(&ip6_tnl_net_ops);
}

+83 −4
Original line number Diff line number Diff line
@@ -21,8 +21,14 @@

/*
 * Heads of the registered handler chains.  xfrm6_tunnel_register() and
 * xfrm6_tunnel_deregister() pick the chain from their 'family' argument:
 * AF_INET6 -> tunnel6_handlers, AF_INET -> tunnel46_handlers,
 * AF_MPLS -> tunnelmpls6_handlers.
 */
static struct xfrm6_tunnel __rcu *tunnel6_handlers __read_mostly;
static struct xfrm6_tunnel __rcu *tunnel46_handlers __read_mostly;
static struct xfrm6_tunnel __rcu *tunnelmpls6_handlers __read_mostly;
/* Held by register/deregister while they walk and modify a chain. */
static DEFINE_MUTEX(tunnel6_mutex);

/*
 * Report whether the kernel was built with CONFIG_MPLS.
 *
 * Returns the value of IS_ENABLED(CONFIG_MPLS).  tunnel6_init() and
 * tunnel6_fini() use it to decide whether the IPPROTO_MPLS protocol
 * handler is added and removed.
 */
static inline int xfrm6_tunnel_mpls_supported(void)
{
	return IS_ENABLED(CONFIG_MPLS);
}

int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
{
	struct xfrm6_tunnel __rcu **pprev;
@@ -32,8 +38,21 @@ int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)

	mutex_lock(&tunnel6_mutex);

	for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
	     (t = rcu_dereference_protected(*pprev,
	switch (family) {
	case AF_INET6:
		pprev = &tunnel6_handlers;
		break;
	case AF_INET:
		pprev = &tunnel46_handlers;
		break;
	case AF_MPLS:
		pprev = &tunnelmpls6_handlers;
		break;
	default:
		goto err;
	}

	for (; (t = rcu_dereference_protected(*pprev,
			lockdep_is_held(&tunnel6_mutex))) != NULL;
	     pprev = &t->next) {
		if (t->priority > priority)
@@ -62,8 +81,21 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)

	mutex_lock(&tunnel6_mutex);

	for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
	     (t = rcu_dereference_protected(*pprev,
	switch (family) {
	case AF_INET6:
		pprev = &tunnel6_handlers;
		break;
	case AF_INET:
		pprev = &tunnel46_handlers;
		break;
	case AF_MPLS:
		pprev = &tunnelmpls6_handlers;
		break;
	default:
		goto err;
	}

	for (; (t = rcu_dereference_protected(*pprev,
			lockdep_is_held(&tunnel6_mutex))) != NULL;
	     pprev = &t->next) {
		if (t == handler) {
@@ -73,6 +105,7 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
		}
	}

err:
	mutex_unlock(&tunnel6_mutex);

	synchronize_net();
@@ -86,6 +119,24 @@ EXPORT_SYMBOL(xfrm6_tunnel_deregister);
	     handler != NULL;				\
	     handler = rcu_dereference(handler->next))	\

static int tunnelmpls6_rcv(struct sk_buff *skb)
{
	struct xfrm6_tunnel *handler;

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto drop;

	for_each_tunnel_rcu(tunnelmpls6_handlers, handler)
		if (!handler->handler(skb))
			return 0;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);

drop:
	kfree_skb(skb);
	return 0;
}

static int tunnel6_rcv(struct sk_buff *skb)
{
	struct xfrm6_tunnel *handler;
@@ -146,6 +197,18 @@ static int tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	return -ENOENT;
}

/*
 * Error entry point installed as tunnelmpls6_protocol.err_handler.
 *
 * Walks the tunnelmpls6_handlers chain and passes the error to each
 * handler's ->err_handler() until one returns 0.
 *
 * Returns 0 if some handler returned 0, -ENOENT otherwise (including
 * when the chain is empty).
 */
static int tunnelmpls6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
			   u8 type, u8 code, int offset, __be32 info)
{
	struct xfrm6_tunnel *handler;
	int ret = -ENOENT;

	for_each_tunnel_rcu(tunnelmpls6_handlers, handler) {
		if (handler->err_handler(skb, opt, type, code,
					 offset, info) == 0) {
			ret = 0;
			break;
		}
	}

	return ret;
}

static const struct inet6_protocol tunnel6_protocol = {
	.handler	= tunnel6_rcv,
	.err_handler	= tunnel6_err,
@@ -158,6 +221,12 @@ static const struct inet6_protocol tunnel46_protocol = {
	.flags          = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

/*
 * Protocol descriptor that tunnel6_init() adds for IPPROTO_MPLS when
 * MPLS support is enabled.  It routes received packets and errors to the
 * tunnelmpls6 handler chain and uses the same flags as
 * tunnel46_protocol.
 */
static const struct inet6_protocol tunnelmpls6_protocol = {
	.handler	= tunnelmpls6_rcv,
	.err_handler	= tunnelmpls6_err,
	.flags          = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static int __init tunnel6_init(void)
{
	if (inet6_add_protocol(&tunnel6_protocol, IPPROTO_IPV6)) {
@@ -169,6 +238,13 @@ static int __init tunnel6_init(void)
		inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6);
		return -EAGAIN;
	}
	if (xfrm6_tunnel_mpls_supported() &&
	    inet6_add_protocol(&tunnelmpls6_protocol, IPPROTO_MPLS)) {
		pr_err("%s: can't add protocol\n", __func__);
		inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6);
		inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP);
		return -EAGAIN;
	}
	return 0;
}

@@ -178,6 +254,9 @@ static void __exit tunnel6_fini(void)
		pr_err("%s: can't remove protocol\n", __func__);
	if (inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6))
		pr_err("%s: can't remove protocol\n", __func__);
	if (xfrm6_tunnel_mpls_supported() &&
	    inet6_del_protocol(&tunnelmpls6_protocol, IPPROTO_MPLS))
		pr_err("%s: can't remove protocol\n", __func__);
}

module_init(tunnel6_init);
+2 −1
Original line number Diff line number Diff line
@@ -1593,7 +1593,8 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
		    dev->type == ARPHRD_IPGRE ||
		    dev->type == ARPHRD_IP6GRE ||
		    dev->type == ARPHRD_SIT ||
		    dev->type == ARPHRD_TUNNEL) {
		    dev->type == ARPHRD_TUNNEL ||
		    dev->type == ARPHRD_TUNNEL6) {
			mdev = mpls_add_dev(dev);
			if (IS_ERR(mdev))
				return notifier_from_errno(PTR_ERR(mdev));