Commit a4b82994 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'ip_tunnel-next'



wenxu says:

====================
ip_tunnel: Refactor ip_gre collect metadata xmit to ip_md_tunnel_xmit

This patchset add tunnel_dst_cache and tnl_update_pmtu feature for
ip_md_tunnel_xmit also bugfix. Then Refactor collect metatdata mode
tunnel xmit to ip_md_tunnel_xmit
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 662a14d0 962924fa
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -267,7 +267,7 @@ void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id,
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol);
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		       const u8 proto);
		       const u8 proto, int tunnel_hlen);
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
+19 −93
Original line number Diff line number Diff line
@@ -458,81 +458,14 @@ static int gre_handle_offloads(struct sk_buff *skb, bool csum)
	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}

static struct rtable *gre_get_rt(struct sk_buff *skb,
				 struct net_device *dev,
				 struct flowi4 *fl,
				 const struct ip_tunnel_key *key)
{
	struct net *net = dev_net(dev);

	memset(fl, 0, sizeof(*fl));
	fl->daddr = key->u.ipv4.dst;
	fl->saddr = key->u.ipv4.src;
	fl->flowi4_tos = RT_TOS(key->tos);
	fl->flowi4_mark = skb->mark;
	fl->flowi4_proto = IPPROTO_GRE;

	return ip_route_output_key(net, fl);
}

static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
				      struct net_device *dev,
				      struct flowi4 *fl,
				      int tunnel_hlen)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	int min_headroom;
	bool use_cache;
	int err;

	tun_info = skb_tunnel_info(skb);
	key = &tun_info->key;
	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);

	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
	if (!rt) {
		rt = gre_get_rt(skb, dev, fl, key);
		if (IS_ERR(rt))
			goto err_free_skb;
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl->saddr);
	}

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ tunnel_hlen + sizeof(struct iphdr);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) +
						16);
		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}
	return rt;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NULL;
}

static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			__be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	struct flowi4 fl;
	int tunnel_hlen;
	__be16 df, flags;
	__be16 flags;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
@@ -542,13 +475,12 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
	key = &tun_info->key;
	tunnel_hlen = gre_calc_hlen(key->tun_flags);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;
	if (skb_cow_head(skb, dev->needed_headroom))
		goto err_free_skb;

	/* Push Tunnel header. */
	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
		goto err_free_rt;
		goto err_free_skb;

	flags = tun_info->key.tun_flags &
		(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
@@ -556,14 +488,10 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			 tunnel_id_to_key32(tun_info->key.tun_id),
			 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
@@ -575,10 +503,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	struct rtable *rt = NULL;
	bool truncate = false;
	__be16 df, proto;
	struct flowi4 fl;
	__be16 proto;
	int tunnel_hlen;
	int version;
	int nhoff;
@@ -591,21 +517,20 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)

	key = &tun_info->key;
	if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
		goto err_free_rt;
		goto err_free_skb;
	md = ip_tunnel_info_opts(tun_info);
	if (!md)
		goto err_free_rt;
		goto err_free_skb;

	/* ERSPAN has fixed 8 byte GRE header */
	version = md->version;
	tunnel_hlen = 8 + erspan_hdr_len(version);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;
	if (skb_cow_head(skb, dev->needed_headroom))
		goto err_free_skb;

	if (gre_handle_offloads(skb, false))
		goto err_free_rt;
		goto err_free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
@@ -634,20 +559,16 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
				       truncate, true);
		proto = htons(ETH_P_ERSPAN2);
	} else {
		goto err_free_rt;
		goto err_free_skb;
	}

	gre_build_header(skb, 8, TUNNEL_SEQ,
			 proto, 0, htonl(tunnel->o_seqno++));

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
@@ -656,13 +577,18 @@ err_free_skb:
static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	const struct ip_tunnel_key *key;
	struct rtable *rt;
	struct flowi4 fl4;

	if (ip_tunnel_info_af(info) != AF_INET)
		return -EINVAL;

	rt = gre_get_rt(skb, dev, &fl4, &info->key);
	key = &info->key;
	ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id), key->tos, 0,
			    skb->mark);
	rt = ip_route_output_key(dev_net(dev), &fl4);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

+45 −17
Original line number Diff line number Diff line
@@ -501,15 +501,19 @@ EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df,
			    const struct iphdr *inner_iph)
			    const struct iphdr *inner_iph,
			    int tunnel_hlen, __be32 dst, bool md)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int pkt_size;
	int mtu;

	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
	pkt_size = skb->len - tunnel_hlen - dev->hard_header_len;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
					- sizeof(struct iphdr) - tunnel_hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

@@ -527,11 +531,13 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
		__be32 daddr;

		daddr = md ? dst : tunnel->parms.iph.daddr;

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			if ((daddr && !ipv4_is_multicast(daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
@@ -548,17 +554,19 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
	return 0;
}

void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		       u8 proto, int tunnel_hlen)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt;
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;
	bool use_cache;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
@@ -574,20 +582,39 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
			    RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
			    0, skb->mark);
	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
		goto tx_error;

	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);
		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
	}
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
			    key->u.ipv4.dst, true)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = key->ttl;
	if (ttl == 0) {
@@ -598,10 +625,10 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}
	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	else if (skb->protocol == htons(ETH_P_IP))

	if (!df && skb->protocol == htons(ETH_P_IP))
		df = inner_iph->frag_off & htons(IP_DF);

	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	if (headroom > dev->needed_headroom)
		dev->needed_headroom = headroom;
@@ -731,7 +758,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph,
			    0, 0, false)) {
		ip_rt_put(rt);
		goto tx_error;
	}
+1 −1
Original line number Diff line number Diff line
@@ -302,7 +302,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
	skb_set_inner_ipproto(skb, ipproto);

	if (tunnel->collect_md)
		ip_md_tunnel_xmit(skb, dev, ipproto);
		ip_md_tunnel_xmit(skb, dev, ipproto, 0);
	else
		ip_tunnel_xmit(skb, dev, tiph, ipproto);
	return NETDEV_TX_OK;