Commit 4b1f5dda authored by David S. Miller's avatar David S. Miller
Browse files


Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains Netfilter/IPVS updates for net-next,
more specifically:

* Updates for ipset:

1) Coding style fix for ipset comment extension, from Jeremy Sowden.

2) De-inline many functions in ipset, from Jeremy Sowden.

3) Move ipset function definition from header to source file.

4) Move ip_set_put_flags() to source, export it as a symbol, remove
   inline.

5) Move range_to_mask() to the source file where this is used.

6) Move ip_set_get_ip_port() to the source file where this is used.

* IPVS selftests and netns improvements:

7) Two patches to speedup ipvs netns dismantle, from Haishuang Yan.

8) Three patches to add selftest script for ipvs, also from
   Haishuang Yan.

* Conntrack updates and new nf_hook_slow_list() function:

9) Document ct ecache extension, from Florian Westphal.

10) Skip ct extensions from ctnetlink dump, from Florian.

11) Free ct extension immediately, from Florian.

12) Skip access to ecache extension from nf_ct_deliver_cached_events()
    this is not correct as reported by Syzbot.

13) Add and use nf_hook_slow_list(), from Florian.

* Flowtable infrastructure updates:

14) Move priority to nf_flowtable definition.

15) Dynamic allocation of per-device hooks in flowtables.

16) Allow to include netdevice only once in flowtable definitions.

17) Rise maximum number of devices per flowtable.

* Netfilter hardware offload infrastructure updates:

18) Add nft_flow_block_chain() helper function.

19) Pass callback list to nft_setup_cb_call().

20) Add nft_flow_cls_offload_setup() helper function.

21) Remove rules for the unregistered device via netdevice event.

22) Support for multiple devices in a basechain definition at the
    ingress hook.

22) Add nft_chain_offload_cmd() helper function.

23) Add nft_flow_block_offload_init() helper function.

24) Rewind in case of failing to bind multiple devices to hook.

25) Typo in IPv6 tproxy module description, from Norman Rasmussen.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 64fe8e97 671312e1
Loading
Loading
Loading
Loading
+31 −10
Original line number Diff line number Diff line
@@ -199,6 +199,8 @@ extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
		 const struct nf_hook_entries *e, unsigned int i);

void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state,
		       const struct nf_hook_entries *e);
/**
 *	nf_hook - call a netfilter hook
 *
@@ -311,17 +313,36 @@ NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
	     struct list_head *head, struct net_device *in, struct net_device *out,
	     int (*okfn)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *skb, *next;
	struct list_head sublist;

	INIT_LIST_HEAD(&sublist);
	list_for_each_entry_safe(skb, next, head, list) {
		list_del(&skb->list);
		if (nf_hook(pf, hook, net, sk, skb, in, out, okfn) == 1)
			list_add_tail(&skb->list, &sublist);
	struct nf_hook_entries *hook_head = NULL;

#ifdef CONFIG_JUMP_LABEL
	if (__builtin_constant_p(pf) &&
	    __builtin_constant_p(hook) &&
	    !static_key_false(&nf_hooks_needed[pf][hook]))
		return;
#endif

	rcu_read_lock();
	switch (pf) {
	case NFPROTO_IPV4:
		hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]);
		break;
	case NFPROTO_IPV6:
		hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}
	/* Put passed packets back on main list */
	list_splice(&sublist, head);

	if (hook_head) {
		struct nf_hook_state state;

		nf_hook_state_init(&state, hook, pf, in, out, sk, net, okfn);

		nf_hook_slow_list(head, &state, hook_head);
	}
	rcu_read_unlock();
}

/* Call setsockopt() */
+8 −188
Original line number Diff line number Diff line
@@ -269,34 +269,15 @@ ip_set_ext_destroy(struct ip_set *set, void *data)
	/* Check that the extension is enabled for the set and
	 * call it's destroy function for its extension part in data.
	 */
	if (SET_WITH_COMMENT(set))
		ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy(
			set, ext_comment(data, set));
}
	if (SET_WITH_COMMENT(set)) {
		struct ip_set_comment *c = ext_comment(data, set);

static inline int
ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
{
	u32 cadt_flags = 0;

	if (SET_WITH_TIMEOUT(set))
		if (unlikely(nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
					   htonl(set->timeout))))
			return -EMSGSIZE;
	if (SET_WITH_COUNTER(set))
		cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
	if (SET_WITH_COMMENT(set))
		cadt_flags |= IPSET_FLAG_WITH_COMMENT;
	if (SET_WITH_SKBINFO(set))
		cadt_flags |= IPSET_FLAG_WITH_SKBINFO;
	if (SET_WITH_FORCEADD(set))
		cadt_flags |= IPSET_FLAG_WITH_FORCEADD;

	if (!cadt_flags)
		return 0;
	return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags));
		ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy(set, c);
	}
}

int ip_set_put_flags(struct sk_buff *skb, struct ip_set *set);

/* Netlink CB args */
enum {
	IPSET_CB_NET = 0,	/* net namespace */
@@ -506,144 +487,8 @@ ip_set_timeout_set(unsigned long *timeout, u32 value)
	*timeout = t;
}

static inline u32
ip_set_timeout_get(const unsigned long *timeout)
{
	u32 t;

	if (*timeout == IPSET_ELEM_PERMANENT)
		return 0;

	t = jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
	/* Zero value in userspace means no timeout */
	return t == 0 ? 1 : t;
}

static inline char*
ip_set_comment_uget(struct nlattr *tb)
{
	return nla_data(tb);
}

/* Called from uadd only, protected by the set spinlock.
 * The kadt functions don't use the comment extensions in any way.
 */
static inline void
ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
		    const struct ip_set_ext *ext)
{
	struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
	size_t len = ext->comment ? strlen(ext->comment) : 0;

	if (unlikely(c)) {
		set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
		kfree_rcu(c, rcu);
		rcu_assign_pointer(comment->c, NULL);
	}
	if (!len)
		return;
	if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
		len = IPSET_MAX_COMMENT_SIZE;
	c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
	if (unlikely(!c))
		return;
	strlcpy(c->str, ext->comment, len + 1);
	set->ext_size += sizeof(*c) + strlen(c->str) + 1;
	rcu_assign_pointer(comment->c, c);
}

/* Used only when dumping a set, protected by rcu_read_lock() */
static inline int
ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
{
	struct ip_set_comment_rcu *c = rcu_dereference(comment->c);

	if (!c)
		return 0;
	return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
}

/* Called from uadd/udel, flush or the garbage collectors protected
 * by the set spinlock.
 * Called when the set is destroyed and when there can't be any user
 * of the set data anymore.
 */
static inline void
ip_set_comment_free(struct ip_set *set, struct ip_set_comment *comment)
{
	struct ip_set_comment_rcu *c;

	c = rcu_dereference_protected(comment->c, 1);
	if (unlikely(!c))
		return;
	set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
	kfree_rcu(c, rcu);
	rcu_assign_pointer(comment->c, NULL);
}

static inline void
ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
{
	atomic64_add((long long)bytes, &(counter)->bytes);
}

static inline void
ip_set_add_packets(u64 packets, struct ip_set_counter *counter)
{
	atomic64_add((long long)packets, &(counter)->packets);
}

static inline u64
ip_set_get_bytes(const struct ip_set_counter *counter)
{
	return (u64)atomic64_read(&(counter)->bytes);
}

static inline u64
ip_set_get_packets(const struct ip_set_counter *counter)
{
	return (u64)atomic64_read(&(counter)->packets);
}

static inline bool
ip_set_match_counter(u64 counter, u64 match, u8 op)
{
	switch (op) {
	case IPSET_COUNTER_NONE:
		return true;
	case IPSET_COUNTER_EQ:
		return counter == match;
	case IPSET_COUNTER_NE:
		return counter != match;
	case IPSET_COUNTER_LT:
		return counter < match;
	case IPSET_COUNTER_GT:
		return counter > match;
	}
	return false;
}

static inline void
ip_set_update_counter(struct ip_set_counter *counter,
		      const struct ip_set_ext *ext, u32 flags)
{
	if (ext->packets != ULLONG_MAX &&
	    !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) {
		ip_set_add_bytes(ext->bytes, counter);
		ip_set_add_packets(ext->packets, counter);
	}
}

static inline bool
ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter)
{
	return nla_put_net64(skb, IPSET_ATTR_BYTES,
			     cpu_to_be64(ip_set_get_bytes(counter)),
			     IPSET_ATTR_PAD) ||
	       nla_put_net64(skb, IPSET_ATTR_PACKETS,
			     cpu_to_be64(ip_set_get_packets(counter)),
			     IPSET_ATTR_PAD);
}
void ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
			 const struct ip_set_ext *ext);

static inline void
ip_set_init_counter(struct ip_set_counter *counter,
@@ -655,31 +500,6 @@ ip_set_init_counter(struct ip_set_counter *counter,
		atomic64_set(&(counter)->packets, (long long)(ext->packets));
}

static inline void
ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
		   const struct ip_set_ext *ext,
		   struct ip_set_ext *mext, u32 flags)
{
	mext->skbinfo = *skbinfo;
}

static inline bool
ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo)
{
	/* Send nonzero parameters only */
	return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
		nla_put_net64(skb, IPSET_ATTR_SKBMARK,
			      cpu_to_be64((u64)skbinfo->skbmark << 32 |
					  skbinfo->skbmarkmask),
			      IPSET_ATTR_PAD)) ||
	       (skbinfo->skbprio &&
		nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
			      cpu_to_be32(skbinfo->skbprio))) ||
	       (skbinfo->skbqueue &&
		nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
			      cpu_to_be16(skbinfo->skbqueue)));
}

static inline void
ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
		    const struct ip_set_ext *ext)
+0 −14
Original line number Diff line number Diff line
@@ -12,18 +12,4 @@ enum {
	IPSET_ADD_START_STORED_TIMEOUT,
};

/* Common functions */

static inline u32
range_to_mask(u32 from, u32 to, u8 *bits)
{
	u32 mask = 0xFFFFFFFE;

	*bits = 32;
	while (--(*bits) > 0 && mask && (to & mask) != from)
		mask <<= 1;

	return mask;
}

#endif /* __IP_SET_BITMAP_H */
+0 −3
Original line number Diff line number Diff line
@@ -20,9 +20,6 @@ static inline bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
}
#endif

extern bool ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src,
				__be16 *port);

static inline bool ip_set_proto_with_ports(u8 proto)
{
	switch (proto) {
+1 −1
Original line number Diff line number Diff line
@@ -1324,7 +1324,7 @@ void ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs);
void ip_vs_control_net_cleanup(struct netns_ipvs *ipvs);
void ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs);
void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs);
void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs);
void ip_vs_service_nets_cleanup(struct list_head *net_list);

/* IPVS application functions
 * (from ip_vs_app.c)
Loading