Commit f6310b61 authored by David S. Miller
Browse files

Merge branch 'net-Add-route-offload-indication'

Ido Schimmel says:

====================
net: Add route offload indication

This patch set adds offload indication to IPv4 and IPv6 routes. So far
offload indication was only available for the nexthop via
'RTNH_F_OFFLOAD', which is problematic as a nexthop is usually shared
between multiple routes.

Based on feedback from Roopa and David on the RFC [1], the indication is
split to 'offload' and 'trap'. This is done because not all the routes
present in hardware actually offload traffic from the kernel. For
example, host routes merely trap packets to the kernel. The two flags
are dumped to user space via the 'rtm_flags' field in the ancillary
header of the rtnetlink message.

In addition, the patch set uses the new flags in order to test the FIB
offload API by adding a dummy FIB offload implementation to netdevsim.
The new tests are added to a shared library and can therefore be shared
between different drivers.

Patches #1-#3 add offload indication to IPv4 routes.
Patch #4 adds offload indication to IPv6 routes.
Patches #5-#6 add support for the offload indication in mlxsw.
Patch #7 adds dummy FIB offload implementation in netdevsim.
Patches #8-#10 add selftests.

v2 (feedback from David Ahern):
* Patch #2: Name last argument of fib_dump_info()
* Patch #2: Move 'struct fib_rt_info' to include/net/ip_fib.h so that it
  could later be passed to fib_alias_hw_flags_set()
* Patch #3: Make use of 'struct fib_rt_info' in fib_alias_hw_flags_set()
* Patch #6: Convert to new fib_alias_hw_flags_set() interface
* Patch #7: Convert to new fib_alias_hw_flags_set() interface

[1] https://patchwork.ozlabs.org/cover/1170530/


====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 0fadc0a2 212a37c2
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -549,6 +549,7 @@ source "drivers/net/hyperv/Kconfig"
config NETDEVSIM
	tristate "Simulated networking device"
	depends on DEBUG_FS
	depends on IPV6 || IPV6=n
	select NET_DEVLINK
	help
	  This driver is a developer testing tool and software model that can
+152 −94
Original line number Diff line number Diff line
@@ -3235,20 +3235,6 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
	return 0;
}

static void
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op, int err);

static void
mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
{
	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
	struct mlxsw_sp_fib_entry *fib_entry;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node)
		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
}

static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
{
	/* Valid sizes for an adjacency group are:
@@ -3352,6 +3338,73 @@ mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
	}
}

static struct mlxsw_sp_nexthop *
mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6);

/* Propagate the per-nexthop 'offloaded' state of an IPv4 nexthop group to the
 * RTNH_F_OFFLOAD bit in the flags of each nexthop's fib_nh.
 *
 * @mlxsw_sp is not used in the body; the signature matches the IPv6 variant
 * called from the same dispatcher.
 */
static void
mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop_group *nh_grp)
{
	int idx;

	for (idx = 0; idx < nh_grp->count; idx++) {
		struct mlxsw_sp_nexthop *nexthop = &nh_grp->nexthops[idx];

		if (!nexthop->offloaded) {
			/* Not offloaded: make sure the indication is gone. */
			nexthop->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
			continue;
		}

		nexthop->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
	}
}

/* For every route on @fib6_entry's rt6 list, look up the matching nexthop in
 * @nh_grp and set RTNH_F_OFFLOAD in the route's fib6_nh flags when that
 * nexthop exists and is offloaded; clear the bit otherwise.
 */
static void
__mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp,
					  struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_rt6 *rt6;

	list_for_each_entry(rt6, &fib6_entry->rt6_list, list) {
		struct mlxsw_sp_nexthop *nexthop;

		nexthop = mlxsw_sp_rt6_nexthop(nh_grp, rt6);

		/* No matching nexthop counts the same as a non-offloaded one. */
		if (!nexthop || !nexthop->offloaded)
			rt6->rt->fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
		else
			rt6->rt->fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
	}
}

static void
mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;

	/* Unfortunately, in IPv6 the route and the nexthop are described by
	 * the same struct, so we need to iterate over all the routes using the
	 * nexthop group and set / clear the offload indication for them.
	 */
	list_for_each_entry(fib6_entry, &nh_grp->fib_list,
			    common.nexthop_group_node)
		__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
}

/* Dispatch the nexthop offload indication refresh of @nh_grp to the IPv4 or
 * IPv6 implementation, according to the group's type.
 */
static void
mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop_group *nh_grp)
{
	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
	case AF_INET6:
		mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp);
		return;
	case AF_INET:
		mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp);
		return;
	default:
		/* Nothing is refreshed for any other group type. */
		return;
	}
}

static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
@@ -3425,6 +3478,8 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
		goto set_trap;
	}

	mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);

	if (!old_adj_index_valid) {
		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use adjacency index.
@@ -3446,9 +3501,6 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
		goto set_trap;
	}

	/* Offload state within the group changed, so update the flags. */
	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);

	return;

set_trap:
@@ -3461,6 +3513,7 @@ set_trap:
	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
	mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
	if (old_adj_index_valid)
		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
				   nh_grp->ecmp_size, nh_grp->adj_index);
@@ -4043,131 +4096,128 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
}

static void
mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
	int i;

	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE ||
	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP ||
	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) {
		nh_grp->nexthops->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
		return;
	}

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
	struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group);
	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
	int dst_len = fib_entry->fib_node->key.prefix_len;
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct fib_rt_info fri;
	bool should_offload;

		if (nh->offloaded)
			nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
		else
			nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
	}
	should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
				  common);
	fri.fi = fi;
	fri.tb_id = fib4_entry->tb_id;
	fri.dst = cpu_to_be32(*p_dst);
	fri.dst_len = dst_len;
	fri.tos = fib4_entry->tos;
	fri.type = fib4_entry->type;
	fri.offload = should_offload;
	fri.trap = !should_offload;
	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
}

static void
mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
	int i;

	if (!list_is_singular(&nh_grp->fib_list))
		return;

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
	struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group);
	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
	int dst_len = fib_entry->fib_node->key.prefix_len;
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct fib_rt_info fri;

		nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
	}
	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
				  common);
	fri.fi = fi;
	fri.tb_id = fib4_entry->tb_id;
	fri.dst = cpu_to_be32(*p_dst);
	fri.dst_len = dst_len;
	fri.tos = fib4_entry->tos;
	fri.type = fib4_entry->type;
	fri.offload = false;
	fri.trap = false;
	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
}

static void
mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	bool should_offload;

	should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);

	/* In IPv6 a multipath route is represented using multiple routes, so
	 * we need to set the flags on all of them.
	 */
	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
				  common);

	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) {
		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
				 list)->rt->fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
		return;
	}

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
		struct mlxsw_sp_nexthop *nh;

		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
		if (nh && nh->offloaded)
			fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
		else
			fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
	}
	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
		fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, should_offload,
				       !should_offload);
}

static void
mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
				  common);
	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		struct fib6_info *rt = mlxsw_sp_rt6->rt;

		rt->fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
	}
	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
		fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, false, false);
}

static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
static void
mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib_entry *fib_entry)
{
	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_entry_offload_set(fib_entry);
		mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_sp_fib6_entry_offload_set(fib_entry);
		mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, fib_entry);
		break;
	}
}

static void
mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry)
{
	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_entry_offload_unset(fib_entry);
		mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_sp_fib6_entry_offload_unset(fib_entry);
		mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry);
		break;
	}
}

static void
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op, int err)
mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_fib_entry *fib_entry,
				    enum mlxsw_reg_ralue_op op)
{
	switch (op) {
	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
		if (err)
			return;
		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
			mlxsw_sp_fib_entry_offload_set(fib_entry);
		else
			mlxsw_sp_fib_entry_offload_unset(fib_entry);
		return;
		mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry);
		break;
	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
		mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry);
		break;
	default:
		return;
		break;
	}
}

@@ -4394,7 +4444,10 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
{
	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);

	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
	if (err)
		return err;

	mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op);

	return err;
}
@@ -4830,7 +4883,7 @@ mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
	if (!replaced)
		return 0;

	mlxsw_sp_fib_entry_offload_unset(replaced);
	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
	fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry,
				     common);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced);
@@ -5113,6 +5166,11 @@ static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
		      &nh_grp->fib_list);
	fib6_entry->common.nh_group = nh_grp;

	/* The route and the nexthop are described by the same struct, so we
	 * need to update the nexthop offload indication for the new route.
	 */
	__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);

	return 0;
}

@@ -5393,7 +5451,7 @@ static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
	if (!replaced)
		return 0;

	mlxsw_sp_fib_entry_offload_unset(replaced);
	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
	fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry,
				     common);
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced);
+662 −9

File changed.

Preview size limit exceeded, changes collapsed.

+10 −1
Original line number Diff line number Diff line
@@ -192,7 +192,9 @@ struct fib6_info {
					dst_nopolicy:1,
					dst_host:1,
					fib6_destroying:1,
					unused:3;
					offload:1,
					trap:1,
					unused:1;

	struct rcu_head			rcu;
	struct nexthop			*nh;
@@ -329,6 +331,13 @@ static inline void fib6_info_release(struct fib6_info *f6i)
		call_rcu(&f6i->rcu, fib6_info_destroy_rcu);
}

/* Store the hardware 'offload' and 'trap' indications on an IPv6 route.
 *
 * @f6i:     route whose flags are updated
 * @offload: new value of the route's 'offload' bit
 * @trap:    new value of the route's 'trap' bit
 *
 * NOTE(review): 'offload' and 'trap' are one-bit fields declared next to
 * other flags such as fib6_destroying, so these plain assignments are
 * presumably read-modify-write updates of shared storage - confirm that
 * callers serialize against other writers of those flags.
 */
static inline void
fib6_info_hw_flags_set(struct fib6_info *f6i, bool offload, bool trap)
{
	f6i->offload = offload;
	f6i->trap = trap;
}

enum fib6_walk_state {
#ifdef CONFIG_IPV6_SUBTREES
	FWS_S,
+13 −0
Original line number Diff line number Diff line
@@ -204,6 +204,18 @@ __be32 fib_result_prefsrc(struct net *net, struct fib_result *res);
#define FIB_RES_DEV(res)	(FIB_RES_NHC(res)->nhc_dev)
#define FIB_RES_OIF(res)	(FIB_RES_NHC(res)->nhc_oif)

/* Description of an IPv4 route together with its hardware flags; this is the
 * argument type taken by fib_alias_hw_flags_set().
 *
 * @fi:      FIB info of the route
 * @tb_id:   ID of the table the route belongs to
 * @dst:     destination prefix, big-endian
 * @dst_len: destination prefix length
 * @tos:     TOS of the route
 * @type:    route type
 * @offload: hardware flag - the route offloads traffic from the kernel
 * @trap:    hardware flag - the route traps packets to the kernel
 * @unused:  remaining bits of the flags byte
 */
struct fib_rt_info {
	struct fib_info		*fi;
	u32			tb_id;
	__be32			dst;
	int			dst_len;
	u8			tos;
	u8			type;
	u8			offload:1,
				trap:1,
				unused:6;
};

struct fib_entry_notifier_info {
	struct fib_notifier_info info; /* must be first */
	u32 dst;
@@ -464,6 +476,7 @@ int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *fc_encap,
void fib_nh_common_release(struct fib_nh_common *nhc);

/* Exported by fib_trie.c */
void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri);
void fib_trie_init(void);
struct fib_table *fib_trie_table(u32 id, struct fib_table *alias);

Loading