Commit e8ab563f authored by David S. Miller
Browse files

Merge branch 'flower-offload'



Amir Vadai says:

====================
cls_flower hardware offload support

Please see changes from V2 at the bottom.

This patchset introduces cls_flower hardware offload support in the ConnectX-4
driver; other hardware vendors are welcome to use it too.

This patchset is based on John's infrastructure for tc offloading [2] to add
hardware offload support to the flower filter. It also extends the support to
an additional tc action - skbedit mark operation.
The NIC driver that was used is ConnectX-4. The feature is off by default and
can be turned on using ethtool.

Some commands to use this code:

export TC=../iproute2/tc/tc
export ETH=ens9

ethtool -K $ETH hw-tc-offload on

$TC qdisc add dev $ETH ingress

$TC filter add dev $ETH protocol ip prio 20 parent ffff: \
	flower ip_proto 1 \
	dst_mac 7c:fe:90:69:81:62 \
	src_mac 7c:fe:90:69:81:56 \
	dst_ip 11.11.11.11 \
	src_ip 11.11.11.12 \
	indev $ETH \
	action drop

$TC filter add dev $ETH protocol ip prio 30 parent ffff: \
	flower ip_proto 6 \
	indev $ETH \
	action skbedit mark 0x1234

$TC filter add dev $ETH protocol ip prio 10 parent ffff: \
	handle 0x1234 fw action pass

The code was tested and applied on top of commit 3ebeac1d ("Merge branch
'cxgb4-next'")

Changes from V2:
- patch 1/10 ("net/flower: Introduce hardware offload support")
  - Remove unused variable [Dave]
  - Don't fail command when HW can't offload filter [John]
- patch 3/10 ("net/sched: Macro instead of CONFIG_NET_CLS_ACT ifdef")
  - Mention in changelog that struct tc_action is now exposed out of the ifdef.
- patch 4/10 ("net/act_skbedit: Utility functions for mark action")
  - Document clearly that is_tcf_skbedit_mark() returns true if and only
    if the only action is mark [Dave]
- patch 8/10 ("net/mlx5e: Introduce tc offload support")
  - make mlx5e_tc_add_flow() static

Changes from V1:
- patch 3/10 ("net/sched: Macro instead of CONFIG_NET_CLS_ACT ifdef")
  - fixed return value of tc_no_actions

Changes from V0:
- Use tc_no_actions and tc_for_each_action instead of ifdef CONFIG_NET_CLS_ACT
- Replace ENOTSUPP (and some EINVAL) with EOPNOTSUPP
- Name the flower command enum
- fl_hw_destroy_filter() to return void - nobody uses the return value
- mlx5e_tc_init() and mlx5e_tc_cleanup() to be called from the right places.
- When adding HW rule fails - fail the command
- Rules are added to be processed both by HW and SW unless SKIP_HW is given
- Adding patch 6/10 ("net/mlx5e: Relax ndo_setup_tc handle restriction")

Main changes from the RFC [1]:
- API
  - Using ndo_setup_tc() instead of switchdev
- act_skbedit, act_gact
  - Actions are not serialized to NIC driver, instead using access functions.
- cls_flower
  - prevent double classification by software by not adding
    successfully offloaded filters to the hashtable
  - Fixed some bugs in original RFC with rule delete
- mlx5
  - Adding flow table to kernel namespace instead of a new namespace
  - s/offload/tc/ in many places
  - no need for a special kconfig since switchdev is not used
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 10f79037 12185a9f
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -6,6 +6,6 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \

mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \
		en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \
		en_txrx.o en_clock.o vxlan.o
		en_txrx.o en_clock.o vxlan.o en_tc.o

mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
+9 −0
Original line number Diff line number Diff line
@@ -43,6 +43,7 @@
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/transobj.h>
#include <linux/rhashtable.h>
#include "wq.h"
#include "mlx5_core.h"

@@ -527,8 +528,16 @@ struct mlx5e_flow_table {
	struct mlx5_flow_group		**g;
};

/*
 * State kept by the driver for tc offload: one mlx5 flow table plus an
 * rhashtable and the parameters it is configured with.
 */
struct mlx5e_tc_flow_table {
	/* NOTE(review): presumably the HW table that offloaded tc rules are
	 * inserted into - confirm against en_tc.c.
	 */
	struct mlx5_flow_table		*t;

	/* NOTE(review): presumably used to look up offloaded flows by their
	 * filter identifier; ht_params is kept next to ht so lookups can reuse
	 * it - confirm against en_tc.c.
	 */
	struct rhashtable_params        ht_params;
	struct rhashtable               ht;
};

/*
 * Flow-steering state of a netdev, reached through priv->fts: the flow
 * namespace and the tables the driver keeps in it.
 */
struct mlx5e_flow_tables {
	/* namespace handed to mlx5_create_flow_table() when creating tables */
	struct mlx5_flow_namespace	*ns;
	/* tc offload table and its hashtable bookkeeping */
	struct mlx5e_tc_flow_table	tc;
	/* NOTE(review): presumably set up by mlx5e_create_vlan_flow_table() - confirm */
	struct mlx5e_flow_table		vlan;
	/* NOTE(review): presumably set up by mlx5e_create_main_flow_table() - confirm */
	struct mlx5e_flow_table		main;
};
+2 −2
Original line number Diff line number Diff line
@@ -1041,7 +1041,7 @@ static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv)
	int err;

	ft->num_groups = 0;
	ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_MAIN_TABLE_SIZE);
	ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_MAIN_TABLE_SIZE);

	if (IS_ERR(ft->t)) {
		err = PTR_ERR(ft->t);
@@ -1150,7 +1150,7 @@ static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv)
	int err;

	ft->num_groups = 0;
	ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_VLAN_TABLE_SIZE);
	ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_VLAN_TABLE_SIZE);

	if (IS_ERR(ft->t)) {
		err = PTR_ERR(ft->t);
+45 −2
Original line number Diff line number Diff line
@@ -30,9 +30,12 @@
 * SOFTWARE.
 */

#include <net/tc_act/tc_gact.h>
#include <net/pkt_cls.h>
#include <linux/mlx5/fs.h>
#include <net/vxlan.h>
#include "en.h"
#include "en_tc.h"
#include "eswitch.h"
#include "vxlan.h"

@@ -1883,7 +1886,25 @@ static int mlx5e_setup_tc(struct net_device *netdev, u8 tc)
static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle,
			      __be16 proto, struct tc_to_netdev *tc)
{
	if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO)
	struct mlx5e_priv *priv = netdev_priv(dev);

	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
		goto mqprio;

	switch (tc->type) {
	case TC_SETUP_CLSFLOWER:
		switch (tc->cls_flower->command) {
		case TC_CLSFLOWER_REPLACE:
			return mlx5e_configure_flower(priv, proto, tc->cls_flower);
		case TC_CLSFLOWER_DESTROY:
			return mlx5e_delete_flower(priv, tc->cls_flower);
		}
	default:
		return -EOPNOTSUPP;
	}

mqprio:
	if (tc->type != TC_SETUP_MQPRIO)
		return -EINVAL;

	return mlx5e_setup_tc(dev, tc->tc);
@@ -1968,6 +1989,13 @@ static int mlx5e_set_features(struct net_device *netdev,
			mlx5e_disable_vlan_filter(priv);
	}

	if ((changes & NETIF_F_HW_TC) && !(features & NETIF_F_HW_TC) &&
	    mlx5e_tc_num_filters(priv)) {
		netdev_err(netdev,
			   "Active offloaded tc filters, can't turn hw_tc_offload off\n");
		return -EINVAL;
	}

	return err;
}

@@ -2375,6 +2403,13 @@ static void mlx5e_build_netdev(struct net_device *netdev)
	if (!priv->params.lro_en)
		netdev->features  &= ~NETIF_F_LRO;

#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
	if (FT_CAP(flow_modify_en) &&
	    FT_CAP(modify_root) &&
	    FT_CAP(identified_miss_table_mode) &&
	    FT_CAP(flow_table_modify))
		priv->netdev->hw_features      |= NETIF_F_HW_TC;

	netdev->features         |= NETIF_F_HIGHDMA;

	netdev->priv_flags       |= IFF_UNICAST_FLT;
@@ -2496,6 +2531,10 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev)

	mlx5e_vxlan_init(priv);

	err = mlx5e_tc_init(priv);
	if (err)
		goto err_destroy_flow_tables;

#ifdef CONFIG_MLX5_CORE_EN_DCB
	mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets);
#endif
@@ -2503,7 +2542,7 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev)
	err = register_netdev(netdev);
	if (err) {
		mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
		goto err_destroy_flow_tables;
		goto err_tc_cleanup;
	}

	if (mlx5e_vxlan_allowed(mdev))
@@ -2514,6 +2553,9 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev)

	return priv;

err_tc_cleanup:
	mlx5e_tc_cleanup(priv);

err_destroy_flow_tables:
	mlx5e_destroy_flow_tables(priv);

@@ -2561,6 +2603,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv)
	mlx5e_disable_async_events(priv);
	flush_scheduled_work();
	unregister_netdev(netdev);
	mlx5e_tc_cleanup(priv);
	mlx5e_vxlan_cleanup(priv);
	mlx5e_destroy_flow_tables(priv);
	mlx5e_destroy_tirs(priv);
+3 −0
Original line number Diff line number Diff line
@@ -35,6 +35,7 @@
#include <linux/tcp.h>
#include <net/busy_poll.h>
#include "en.h"
#include "en_tc.h"

static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
{
@@ -224,6 +225,8 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
	if (cqe_has_vlan(cqe))
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
				       be16_to_cpu(cqe->vlan_info));

	skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
}

int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
Loading