Commit b4a23329 authored by Roi Dayan's avatar Roi Dayan Committed by Saeed Mahameed
Browse files

net/mlx5e: Re-attempt to offload flows on multipath port affinity events



Under multipath it's possible for us to offload the flow only through
the e-switch for which proper route through the uplink exists.
When the port is up and the next-hop route is set again we want to
offload through it as well.

We generate SW event from the FIB event handler when multipath port
affinity changes. The tc offloads code gets this event, goes over the
flows which were marked as of having missing route and attempts to
offload them.

Signed-off-by: default avatarRoi Dayan <roid@mellanox.com>
Reviewed-by: default avatarOr Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@mellanox.com>
parent 6997b1c9
Loading
Loading
Loading
Loading
+29 −11
Original line number Diff line number Diff line
@@ -1573,6 +1573,8 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
	if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
		uplink_priv = &rpriv->uplink_priv;

		INIT_LIST_HEAD(&uplink_priv->unready_flows);

		/* init shared tc flow table */
		err = mlx5e_tc_esw_init(&uplink_priv->tc_ht);
		if (err)
@@ -1632,10 +1634,9 @@ static void mlx5e_vf_rep_enable(struct mlx5e_priv *priv)
static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data)
{
	struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
	struct mlx5_eqe   *eqe = data;

	if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
		return NOTIFY_DONE;
	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
		struct mlx5_eqe *eqe = data;

		switch (eqe->sub_type) {
		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
@@ -1649,10 +1650,22 @@ static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event
		return NOTIFY_OK;
	}

	if (event == MLX5_DEV_EVENT_PORT_AFFINITY) {
		struct mlx5e_rep_priv *rpriv = priv->ppriv;

		queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);

		return NOTIFY_OK;
	}

	return NOTIFY_DONE;
}

static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
{
	struct net_device *netdev = priv->netdev;
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	u16 max_mtu;

	netdev->min_mtu = ETH_MIN_MTU;
@@ -1660,6 +1673,9 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
	netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
	mlx5e_set_dev_port_mtu(priv);

	INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work,
		  mlx5e_tc_reoffload_flows_work);

	mlx5_lag_add(mdev, netdev);
	priv->events_nb.notifier_call = uplink_rep_async_event;
	mlx5_notifier_register(mdev, &priv->events_nb);
@@ -1672,11 +1688,13 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5e_rep_priv *rpriv = priv->ppriv;

#ifdef CONFIG_MLX5_CORE_EN_DCB
	mlx5e_dcbnl_delete_app(priv);
#endif
	mlx5_notifier_unregister(mdev, &priv->events_nb);
	cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
	mlx5_lag_remove(mdev);
}

+3 −0
Original line number Diff line number Diff line
@@ -74,6 +74,9 @@ struct mlx5_rep_uplink_priv {
	struct notifier_block	    netdevice_nb;

	struct mlx5_tun_entropy tun_entropy;

	struct list_head            unready_flows;
	struct work_struct          reoffload_flows_work;
};

struct mlx5e_rep_priv {
+38 −1
Original line number Diff line number Diff line
@@ -117,6 +117,7 @@ struct mlx5e_tc_flow {
	struct list_head	mod_hdr; /* flows sharing the same mod hdr ID */
	struct list_head	hairpin; /* flows sharing the same hairpin */
	struct list_head	peer;    /* flows with peer flow */
	struct list_head	unready; /* flows not ready to be offloaded (e.g due to missing route) */
	union {
		struct mlx5_esw_flow_attr esw_attr[0];
		struct mlx5_nic_flow_attr nic_attr[0];
@@ -928,6 +929,26 @@ mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
	flow->flags &= ~MLX5E_TC_FLOW_SLOW;
}

static void add_unready_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;

	esw = flow->priv->mdev->priv.eswitch;
	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &rpriv->uplink_priv;

	flow->flags |= MLX5E_TC_FLOW_NOT_READY;
	list_add_tail(&flow->unready, &uplink_priv->unready_flows);
}

static void remove_unready_flow(struct mlx5e_tc_flow *flow)
{
	list_del(&flow->unready);
	flow->flags &= ~MLX5E_TC_FLOW_NOT_READY;
}

static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
		      struct mlx5e_tc_flow *flow,
@@ -1049,6 +1070,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
	int out_index;

	if (flow->flags & MLX5E_TC_FLOW_NOT_READY) {
		remove_unready_flow(flow);
		kvfree(attr->parse_attr);
		return;
	}
@@ -2810,7 +2832,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
		if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
			goto err_free;

		flow->flags |= MLX5E_TC_FLOW_NOT_READY;
		add_unready_flow(flow);
	}

	return flow;
@@ -3214,3 +3236,18 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
	list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
		__mlx5e_tc_del_fdb_peer_flow(flow);
}

void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
{
	struct mlx5_rep_uplink_priv *rpriv =
		container_of(work, struct mlx5_rep_uplink_priv,
			     reoffload_flows_work);
	struct mlx5e_tc_flow *flow, *tmp;

	rtnl_lock();
	list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
		if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
			remove_unready_flow(flow);
	}
	rtnl_unlock();
}
+1 −0
Original line number Diff line number Diff line
@@ -72,6 +72,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);

int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags);

void mlx5e_tc_reoffload_flows_work(struct work_struct *work);

#else /* CONFIG_MLX5_ESWITCH */
static inline int  mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }