Commit 68e2617a authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'mlxsw-Remove-RTNL-from-route-insertion-path'



Ido Schimmel says:

====================
mlxsw: Remove RTNL from route insertion path

This patch set removes RTNL from the route insertion path in mlxsw in
order to reduce the control plane latency: the time it takes to push
routes from user space to the kernel and mlxsw.

Up until now mlxsw did not have a lock to protect its shared router data
structures and instead relied on RTNL. While this was simple and worked,
it resulted in large control plane latencies as RTNL was heavily
contended - by both the task receiving the netlink messages from user
space and the mlxsw workqueue that programs the routes to the device.

By removing RTNL and introducing a new router mutex, this patch set
reduces the control plane latency by ~80%. A single mutex is added as
inside mlxsw there is not a lot of concurrency. In addition, a more
fine-grained locking scheme is much more error-prone.

Patches #1-#6 are preparations. They add needed locking in NVE and
multicast routing code instead of relying on RTNL
Patch #7 introduces the new mutex
Patches #8-#12 gradually take the lock in various entry points into the
routing code
Patch #13 removes RTNL in places where it is no longer required
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 732a0dee 9811f7a2
Loading
Loading
Loading
Loading
+1 −4
Original line number Diff line number Diff line
@@ -548,7 +548,7 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
				 struct netdev_notifier_changeupper_info *info);
bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev);
bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev);
int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
				     struct net_device *l3_dev,
@@ -966,9 +966,6 @@ void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fid *fid,
			       enum mlxsw_sp_l3proto proto,
			       union mlxsw_sp_l3addr *addr);
u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp);
bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp,
				      u32 tb_id, __be32 addr);
int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid,
			    struct mlxsw_sp_nve_params *params,
			    struct netlink_ext_ack *extack);
+18 −17
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@
/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */

#include <linux/kernel.h>
#include <linux/mutex.h>
#include <net/devlink.h>

#include "spectrum.h"
@@ -210,7 +211,7 @@ mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled,
		return err;

	rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
	rtnl_lock();
	mutex_lock(&mlxsw_sp->router->lock);
	i = 0;
start_again:
	err = devlink_dpipe_entry_ctx_prepare(dump_ctx);
@@ -241,14 +242,14 @@ start_again:
	devlink_dpipe_entry_ctx_close(dump_ctx);
	if (i != rif_count)
		goto start_again;
	rtnl_unlock();
	mutex_unlock(&mlxsw_sp->router->lock);

	devlink_dpipe_entry_clear(&entry);
	return 0;
err_entry_append:
err_entry_get:
err_ctx_prepare:
	rtnl_unlock();
	mutex_unlock(&mlxsw_sp->router->lock);
	devlink_dpipe_entry_clear(&entry);
	return err;
}
@@ -258,7 +259,7 @@ static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable)
	struct mlxsw_sp *mlxsw_sp = priv;
	int i;

	rtnl_lock();
	mutex_lock(&mlxsw_sp->router->lock);
	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
		struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);

@@ -271,7 +272,7 @@ static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable)
			mlxsw_sp_rif_counter_free(mlxsw_sp, rif,
						  MLXSW_SP_RIF_COUNTER_EGRESS);
	}
	rtnl_unlock();
	mutex_unlock(&mlxsw_sp->router->lock);
	return 0;
}

@@ -546,7 +547,7 @@ mlxsw_sp_dpipe_table_host_entries_get(struct mlxsw_sp *mlxsw_sp,
	int i, j;
	int err;

	rtnl_lock();
	mutex_lock(&mlxsw_sp->router->lock);
	i = 0;
	rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
start_again:
@@ -602,12 +603,12 @@ out:
	if (i != rif_count)
		goto start_again;

	rtnl_unlock();
	mutex_unlock(&mlxsw_sp->router->lock);
	return 0;

err_ctx_prepare:
err_entry_append:
	rtnl_unlock();
	mutex_unlock(&mlxsw_sp->router->lock);
	return err;
}

@@ -662,7 +663,7 @@ mlxsw_sp_dpipe_table_host_counters_update(struct mlxsw_sp *mlxsw_sp,
{
	int i;

	rtnl_lock();
	mutex_lock(&mlxsw_sp->router->lock);
	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
		struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
		struct mlxsw_sp_neigh_entry *neigh_entry;
@@ -684,7 +685,7 @@ mlxsw_sp_dpipe_table_host_counters_update(struct mlxsw_sp *mlxsw_sp,
							    enable);
		}
	}
	rtnl_unlock();
	mutex_unlock(&mlxsw_sp->router->lock);
}

static int mlxsw_sp_dpipe_table_host4_counters_update(void *priv, bool enable)
@@ -701,7 +702,7 @@ mlxsw_sp_dpipe_table_host_size_get(struct mlxsw_sp *mlxsw_sp, int type)
	u64 size = 0;
	int i;

	rtnl_lock();
	mutex_lock(&mlxsw_sp->router->lock);
	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
		struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
		struct mlxsw_sp_neigh_entry *neigh_entry;
@@ -721,7 +722,7 @@ mlxsw_sp_dpipe_table_host_size_get(struct mlxsw_sp *mlxsw_sp, int type)
			size++;
		}
	}
	rtnl_unlock();
	mutex_unlock(&mlxsw_sp->router->lock);

	return size;
}
@@ -1093,7 +1094,7 @@ mlxsw_sp_dpipe_table_adj_entries_get(struct mlxsw_sp *mlxsw_sp,
	int j;
	int err;

	rtnl_lock();
	mutex_lock(&mlxsw_sp->router->lock);
	nh_count_max = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp);
start_again:
	err = devlink_dpipe_entry_ctx_prepare(dump_ctx);
@@ -1130,13 +1131,13 @@ skip:
	devlink_dpipe_entry_ctx_close(dump_ctx);
	if (nh_count != nh_count_max)
		goto start_again;
	rtnl_unlock();
	mutex_unlock(&mlxsw_sp->router->lock);

	return 0;

err_ctx_prepare:
err_entry_append:
	rtnl_unlock();
	mutex_unlock(&mlxsw_sp->router->lock);
	return err;
}

@@ -1206,9 +1207,9 @@ mlxsw_sp_dpipe_table_adj_size_get(void *priv)
	struct mlxsw_sp *mlxsw_sp = priv;
	u64 size;

	rtnl_lock();
	mutex_lock(&mlxsw_sp->router->lock);
	size = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp);
	rtnl_unlock();
	mutex_unlock(&mlxsw_sp->router->lock);

	return size;
}
+39 −13
Original line number Diff line number Diff line
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */

#include <linux/mutex.h>
#include <linux/rhashtable.h>
#include <net/ipv6.h>

@@ -12,6 +13,7 @@ struct mlxsw_sp_mr {
	void *catchall_route_priv;
	struct delayed_work stats_update_dw;
	struct list_head table_list;
	struct mutex table_list_lock; /* Protects table_list */
#define MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL 5000 /* ms */
	unsigned long priv[0];
	/* priv has to be always the last item */
@@ -66,6 +68,7 @@ struct mlxsw_sp_mr_table {
	u32 vr_id;
	struct mlxsw_sp_mr_vif vifs[MAXVIFS];
	struct list_head route_list;
	struct mutex route_list_lock; /* Protects route_list */
	struct rhashtable route_ht;
	const struct mlxsw_sp_mr_table_ops *ops;
	char catchall_route_priv[];
@@ -370,11 +373,13 @@ static void mlxsw_sp_mr_mfc_offload_update(struct mlxsw_sp_mr_route *mr_route)
static void __mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table,
				    struct mlxsw_sp_mr_route *mr_route)
{
	WARN_ON_ONCE(!mutex_is_locked(&mr_table->route_list_lock));

	mlxsw_sp_mr_mfc_offload_set(mr_route, false);
	mlxsw_sp_mr_route_erase(mr_table, mr_route);
	rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node,
			       mlxsw_sp_mr_route_ht_params);
	list_del(&mr_route->node);
	mlxsw_sp_mr_route_erase(mr_table, mr_route);
	mlxsw_sp_mr_route_destroy(mr_table, mr_route);
}

@@ -415,19 +420,21 @@ int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table,
		goto err_duplicate_route;
	}

	/* Write the route to the hardware */
	err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace);
	if (err)
		goto err_mr_route_write;

	/* Put it in the table data-structures */
	mutex_lock(&mr_table->route_list_lock);
	list_add_tail(&mr_route->node, &mr_table->route_list);
	mutex_unlock(&mr_table->route_list_lock);
	err = rhashtable_insert_fast(&mr_table->route_ht,
				     &mr_route->ht_node,
				     mlxsw_sp_mr_route_ht_params);
	if (err)
		goto err_rhashtable_insert;

	/* Write the route to the hardware */
	err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace);
	if (err)
		goto err_mr_route_write;

	/* Destroy the original route */
	if (replace) {
		rhashtable_remove_fast(&mr_table->route_ht,
@@ -440,11 +447,12 @@ int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table,
	mlxsw_sp_mr_mfc_offload_update(mr_route);
	return 0;

err_mr_route_write:
	rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node,
			       mlxsw_sp_mr_route_ht_params);
err_rhashtable_insert:
	mutex_lock(&mr_table->route_list_lock);
	list_del(&mr_route->node);
	mutex_unlock(&mr_table->route_list_lock);
	mlxsw_sp_mr_route_erase(mr_table, mr_route);
err_mr_route_write:
err_no_orig_route:
err_duplicate_route:
	mlxsw_sp_mr_route_destroy(mr_table, mr_route);
@@ -460,8 +468,11 @@ void mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table,
	mr_table->ops->key_create(mr_table, &key, mfc);
	mr_route = rhashtable_lookup_fast(&mr_table->route_ht, &key,
					  mlxsw_sp_mr_route_ht_params);
	if (mr_route)
	if (mr_route) {
		mutex_lock(&mr_table->route_list_lock);
		__mlxsw_sp_mr_route_del(mr_table, mr_route);
		mutex_unlock(&mr_table->route_list_lock);
	}
}

/* Should be called after the VIF struct is updated */
@@ -910,6 +921,7 @@ struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
	mr_table->proto = proto;
	mr_table->ops = &mlxsw_sp_mr_table_ops_arr[proto];
	INIT_LIST_HEAD(&mr_table->route_list);
	mutex_init(&mr_table->route_list_lock);

	err = rhashtable_init(&mr_table->route_ht,
			      &mlxsw_sp_mr_route_ht_params);
@@ -927,12 +939,15 @@ struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
				       &catchall_route_params);
	if (err)
		goto err_ops_route_create;
	mutex_lock(&mr->table_list_lock);
	list_add_tail(&mr_table->node, &mr->table_list);
	mutex_unlock(&mr->table_list_lock);
	return mr_table;

err_ops_route_create:
	rhashtable_destroy(&mr_table->route_ht);
err_route_rhashtable_init:
	mutex_destroy(&mr_table->route_list_lock);
	kfree(mr_table);
	return ERR_PTR(err);
}
@@ -943,10 +958,13 @@ void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table)
	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;

	WARN_ON(!mlxsw_sp_mr_table_empty(mr_table));
	mutex_lock(&mr->table_list_lock);
	list_del(&mr_table->node);
	mutex_unlock(&mr->table_list_lock);
	mr->mr_ops->route_destroy(mlxsw_sp, mr->priv,
				  &mr_table->catchall_route_priv);
	rhashtable_destroy(&mr_table->route_ht);
	mutex_destroy(&mr_table->route_list_lock);
	kfree(mr_table);
}

@@ -955,8 +973,10 @@ void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table)
	struct mlxsw_sp_mr_route *mr_route, *tmp;
	int i;

	mutex_lock(&mr_table->route_list_lock);
	list_for_each_entry_safe(mr_route, tmp, &mr_table->route_list, node)
		__mlxsw_sp_mr_route_del(mr_table, mr_route);
	mutex_unlock(&mr_table->route_list_lock);

	for (i = 0; i < MAXVIFS; i++) {
		mr_table->vifs[i].dev = NULL;
@@ -1000,12 +1020,15 @@ static void mlxsw_sp_mr_stats_update(struct work_struct *work)
	struct mlxsw_sp_mr_route *mr_route;
	unsigned long interval;

	rtnl_lock();
	list_for_each_entry(mr_table, &mr->table_list, node)
	mutex_lock(&mr->table_list_lock);
	list_for_each_entry(mr_table, &mr->table_list, node) {
		mutex_lock(&mr_table->route_list_lock);
		list_for_each_entry(mr_route, &mr_table->route_list, node)
			mlxsw_sp_mr_route_stats_update(mr_table->mlxsw_sp,
						       mr_route);
	rtnl_unlock();
		mutex_unlock(&mr_table->route_list_lock);
	}
	mutex_unlock(&mr->table_list_lock);

	interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL);
	mlxsw_core_schedule_dw(&mr->stats_update_dw, interval);
@@ -1024,6 +1047,7 @@ int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp,
	mr->mr_ops = mr_ops;
	mlxsw_sp->mr = mr;
	INIT_LIST_HEAD(&mr->table_list);
	mutex_init(&mr->table_list_lock);

	err = mr_ops->init(mlxsw_sp, mr->priv);
	if (err)
@@ -1035,6 +1059,7 @@ int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp,
	mlxsw_core_schedule_dw(&mr->stats_update_dw, interval);
	return 0;
err:
	mutex_destroy(&mr->table_list_lock);
	kfree(mr);
	return err;
}
@@ -1045,5 +1070,6 @@ void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp)

	cancel_delayed_work_sync(&mr->stats_update_dw);
	mr->mr_ops->fini(mlxsw_sp, mr->priv);
	mutex_destroy(&mr->table_list_lock);
	kfree(mr);
}
+0 −21
Original line number Diff line number Diff line
@@ -713,27 +713,6 @@ static void mlxsw_sp_nve_flood_ip_flush(struct mlxsw_sp *mlxsw_sp,
	mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list);
}

u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp)
{
	WARN_ON(mlxsw_sp->nve->num_nve_tunnels == 0);

	return mlxsw_sp->nve->tunnel_index;
}

bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp,
				      u32 tb_id, __be32 addr)
{
	struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
	struct mlxsw_sp_nve_config *config = &nve->config;

	if (nve->num_nve_tunnels &&
	    config->ul_proto == MLXSW_SP_L3_PROTO_IPV4 &&
	    config->ul_sip.addr4 == addr && config->ul_tb_id == tb_id)
		return true;

	return false;
}

static int mlxsw_sp_nve_tunnel_init(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nve_config *config)
{
+166 −101

File changed.

Preview size limit exceeded, changes collapsed.

Loading