Commit 11a0ae4c authored by Jason Gunthorpe's avatar Jason Gunthorpe
Browse files

RDMA: Allow ib_client's to fail when add() is called

When a client is added it isn't allowed to fail, but all the client's have
various failure paths within their add routines.

This creates the very fringe condition where the client was added, failed
during add and didn't set the client_data. The core code will then still
call other client_data centric ops like remove(), rename(), get_nl_info(),
and get_net_dev_by_params() with NULL client_data - which is confusing and
unexpected.

If the add() callback fails, then do not call any more client ops for the
device, even remove.

Remove all the now redundant checks for NULL client_data in ops callbacks.

Update all the add() callbacks to return error codes
appropriately. EOPNOTSUPP is used for cases where the ULP does not support
the ib_device - eg because it only works with IB.

Link: https://lore.kernel.org/r/20200421172440.387069-1-leon@kernel.org


Signed-off-by: default avatarLeon Romanovsky <leonro@mellanox.com>
Acked-by: default avatarUrsula Braun <ubraun@linux.ibm.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@mellanox.com>
parent 04c349a9
Loading
Loading
Loading
Loading
+14 −10
Original line number Diff line number Diff line
@@ -81,7 +81,7 @@ const char *__attribute_const__ ibcm_reject_msg(int reason)
EXPORT_SYMBOL(ibcm_reject_msg);

struct cm_id_private;
static void cm_add_one(struct ib_device *device);
static int cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);
static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
				   struct ib_cm_sidr_rep_param *param);
@@ -4382,7 +4382,7 @@ static void cm_remove_port_fs(struct cm_port *port)

}

static void cm_add_one(struct ib_device *ib_device)
static int cm_add_one(struct ib_device *ib_device)
{
	struct cm_device *cm_dev;
	struct cm_port *port;
@@ -4401,7 +4401,7 @@ static void cm_add_one(struct ib_device *ib_device)
	cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt),
			 GFP_KERNEL);
	if (!cm_dev)
		return;
		return -ENOMEM;

	cm_dev->ib_device = ib_device;
	cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
@@ -4413,8 +4413,10 @@ static void cm_add_one(struct ib_device *ib_device)
			continue;

		port = kzalloc(sizeof *port, GFP_KERNEL);
		if (!port)
		if (!port) {
			ret = -ENOMEM;
			goto error1;
		}

		cm_dev->port[i-1] = port;
		port->cm_dev = cm_dev;
@@ -4435,8 +4437,10 @@ static void cm_add_one(struct ib_device *ib_device)
							cm_recv_handler,
							port,
							0);
		if (IS_ERR(port->mad_agent))
		if (IS_ERR(port->mad_agent)) {
			ret = PTR_ERR(port->mad_agent);
			goto error2;
		}

		ret = ib_modify_port(ib_device, i, 0, &port_modify);
		if (ret)
@@ -4445,15 +4449,17 @@ static void cm_add_one(struct ib_device *ib_device)
		count++;
	}

	if (!count)
	if (!count) {
		ret = -EOPNOTSUPP;
		goto free;
	}

	ib_set_client_data(ib_device, &cm_client, cm_dev);

	write_lock_irqsave(&cm.device_lock, flags);
	list_add_tail(&cm_dev->list, &cm.device_list);
	write_unlock_irqrestore(&cm.device_lock, flags);
	return;
	return 0;

error3:
	ib_unregister_mad_agent(port->mad_agent);
@@ -4475,6 +4481,7 @@ error1:
	}
free:
	kfree(cm_dev);
	return ret;
}

static void cm_remove_one(struct ib_device *ib_device, void *client_data)
@@ -4489,9 +4496,6 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
	unsigned long flags;
	int i;

	if (!cm_dev)
		return;

	write_lock_irqsave(&cm.device_lock, flags);
	list_del(&cm_dev->list);
	write_unlock_irqrestore(&cm.device_lock, flags);
+12 −11
Original line number Diff line number Diff line
@@ -153,7 +153,7 @@ struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res)
}
EXPORT_SYMBOL(rdma_res_to_id);

static void cma_add_one(struct ib_device *device);
static int cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);

static struct ib_client cma_client = {
@@ -4638,29 +4638,34 @@ static struct notifier_block cma_nb = {
	.notifier_call = cma_netdev_callback
};

static void cma_add_one(struct ib_device *device)
static int cma_add_one(struct ib_device *device)
{
	struct cma_device *cma_dev;
	struct rdma_id_private *id_priv;
	unsigned int i;
	unsigned long supported_gids = 0;
	int ret;

	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
	if (!cma_dev)
		return;
		return -ENOMEM;

	cma_dev->device = device;
	cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_gid_type),
					    GFP_KERNEL);
	if (!cma_dev->default_gid_type)
	if (!cma_dev->default_gid_type) {
		ret = -ENOMEM;
		goto free_cma_dev;
	}

	cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_roce_tos),
					    GFP_KERNEL);
	if (!cma_dev->default_roce_tos)
	if (!cma_dev->default_roce_tos) {
		ret = -ENOMEM;
		goto free_gid_type;
	}

	rdma_for_each_port (device, i) {
		supported_gids = roce_gid_type_mask_support(device, i);
@@ -4686,15 +4691,14 @@ static void cma_add_one(struct ib_device *device)
	mutex_unlock(&lock);

	trace_cm_add_one(device);
	return;
	return 0;

free_gid_type:
	kfree(cma_dev->default_gid_type);

free_cma_dev:
	kfree(cma_dev);

	return;
	return ret;
}

static int cma_remove_id_dev(struct rdma_id_private *id_priv)
@@ -4756,9 +4760,6 @@ static void cma_remove_one(struct ib_device *device, void *client_data)

	trace_cm_remove_one(device);

	if (!cma_dev)
		return;

	mutex_lock(&lock);
	list_del(&cma_dev->list);
	mutex_unlock(&lock);
+14 −2
Original line number Diff line number Diff line
@@ -677,8 +677,20 @@ static int add_client_context(struct ib_device *device,
	if (ret)
		goto out;
	downgrade_write(&device->client_data_rwsem);
	if (client->add)
		client->add(device);
	if (client->add) {
		if (client->add(device)) {
			/*
			 * If a client fails to add then the error code is
			 * ignored, but we won't call any more ops on this
			 * client.
			 */
			xa_erase(&device->client_data, client->client_id);
			up_read(&device->client_data_rwsem);
			ib_device_put(device);
			ib_client_put(client);
			return 0;
		}
	}

	/* Readers shall not see a client until add has been completed */
	xa_set_mark(&device->client_data, client->client_id,
+13 −4
Original line number Diff line number Diff line
@@ -3076,9 +3076,11 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
	return 0;
}

static void ib_mad_init_device(struct ib_device *device)
static int ib_mad_init_device(struct ib_device *device)
{
	int start, i;
	unsigned int count = 0;
	int ret;

	start = rdma_start_port(device);

@@ -3086,17 +3088,23 @@ static void ib_mad_init_device(struct ib_device *device)
		if (!rdma_cap_ib_mad(device, i))
			continue;

		if (ib_mad_port_open(device, i)) {
		ret = ib_mad_port_open(device, i);
		if (ret) {
			dev_err(&device->dev, "Couldn't open port %d\n", i);
			goto error;
		}
		if (ib_agent_port_open(device, i)) {
		ret = ib_agent_port_open(device, i);
		if (ret) {
			dev_err(&device->dev,
				"Couldn't open port %d for agents\n", i);
			goto error_agent;
		}
		count++;
	}
	return;
	if (!count)
		return -EOPNOTSUPP;

	return 0;

error_agent:
	if (ib_mad_port_close(device, i))
@@ -3113,6 +3121,7 @@ error:
		if (ib_mad_port_close(device, i))
			dev_err(&device->dev, "Couldn't close port %d\n", i);
	}
	return ret;
}

static void ib_mad_remove_device(struct ib_device *device, void *client_data)
+5 −7
Original line number Diff line number Diff line
@@ -42,7 +42,7 @@
#include <rdma/ib_cache.h>
#include "sa.h"

static void mcast_add_one(struct ib_device *device);
static int mcast_add_one(struct ib_device *device);
static void mcast_remove_one(struct ib_device *device, void *client_data);

static struct ib_client mcast_client = {
@@ -815,7 +815,7 @@ static void mcast_event_handler(struct ib_event_handler *handler,
	}
}

static void mcast_add_one(struct ib_device *device)
static int mcast_add_one(struct ib_device *device)
{
	struct mcast_device *dev;
	struct mcast_port *port;
@@ -825,7 +825,7 @@ static void mcast_add_one(struct ib_device *device)
	dev = kmalloc(struct_size(dev, port, device->phys_port_cnt),
		      GFP_KERNEL);
	if (!dev)
		return;
		return -ENOMEM;

	dev->start_port = rdma_start_port(device);
	dev->end_port = rdma_end_port(device);
@@ -845,7 +845,7 @@ static void mcast_add_one(struct ib_device *device)

	if (!count) {
		kfree(dev);
		return;
		return -EOPNOTSUPP;
	}

	dev->device = device;
@@ -853,6 +853,7 @@ static void mcast_add_one(struct ib_device *device)

	INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
	ib_register_event_handler(&dev->event_handler);
	return 0;
}

static void mcast_remove_one(struct ib_device *device, void *client_data)
@@ -861,9 +862,6 @@ static void mcast_remove_one(struct ib_device *device, void *client_data)
	struct mcast_port *port;
	int i;

	if (!dev)
		return;

	ib_unregister_event_handler(&dev->event_handler);
	flush_workqueue(mcast_wq);

Loading