Commit dce45af5 authored by Linus Torvalds
Pull rdma updates from Jason Gunthorpe:
 "This has been a smaller cycle than normal. One new driver was
  accepted, which is unusual, and at least one more driver remains in
  review on the list.

  Summary:

   - Driver fixes for hns, hfi1, nes, rxe, i40iw, mlx5, cxgb4,
     vmw_pvrdma

   - Many patches from Matthew Wilcox converting radix tree and IDR
     users to use xarray (a conversion sketch follows this summary)

   - Introduction of tracepoints to the MAD layer

   - Build large SGLs up front for DMA mapping and have the drivers
     split them (see the block-iterator sketch after this summary)

   - Generally clean SGL handling code throughout the subsystem

   - Support for restricting RDMA devices to net namespaces for
     containers

   - Progress to remove object allocation boilerplate code from drivers

   - Change in how the mlx5 driver shows representor ports linked to VFs

   - mlx5 uapi feature to access the on-chip SW ICM memory

   - Add a new driver for 'EFA'. This is HW that supports user space
     packet processing through QPs in Amazon's cloud"
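The ib_cm hunk further down is one instance of the IDR-to-XArray conversions mentioned above. The following is a minimal sketch of that pattern, not the patch itself: only the xa_*() calls are real API, every example_* identifier is hypothetical, and the real ib_cm code uses the xa_*_irq variants because its table was previously protected by an IRQ-safe spinlock.

/*
 * Minimal sketch of the IDR -> XArray conversion pattern. Only the
 * xa_*() calls are real API; all example_* names are illustrative.
 */
#include <linux/xarray.h>

static DEFINE_XARRAY_ALLOC(example_table);	/* was: DEFINE_IDR() + spinlock */
static u32 example_next_id;			/* cyclic allocation cursor */

static int example_alloc_id(void *entry, u32 *id)
{
	/* Replaces idr_preload()/idr_alloc_cyclic() done under a spinlock. */
	int err = xa_alloc_cyclic(&example_table, id, entry, xa_limit_32b,
				  &example_next_id, GFP_KERNEL);

	return err < 0 ? err : 0;	/* xa_alloc_cyclic() returns 1 on wrap-around */
}

static void *example_lookup(u32 id)
{
	return xa_load(&example_table, id);	/* was: idr_find() */
}

static void example_free_id(u32 id)
{
	xa_erase(&example_table, id);		/* was: idr_remove() */
}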
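The "large SGLs" item corresponds to the umem/verbs helpers named in the shortlog below (ib_umem_find_best_pgsz() and the rdma_for_each_block() iterator). Below is a hedged sketch of how a driver might consume them to fill a device page table; the function, its parameters, and the ib_umem field names reflect this cycle's API as best understood, and everything prefixed example_ is hypothetical rather than taken from an in-tree driver.

/*
 * Pick the largest MR page size the device supports, then walk the
 * DMA-mapped SGL in aligned blocks of that size. Only the two new
 * helpers are real API; the surrounding driver code is illustrative.
 */
#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>

static int example_fill_page_list(struct ib_umem *umem, u64 virt_addr,
				  u64 *page_list, unsigned long supported_pgsz)
{
	struct ib_block_iter biter;
	unsigned long pg_sz;
	int i = 0;

	/* Best page size out of the driver's supported bitmap for this MR. */
	pg_sz = ib_umem_find_best_pgsz(umem, supported_pgsz, virt_addr);
	if (!pg_sz)
		return -EINVAL;

	/* Aligned, contiguous pg_sz blocks rather than per-page entries. */
	rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, pg_sz)
		page_list[i++] = rdma_block_iter_dma_address(&biter);

	return 0;
}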

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (186 commits)
  RDMA/ipoib: Allow user space differentiate between valid dev_port
  IB/core, ipoib: Do not overreact to SM LID change event
  RDMA/device: Don't fire uevent before device is fully initialized
  lib/scatterlist: Remove leftover from sg_page_iter comment
  RDMA/efa: Add driver to Kconfig/Makefile
  RDMA/efa: Add the efa module
  RDMA/efa: Add EFA verbs implementation
  RDMA/efa: Add common command handlers
  RDMA/efa: Implement functions that submit and complete admin commands
  RDMA/efa: Add the ABI definitions
  RDMA/efa: Add the com service API definitions
  RDMA/efa: Add the efa_com.h file
  RDMA/efa: Add the efa.h header file
  RDMA/efa: Add EFA device definitions
  RDMA: Add EFA related definitions
  RDMA/umem: Remove hugetlb flag
  RDMA/bnxt_re: Use core helpers to get aligned DMA address
  RDMA/i40iw: Use core helpers to get aligned DMA address within a supported page size
  RDMA/verbs: Add a DMA iterator to return aligned contiguous memory blocks
  RDMA/umem: Add API to find best driver supported page size in an MR
  ...
parents 055128ee b79656ed
+15 −2
@@ -745,6 +745,15 @@ S: Supported
F:	Documentation/networking/device_drivers/amazon/ena.txt
F:	drivers/net/ethernet/amazon/

AMAZON RDMA EFA DRIVER
M:	Gal Pressman <galpress@amazon.com>
R:	Yossi Leybovich <sleybo@amazon.com>
L:	linux-rdma@vger.kernel.org
Q:	https://patchwork.kernel.org/project/linux-rdma/list/
S:	Supported
F:	drivers/infiniband/hw/efa/
F:	include/uapi/rdma/efa-abi.h

AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER
M:	Tom Lendacky <thomas.lendacky@amd.com>
M:	Gary Hook <gary.hook@amd.com>
@@ -4279,7 +4288,7 @@ S: Supported
F:	drivers/scsi/cxgbi/cxgb3i

CXGB3 IWARP RNIC DRIVER (IW_CXGB3)
M:	Steve Wise <swise@chelsio.com>
M:	Potnuri Bharat Teja <bharat@chelsio.com>
L:	linux-rdma@vger.kernel.org
W:	http://www.openfabrics.org
S:	Supported
@@ -4308,7 +4317,7 @@ S: Supported
F:	drivers/scsi/cxgbi/cxgb4i

CXGB4 IWARP RNIC DRIVER (IW_CXGB4)
M:	Steve Wise <swise@chelsio.com>
M:	Potnuri Bharat Teja <bharat@chelsio.com>
L:	linux-rdma@vger.kernel.org
W:	http://www.openfabrics.org
S:	Supported
@@ -7727,6 +7736,10 @@ F: drivers/infiniband/
F:	include/uapi/linux/if_infiniband.h
F:	include/uapi/rdma/
F:	include/rdma/
F:	include/trace/events/ib_mad.h
F:	include/trace/events/ib_umad.h
F:	samples/bpf/ibumad_kern.c
F:	samples/bpf/ibumad_user.c

INGENIC JZ4780 DMA Driver
M:	Zubair Lutfullah Kakakhel <Zubair.Kakakhel@imgtec.com>
+1 −0
@@ -93,6 +93,7 @@ source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/cxgb3/Kconfig"
source "drivers/infiniband/hw/cxgb4/Kconfig"
source "drivers/infiniband/hw/efa/Kconfig"
source "drivers/infiniband/hw/i40iw/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/hw/mlx5/Kconfig"
+1 −0
@@ -45,6 +45,7 @@
#include <net/ipv6_stubs.h>
#include <net/ip6_route.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_sa.h>
#include <rdma/ib.h>
#include <rdma/rdma_netlink.h>
+115 −30
@@ -78,11 +78,22 @@ enum gid_table_entry_state {
	GID_TABLE_ENTRY_PENDING_DEL	= 3,
};

struct roce_gid_ndev_storage {
	struct rcu_head rcu_head;
	struct net_device *ndev;
};

struct ib_gid_table_entry {
	struct kref			kref;
	struct work_struct		del_work;
	struct ib_gid_attr		attr;
	void				*context;
	/* Store the ndev pointer to release reference later on in
	 * call_rcu context because by that time gid_table_entry
	 * and attr might be already freed. So keep a copy of it.
	 * ndev_storage is freed by rcu callback.
	 */
	struct roce_gid_ndev_storage	*ndev_storage;
	enum gid_table_entry_state	state;
};

@@ -206,6 +217,20 @@ static void schedule_free_gid(struct kref *kref)
	queue_work(ib_wq, &entry->del_work);
}

static void put_gid_ndev(struct rcu_head *head)
{
	struct roce_gid_ndev_storage *storage =
		container_of(head, struct roce_gid_ndev_storage, rcu_head);

	WARN_ON(!storage->ndev);
	/* At this point its safe to release netdev reference,
	 * as all callers working on gid_attr->ndev are done
	 * using this netdev.
	 */
	dev_put(storage->ndev);
	kfree(storage);
}

static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
{
	struct ib_device *device = entry->attr.device;
@@ -228,8 +253,8 @@ static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
	/* Now this index is ready to be allocated */
	write_unlock_irq(&table->rwlock);

	if (entry->attr.ndev)
		dev_put(entry->attr.ndev);
	if (entry->ndev_storage)
		call_rcu(&entry->ndev_storage->rcu_head, put_gid_ndev);
	kfree(entry);
}

@@ -266,14 +291,25 @@ static struct ib_gid_table_entry *
alloc_gid_entry(const struct ib_gid_attr *attr)
{
	struct ib_gid_table_entry *entry;
	struct net_device *ndev;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return NULL;

	ndev = rcu_dereference_protected(attr->ndev, 1);
	if (ndev) {
		entry->ndev_storage = kzalloc(sizeof(*entry->ndev_storage),
					      GFP_KERNEL);
		if (!entry->ndev_storage) {
			kfree(entry);
			return NULL;
		}
		dev_hold(ndev);
		entry->ndev_storage->ndev = ndev;
	}
	kref_init(&entry->kref);
	memcpy(&entry->attr, attr, sizeof(*attr));
	if (entry->attr.ndev)
		dev_hold(entry->attr.ndev);
	INIT_WORK(&entry->del_work, free_gid_work);
	entry->state = GID_TABLE_ENTRY_INVALID;
	return entry;
@@ -343,6 +379,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
static void del_gid(struct ib_device *ib_dev, u8 port,
		    struct ib_gid_table *table, int ix)
{
	struct roce_gid_ndev_storage *ndev_storage;
	struct ib_gid_table_entry *entry;

	lockdep_assert_held(&table->lock);
@@ -360,6 +397,13 @@ static void del_gid(struct ib_device *ib_dev, u8 port,
		table->data_vec[ix] = NULL;
	write_unlock_irq(&table->rwlock);

	ndev_storage = entry->ndev_storage;
	if (ndev_storage) {
		entry->ndev_storage = NULL;
		rcu_assign_pointer(entry->attr.ndev, NULL);
		call_rcu(&ndev_storage->rcu_head, put_gid_ndev);
	}

	if (rdma_cap_roce_gid_table(ib_dev, port))
		ib_dev->ops.del_gid(&entry->attr, &entry->context);

@@ -543,30 +587,11 @@ out_unlock:
int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
		     union ib_gid *gid, struct ib_gid_attr *attr)
{
	struct net_device *idev;
	unsigned long mask;
	int ret;

	idev = ib_device_get_netdev(ib_dev, port);
	if (idev && attr->ndev != idev) {
		union ib_gid default_gid;

		/* Adding default GIDs is not permitted */
		make_default_gid(idev, &default_gid);
		if (!memcmp(gid, &default_gid, sizeof(*gid))) {
			dev_put(idev);
			return -EPERM;
		}
	}
	if (idev)
		dev_put(idev);

	mask = GID_ATTR_FIND_MASK_GID |
	unsigned long mask = GID_ATTR_FIND_MASK_GID |
			     GID_ATTR_FIND_MASK_GID_TYPE |
			     GID_ATTR_FIND_MASK_NETDEV;

	ret = __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
	return ret;
	return __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
}

static int
@@ -1263,11 +1288,72 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)

	read_lock_irqsave(&table->rwlock, flags);
	valid = is_gid_entry_valid(table->data_vec[attr->index]);
	if (valid && attr->ndev && (READ_ONCE(attr->ndev->flags) & IFF_UP))
		ndev = attr->ndev;
	if (valid) {
		ndev = rcu_dereference(attr->ndev);
		if (!ndev ||
		    (ndev && ((READ_ONCE(ndev->flags) & IFF_UP) == 0)))
			ndev = ERR_PTR(-ENODEV);
	}
	read_unlock_irqrestore(&table->rwlock, flags);
	return ndev;
}
EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu);

static int get_lower_dev_vlan(struct net_device *lower_dev, void *data)
{
	u16 *vlan_id = data;

	if (is_vlan_dev(lower_dev))
		*vlan_id = vlan_dev_vlan_id(lower_dev);

	/* We are interested only in first level vlan device, so
	 * always return 1 to stop iterating over next level devices.
	 */
	return 1;
}

/**
 * rdma_read_gid_l2_fields - Read the vlan ID and source MAC address
 *			     of a GID entry.
 *
 * @attr:	GID attribute pointer whose L2 fields to be read
 * @vlan_id:	Pointer to vlan id to fill up if the GID entry has
 *		vlan id. It is optional.
 * @smac:	Pointer to smac to fill up for a GID entry. It is optional.
 *
 * rdma_read_gid_l2_fields() returns 0 on success and returns vlan id
 * (if gid entry has vlan) and source MAC, or returns error.
 */
int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
			    u16 *vlan_id, u8 *smac)
{
	struct net_device *ndev;

	rcu_read_lock();
	ndev = rcu_dereference(attr->ndev);
	if (!ndev) {
		rcu_read_unlock();
		return -ENODEV;
	}
	if (smac)
		ether_addr_copy(smac, ndev->dev_addr);
	if (vlan_id) {
		*vlan_id = 0xffff;
		if (is_vlan_dev(ndev)) {
			*vlan_id = vlan_dev_vlan_id(ndev);
		} else {
			/* If the netdev is upper device and if it's lower
			 * device is vlan device, consider vlan id of the
			 * the lower vlan device for this gid entry.
			 */
			netdev_walk_all_lower_dev_rcu(attr->ndev,
					get_lower_dev_vlan, vlan_id);
		}
	}
	rcu_read_unlock();
	return 0;
}
EXPORT_SYMBOL(rdma_read_gid_l2_fields);

static int config_non_roce_gid_cache(struct ib_device *device,
				     u8 port, int gid_tbl_len)
@@ -1392,7 +1478,6 @@ static void ib_cache_event(struct ib_event_handler *handler,
	    event->event == IB_EVENT_PORT_ACTIVE ||
	    event->event == IB_EVENT_LID_CHANGE  ||
	    event->event == IB_EVENT_PKEY_CHANGE ||
	    event->event == IB_EVENT_SM_CHANGE   ||
	    event->event == IB_EVENT_CLIENT_REREGISTER ||
	    event->event == IB_EVENT_GID_CHANGE) {
		work = kmalloc(sizeof *work, GFP_ATOMIC);
+29 −65
@@ -52,6 +52,7 @@
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include "cm_msgs.h"
#include "core_priv.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
@@ -124,7 +125,8 @@ static struct ib_cm {
	struct rb_root remote_qp_table;
	struct rb_root remote_id_table;
	struct rb_root remote_sidr_table;
	struct idr local_id_table;
	struct xarray local_id_table;
	u32 local_id_next;
	__be32 random_id_operand;
	struct list_head timewait_list;
	struct workqueue_struct *wq;
@@ -219,7 +221,6 @@ struct cm_port {
struct cm_device {
	struct list_head list;
	struct ib_device *ib_device;
	struct device *device;
	u8 ack_delay;
	int going_down;
	struct cm_port *port[0];
@@ -598,35 +599,31 @@ static int cm_init_av_by_path(struct sa_path_rec *path,

static int cm_alloc_id(struct cm_id_private *cm_id_priv)
{
	unsigned long flags;
	int id;

	idr_preload(GFP_KERNEL);
	spin_lock_irqsave(&cm.lock, flags);

	id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);
	int err;
	u32 id;

	spin_unlock_irqrestore(&cm.lock, flags);
	idr_preload_end();
	err = xa_alloc_cyclic_irq(&cm.local_id_table, &id, cm_id_priv,
			xa_limit_32b, &cm.local_id_next, GFP_KERNEL);

	cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
	return id < 0 ? id : 0;
	return err;
}

static u32 cm_local_id(__be32 local_id)
{
	return (__force u32) (local_id ^ cm.random_id_operand);
}

static void cm_free_id(__be32 local_id)
{
	spin_lock_irq(&cm.lock);
	idr_remove(&cm.local_id_table,
		   (__force int) (local_id ^ cm.random_id_operand));
	spin_unlock_irq(&cm.lock);
	xa_erase_irq(&cm.local_id_table, cm_local_id(local_id));
}

static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
{
	struct cm_id_private *cm_id_priv;

	cm_id_priv = idr_find(&cm.local_id_table,
			      (__force int) (local_id ^ cm.random_id_operand));
	cm_id_priv = xa_load(&cm.local_id_table, cm_local_id(local_id));
	if (cm_id_priv) {
		if (cm_id_priv->id.remote_id == remote_id)
			atomic_inc(&cm_id_priv->refcount);
@@ -1988,11 +1985,12 @@ static int cm_req_handler(struct cm_work *work)
	grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
	gid_attr = grh->sgid_attr;

	if (gid_attr && gid_attr->ndev) {
	if (gid_attr &&
	    rdma_protocol_roce(work->port->cm_dev->ib_device,
			       work->port->port_num)) {
		work->path[0].rec_type =
			sa_conv_gid_to_pathrec_type(gid_attr->gid_type);
	} else {
		/* If no GID attribute or ndev is null, it is not RoCE. */
		cm_path_set_rec_type(work->port->cm_dev->ib_device,
				     work->port->port_num,
				     &work->path[0],
@@ -2824,9 +2822,8 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
			spin_unlock_irq(&cm.lock);
			return NULL;
		}
		cm_id_priv = idr_find(&cm.local_id_table, (__force int)
				      (timewait_info->work.local_id ^
				       cm.random_id_operand));
		cm_id_priv = xa_load(&cm.local_id_table,
				cm_local_id(timewait_info->work.local_id));
		if (cm_id_priv) {
			if (cm_id_priv->id.remote_id == remote_id)
				atomic_inc(&cm_id_priv->refcount);
@@ -4276,18 +4273,6 @@ static struct kobj_type cm_counter_obj_type = {
	.default_attrs = cm_counter_default_attrs
};

static void cm_release_port_obj(struct kobject *obj)
{
	struct cm_port *cm_port;

	cm_port = container_of(obj, struct cm_port, port_obj);
	kfree(cm_port);
}

static struct kobj_type cm_port_obj_type = {
	.release = cm_release_port_obj
};

static char *cm_devnode(struct device *dev, umode_t *mode)
{
	if (mode)
@@ -4306,19 +4291,12 @@ static int cm_create_port_fs(struct cm_port *port)
{
	int i, ret;

	ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type,
				   &port->cm_dev->device->kobj,
				   "%d", port->port_num);
	if (ret) {
		kfree(port);
		return ret;
	}

	for (i = 0; i < CM_COUNTER_GROUPS; i++) {
		ret = kobject_init_and_add(&port->counter_group[i].obj,
		ret = ib_port_register_module_stat(port->cm_dev->ib_device,
						   port->port_num,
						   &port->counter_group[i].obj,
						   &cm_counter_obj_type,
					   &port->port_obj,
					   "%s", counter_group_names[i]);
						   counter_group_names[i]);
		if (ret)
			goto error;
	}
@@ -4327,8 +4305,7 @@ static int cm_create_port_fs(struct cm_port *port)

error:
	while (i--)
		kobject_put(&port->counter_group[i].obj);
	kobject_put(&port->port_obj);
		ib_port_unregister_module_stat(&port->counter_group[i].obj);
	return ret;

}
@@ -4338,9 +4315,8 @@ static void cm_remove_port_fs(struct cm_port *port)
	int i;

	for (i = 0; i < CM_COUNTER_GROUPS; i++)
		kobject_put(&port->counter_group[i].obj);
		ib_port_unregister_module_stat(&port->counter_group[i].obj);

	kobject_put(&port->port_obj);
}

static void cm_add_one(struct ib_device *ib_device)
@@ -4367,13 +4343,6 @@ static void cm_add_one(struct ib_device *ib_device)
	cm_dev->ib_device = ib_device;
	cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
	cm_dev->going_down = 0;
	cm_dev->device = device_create(&cm_class, &ib_device->dev,
				       MKDEV(0, 0), NULL,
				       "%s", dev_name(&ib_device->dev));
	if (IS_ERR(cm_dev->device)) {
		kfree(cm_dev);
		return;
	}

	set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
	for (i = 1; i <= ib_device->phys_port_cnt; i++) {
@@ -4440,7 +4409,6 @@ error1:
		cm_remove_port_fs(port);
	}
free:
	device_unregister(cm_dev->device);
	kfree(cm_dev);
}

@@ -4494,7 +4462,6 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
		cm_remove_port_fs(port);
	}

	device_unregister(cm_dev->device);
	kfree(cm_dev);
}

@@ -4502,7 +4469,6 @@ static int __init ib_cm_init(void)
{
	int ret;

	memset(&cm, 0, sizeof cm);
	INIT_LIST_HEAD(&cm.device_list);
	rwlock_init(&cm.device_lock);
	spin_lock_init(&cm.lock);
@@ -4512,7 +4478,7 @@ static int __init ib_cm_init(void)
	cm.remote_id_table = RB_ROOT;
	cm.remote_qp_table = RB_ROOT;
	cm.remote_sidr_table = RB_ROOT;
	idr_init(&cm.local_id_table);
	xa_init_flags(&cm.local_id_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
	get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
	INIT_LIST_HEAD(&cm.timewait_list);

@@ -4538,7 +4504,6 @@ error3:
error2:
	class_unregister(&cm_class);
error1:
	idr_destroy(&cm.local_id_table);
	return ret;
}

@@ -4560,9 +4525,8 @@ static void __exit ib_cm_cleanup(void)
	}

	class_unregister(&cm_class);
	idr_destroy(&cm.local_id_table);
	WARN_ON(!xa_empty(&cm.local_id_table));
}

module_init(ib_cm_init);
module_exit(ib_cm_cleanup);