Commit 5ec9d8ee authored by Yishai Hadas's avatar Yishai Hadas Committed by Jason Gunthorpe
Browse files

IB/mlx5: Implement DEVX dispatching event



Implement DEVX dispatching event by looking up for the applicable
subscriptions for the reported event and using their target fd to
signal/set the event.

Signed-off-by: default avatarYishai Hadas <yishaih@mellanox.com>
Signed-off-by: default avatarLeon Romanovsky <leonro@mellanox.com>
Reviewed-by: default avatarJason Gunthorpe <jgg@mellanox.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@mellanox.com>
parent 75973853
Loading
Loading
Loading
Loading
+300 −3
Original line number Diff line number Diff line
@@ -34,6 +34,11 @@ struct devx_async_data {
	struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
};

struct devx_async_event_data {
	struct list_head list; /* headed in ev_file->event_list */
	struct mlx5_ib_uapi_devx_async_event_hdr hdr;
};

/* first level XA value data structure */
struct devx_event {
	struct xarray object_ids; /* second XA level, Key = object id */
@@ -77,6 +82,8 @@ struct devx_async_event_file {
	struct list_head event_list;
	struct mlx5_ib_dev *dev;
	u8 omit_data:1;
	u8 is_overflow_err:1;
	u8 is_destroyed:1;
};

#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
@@ -289,6 +296,29 @@ static u16 get_dec_obj_type(struct devx_obj *obj, u16 event_num)
	}
}

static u16 get_event_obj_type(unsigned long event_type, struct mlx5_eqe *eqe)
{
	switch (event_type) {
	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
	case MLX5_EVENT_TYPE_PATH_MIG:
	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
	case MLX5_EVENT_TYPE_COMM_EST:
	case MLX5_EVENT_TYPE_SQ_DRAINED:
	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
		return eqe->data.qp_srq.type;
	case MLX5_EVENT_TYPE_CQ_ERROR:
		return 0;
	case MLX5_EVENT_TYPE_DCT_DRAINED:
		return MLX5_EVENT_QUEUE_TYPE_DCT;
	default:
		return MLX5_GET(affiliated_event_header, &eqe->data, obj_type);
	}
}

static u32 get_dec_obj_id(u64 obj_id)
{
	return (obj_id & 0xffffffff);
@@ -2161,12 +2191,172 @@ static int devx_umem_cleanup(struct ib_uobject *uobject,
	return 0;
}

static bool is_unaffiliated_event(struct mlx5_core_dev *dev,
				  unsigned long event_type)
{
	__be64 *unaff_events;
	int mask_entry;
	int mask_bit;

	if (!MLX5_CAP_GEN(dev, event_cap))
		return is_legacy_unaffiliated_event_num(event_type);

	unaff_events = MLX5_CAP_DEV_EVENT(dev,
					  user_unaffiliated_events);
	WARN_ON(event_type > MAX_SUPP_EVENT_NUM);

	mask_entry = event_type / 64;
	mask_bit = event_type % 64;

	if (!(be64_to_cpu(unaff_events[mask_entry]) & (1ull << mask_bit)))
		return false;

	return true;
}

static u32 devx_get_obj_id_from_event(unsigned long event_type, void *data)
{
	struct mlx5_eqe *eqe = data;
	u32 obj_id = 0;

	switch (event_type) {
	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
	case MLX5_EVENT_TYPE_PATH_MIG:
	case MLX5_EVENT_TYPE_COMM_EST:
	case MLX5_EVENT_TYPE_SQ_DRAINED:
	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
		obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
		break;
	case MLX5_EVENT_TYPE_DCT_DRAINED:
		obj_id = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
		break;
	case MLX5_EVENT_TYPE_CQ_ERROR:
		obj_id = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
		break;
	default:
		obj_id = MLX5_GET(affiliated_event_header, &eqe->data, obj_id);
		break;
	}

	return obj_id;
}

static int deliver_event(struct devx_event_subscription *event_sub,
			 const void *data)
{
	struct devx_async_event_file *ev_file;
	struct devx_async_event_data *event_data;
	unsigned long flags;

	ev_file = event_sub->ev_file;

	if (ev_file->omit_data) {
		spin_lock_irqsave(&ev_file->lock, flags);
		if (!list_empty(&event_sub->event_list)) {
			spin_unlock_irqrestore(&ev_file->lock, flags);
			return 0;
		}

		list_add_tail(&event_sub->event_list, &ev_file->event_list);
		spin_unlock_irqrestore(&ev_file->lock, flags);
		wake_up_interruptible(&ev_file->poll_wait);
		return 0;
	}

	event_data = kzalloc(sizeof(*event_data) + sizeof(struct mlx5_eqe),
			     GFP_ATOMIC);
	if (!event_data) {
		spin_lock_irqsave(&ev_file->lock, flags);
		ev_file->is_overflow_err = 1;
		spin_unlock_irqrestore(&ev_file->lock, flags);
		return -ENOMEM;
	}

	event_data->hdr.cookie = event_sub->cookie;
	memcpy(event_data->hdr.out_data, data, sizeof(struct mlx5_eqe));

	spin_lock_irqsave(&ev_file->lock, flags);
	list_add_tail(&event_data->list, &ev_file->event_list);
	spin_unlock_irqrestore(&ev_file->lock, flags);
	wake_up_interruptible(&ev_file->poll_wait);

	return 0;
}

static void dispatch_event_fd(struct list_head *fd_list,
			      const void *data)
{
	struct devx_event_subscription *item;

	list_for_each_entry_rcu(item, fd_list, xa_list) {
		if (!get_file_rcu(item->filp))
			continue;

		if (item->eventfd) {
			eventfd_signal(item->eventfd, 1);
			fput(item->filp);
			continue;
		}

		deliver_event(item, data);
		fput(item->filp);
	}
}

static int devx_event_notifier(struct notifier_block *nb,
			       unsigned long event_type, void *data)
{
	struct mlx5_devx_event_table *table;
	struct mlx5_ib_dev *dev;
	struct devx_event *event;
	struct devx_obj_event *obj_event;
	u16 obj_type = 0;
	bool is_unaffiliated;
	u32 obj_id;

	/* Explicit filtering to kernel events which may occur frequently */
	if (event_type == MLX5_EVENT_TYPE_CMD ||
	    event_type == MLX5_EVENT_TYPE_PAGE_REQUEST)
		return NOTIFY_OK;

	table = container_of(nb, struct mlx5_devx_event_table, devx_nb.nb);
	dev = container_of(table, struct mlx5_ib_dev, devx_event_table);
	is_unaffiliated = is_unaffiliated_event(dev->mdev, event_type);

	if (!is_unaffiliated)
		obj_type = get_event_obj_type(event_type, data);

	rcu_read_lock();
	event = xa_load(&table->event_xa, event_type | (obj_type << 16));
	if (!event) {
		rcu_read_unlock();
		return NOTIFY_DONE;
	}

	if (is_unaffiliated) {
		dispatch_event_fd(&event->unaffiliated_list, data);
		rcu_read_unlock();
		return NOTIFY_OK;
	}

	obj_id = devx_get_obj_id_from_event(event_type, data);
	obj_event = xa_load(&event->object_ids, obj_id);
	if (!obj_event) {
		rcu_read_unlock();
		return NOTIFY_DONE;
	}

	dispatch_event_fd(&obj_event->obj_sub_list, data);

	rcu_read_unlock();
	return NOTIFY_OK;
}

void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev)
{
	struct mlx5_devx_event_table *table = &dev->devx_event_table;
@@ -2299,19 +2489,108 @@ static const struct file_operations devx_async_cmd_event_fops = {
static ssize_t devx_async_event_read(struct file *filp, char __user *buf,
				     size_t count, loff_t *pos)
{
	struct devx_async_event_file *ev_file = filp->private_data;
	struct devx_event_subscription *event_sub;
	struct devx_async_event_data *uninitialized_var(event);
	int ret = 0;
	size_t eventsz;
	bool omit_data;
	void *event_data;

	omit_data = ev_file->omit_data;

	spin_lock_irq(&ev_file->lock);

	if (ev_file->is_overflow_err) {
		ev_file->is_overflow_err = 0;
		spin_unlock_irq(&ev_file->lock);
		return -EOVERFLOW;
	}

	if (ev_file->is_destroyed) {
		spin_unlock_irq(&ev_file->lock);
		return -EIO;
	}

	while (list_empty(&ev_file->event_list)) {
		spin_unlock_irq(&ev_file->lock);

		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(ev_file->poll_wait,
			    (!list_empty(&ev_file->event_list) ||
			     ev_file->is_destroyed))) {
			return -ERESTARTSYS;
		}

		spin_lock_irq(&ev_file->lock);
		if (ev_file->is_destroyed) {
			spin_unlock_irq(&ev_file->lock);
			return -EIO;
		}
	}

	if (omit_data) {
		event_sub = list_first_entry(&ev_file->event_list,
					struct devx_event_subscription,
					event_list);
		eventsz = sizeof(event_sub->cookie);
		event_data = &event_sub->cookie;
	} else {
		event = list_first_entry(&ev_file->event_list,
				      struct devx_async_event_data, list);
		eventsz = sizeof(struct mlx5_eqe) +
			sizeof(struct mlx5_ib_uapi_devx_async_event_hdr);
		event_data = &event->hdr;
	}

	if (eventsz > count) {
		spin_unlock_irq(&ev_file->lock);
		return -EINVAL;
	}

	if (omit_data)
		list_del_init(&event_sub->event_list);
	else
		list_del(&event->list);

	spin_unlock_irq(&ev_file->lock);

	if (copy_to_user(buf, event_data, eventsz))
		/* This points to an application issue, not a kernel concern */
		ret = -EFAULT;
	else
		ret = eventsz;

	if (!omit_data)
		kfree(event);
	return ret;
}

static __poll_t devx_async_event_poll(struct file *filp,
				      struct poll_table_struct *wait)
{
	return 0;
	struct devx_async_event_file *ev_file = filp->private_data;
	__poll_t pollflags = 0;

	poll_wait(filp, &ev_file->poll_wait, wait);

	spin_lock_irq(&ev_file->lock);
	if (ev_file->is_destroyed)
		pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
	else if (!list_empty(&ev_file->event_list))
		pollflags = EPOLLIN | EPOLLRDNORM;
	spin_unlock_irq(&ev_file->lock);

	return pollflags;
}

static int devx_async_event_close(struct inode *inode, struct file *filp)
{
	struct devx_async_event_file *ev_file = filp->private_data;
	struct devx_event_subscription *event_sub, *event_sub_tmp;
	struct devx_async_event_data *entry, *tmp;

	mutex_lock(&ev_file->dev->devx_event_table.event_xa_lock);
	/* delete the subscriptions which are related to this FD */
@@ -2328,6 +2607,15 @@ static int devx_async_event_close(struct inode *inode, struct file *filp)

	mutex_unlock(&ev_file->dev->devx_event_table.event_xa_lock);

	/* free the pending events allocation */
	if (!ev_file->omit_data) {
		spin_lock_irq(&ev_file->lock);
		list_for_each_entry_safe(entry, tmp,
					 &ev_file->event_list, list)
			kfree(entry); /* read can't come any more */
		spin_unlock_irq(&ev_file->lock);
	}

	uverbs_close_fd(filp);
	put_device(&ev_file->dev->ib_dev.dev);
	return 0;
@@ -2363,6 +2651,15 @@ static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj,
static int devx_hot_unplug_async_event_file(struct ib_uobject *uobj,
					    enum rdma_remove_reason why)
{
	struct devx_async_event_file *ev_file =
		container_of(uobj, struct devx_async_event_file,
			     uobj);

	spin_lock_irq(&ev_file->lock);
	ev_file->is_destroyed = 1;
	spin_unlock_irq(&ev_file->lock);

	wake_up_interruptible(&ev_file->poll_wait);
	return 0;
};

+5 −0
Original line number Diff line number Diff line
@@ -67,5 +67,10 @@ enum mlx5_ib_uapi_devx_create_event_channel_flags {
	MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA = 1 << 0,
};

struct mlx5_ib_uapi_devx_async_event_hdr {
	__aligned_u64	cookie;
	__u8		out_data[];
};

#endif