Commit c09c1474 authored by David S. Miller

Merge branch 'net-vhost-improve-performance-when-enable-busyloop'

Tonghao Zhang says:

====================
net: vhost: improve performance when enable busyloop

This series improves guest receive performance.
On the handle_tx side, we now also poll the sock
receive queue at the same time; handle_rx does the
same thing for the tx path.

For the detailed performance report, see patch 4.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents af4325ec 441abde4
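
The heart of the series is one shared busy-poll helper, vhost_net_busy_poll(), used by both datapaths. As a rough sketch of the resulting call pattern (names taken from the diff below; locking and timeout handling elided):

	/* tx path (vhost_net_tx_get_vq_desc): the avail ring is empty, so
	 * busy-wait for new tx descriptors while also watching the rx side. */
	vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, false);

	/* rx path (vhost_net_rx_peek_head_len): nothing to peek on the
	 * socket, so busy-wait for rx data while also watching the tx vq. */
	vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true);

In both cases the inner loop exits early as soon as either direction can make progress, or when other vhost work is pending (busyloop_intr).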
drivers/vhost/net.c +85 −62
@@ -480,33 +480,94 @@ signal_used:
 	nvq->batched_xdp = 0;
 }
 
-static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
-				    struct vhost_net_virtqueue *nvq,
-				    unsigned int *out_num, unsigned int *in_num,
-				    struct msghdr *msghdr, bool *busyloop_intr)
+static int sock_has_rx_data(struct socket *sock)
 {
-	struct vhost_virtqueue *vq = &nvq->vq;
-	unsigned long uninitialized_var(endtime);
-	int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
-				  out_num, in_num, NULL, NULL);
+	if (unlikely(!sock))
+		return 0;
+
+	if (sock->ops->peek_len)
+		return sock->ops->peek_len(sock);
+
+	return skb_queue_empty(&sock->sk->sk_receive_queue);
+}
+
+static void vhost_net_busy_poll_try_queue(struct vhost_net *net,
+					  struct vhost_virtqueue *vq)
+{
+	if (!vhost_vq_avail_empty(&net->dev, vq)) {
+		vhost_poll_queue(&vq->poll);
+	} else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+		vhost_disable_notify(&net->dev, vq);
+		vhost_poll_queue(&vq->poll);
+	}
+}
+
+static void vhost_net_busy_poll(struct vhost_net *net,
+				struct vhost_virtqueue *rvq,
+				struct vhost_virtqueue *tvq,
+				bool *busyloop_intr,
+				bool poll_rx)
+{
+	unsigned long busyloop_timeout;
+	unsigned long endtime;
+	struct socket *sock;
+	struct vhost_virtqueue *vq = poll_rx ? tvq : rvq;
+
+	mutex_lock_nested(&vq->mutex, poll_rx ? VHOST_NET_VQ_TX: VHOST_NET_VQ_RX);
+	vhost_disable_notify(&net->dev, vq);
+	sock = rvq->private_data;
+
+	busyloop_timeout = poll_rx ? rvq->busyloop_timeout:
+				     tvq->busyloop_timeout;
 
-	if (r == vq->num && vq->busyloop_timeout) {
-		/* Flush batched packets first */
-		if (!vhost_sock_zcopy(vq->private_data))
-			vhost_tx_batch(net, nvq, vq->private_data, msghdr);
-		preempt_disable();
-		endtime = busy_clock() + vq->busyloop_timeout;
-		while (vhost_can_busy_poll(endtime)) {
-			if (vhost_has_work(vq->dev)) {
-				*busyloop_intr = true;
-				break;
-			}
-			if (!vhost_vq_avail_empty(vq->dev, vq))
-				break;
-			cpu_relax();
-		}
-		preempt_enable();
-		r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
-				      out_num, in_num, NULL, NULL);
+	preempt_disable();
+	endtime = busy_clock() + busyloop_timeout;
+
+	while (vhost_can_busy_poll(endtime)) {
+		if (vhost_has_work(&net->dev)) {
+			*busyloop_intr = true;
+			break;
+		}
+
+		if ((sock_has_rx_data(sock) &&
+		     !vhost_vq_avail_empty(&net->dev, rvq)) ||
+		    !vhost_vq_avail_empty(&net->dev, tvq))
+			break;
+
+		cpu_relax();
+	}
+
+	preempt_enable();
+
+	if (poll_rx || sock_has_rx_data(sock))
+		vhost_net_busy_poll_try_queue(net, vq);
+	else if (!poll_rx) /* On tx here, sock has no rx data. */
+		vhost_enable_notify(&net->dev, rvq);
+
+	mutex_unlock(&vq->mutex);
+}
+
+static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
+				    struct vhost_net_virtqueue *tnvq,
+				    unsigned int *out_num, unsigned int *in_num,
+				    struct msghdr *msghdr, bool *busyloop_intr)
+{
+	struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
+	struct vhost_virtqueue *rvq = &rnvq->vq;
+	struct vhost_virtqueue *tvq = &tnvq->vq;
+
+	int r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
+				  out_num, in_num, NULL, NULL);
+
+	if (r == tvq->num && tvq->busyloop_timeout) {
+		/* Flush batched packets first */
+		if (!vhost_sock_zcopy(tvq->private_data))
+			// vhost_net_signal_used(tnvq);
+			vhost_tx_batch(net, tnvq, tvq->private_data, msghdr);
+
+		vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, false);
+
+		r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
+				      out_num, in_num, NULL, NULL);
 	}

@@ -856,7 +917,7 @@ static void handle_tx(struct vhost_net *net)
 	struct vhost_virtqueue *vq = &nvq->vq;
 	struct socket *sock;
 
-	mutex_lock(&vq->mutex);
+	mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
 	sock = vq->private_data;
 	if (!sock)
 		goto out;
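
Note the lockdep annotations here and in vhost_net_busy_poll() above: the vq mutexes are now taken with mutex_lock_nested() using named queue indices instead of magic numbers, since the tx path can take the rx vq's mutex and vice versa. For reference, the subclasses are the existing vq indices from drivers/vhost/net.c:

	enum {
		VHOST_NET_VQ_RX = 0,
		VHOST_NET_VQ_TX = 1,
		VHOST_NET_VQ_MAX = 2,
	};

	/* In vhost_net_busy_poll() the subclass matches the vq being locked:
	 * the rx path (poll_rx == true) locks the tx vq, and vice versa. */
	mutex_lock_nested(&vq->mutex, poll_rx ? VHOST_NET_VQ_TX : VHOST_NET_VQ_RX);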
@@ -897,16 +958,6 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
 	return len;
 }
 
-static int sk_has_rx_data(struct sock *sk)
-{
-	struct socket *sock = sk->sk_socket;
-
-	if (sock->ops->peek_len)
-		return sock->ops->peek_len(sock);
-
-	return skb_queue_empty(&sk->sk_receive_queue);
-}
-
 static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
 				      bool *busyloop_intr)
 {
@@ -914,41 +965,13 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
 	struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
 	struct vhost_virtqueue *rvq = &rnvq->vq;
 	struct vhost_virtqueue *tvq = &tnvq->vq;
-	unsigned long uninitialized_var(endtime);
 	int len = peek_head_len(rnvq, sk);
 
-	if (!len && tvq->busyloop_timeout) {
+	if (!len && rvq->busyloop_timeout) {
 		/* Flush batched heads first */
 		vhost_net_signal_used(rnvq);
 		/* Both tx vq and rx socket were polled here */
-		mutex_lock_nested(&tvq->mutex, 1);
-		vhost_disable_notify(&net->dev, tvq);
-
-		preempt_disable();
-		endtime = busy_clock() + tvq->busyloop_timeout;
-
-		while (vhost_can_busy_poll(endtime)) {
-			if (vhost_has_work(&net->dev)) {
-				*busyloop_intr = true;
-				break;
-			}
-			if ((sk_has_rx_data(sk) &&
-			     !vhost_vq_avail_empty(&net->dev, rvq)) ||
-			    !vhost_vq_avail_empty(&net->dev, tvq))
-				break;
-			cpu_relax();
-		}
-
-		preempt_enable();
-
-		if (!vhost_vq_avail_empty(&net->dev, tvq)) {
-			vhost_poll_queue(&tvq->poll);
-		} else if (unlikely(vhost_enable_notify(&net->dev, tvq))) {
-			vhost_disable_notify(&net->dev, tvq);
-			vhost_poll_queue(&tvq->poll);
-		}
-
-		mutex_unlock(&tvq->mutex);
+		vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true);
 
 		len = peek_head_len(rnvq, sk);
 	}
@@ -1063,7 +1086,7 @@ static void handle_rx(struct vhost_net *net)
 	__virtio16 num_buffers;
 	int recv_pkts = 0;
 
-	mutex_lock_nested(&vq->mutex, 0);
+	mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_RX);
 	sock = vq->private_data;
 	if (!sock)
 		goto out;
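
The vhost.c changes below support this nesting: vhost_dev_lock_vqs() used to lock every vq up front with subclass i, which cannot coexist with the TX/RX subclass scheme above. The IOTLB paths now take each vq mutex one by one, only around the work that needs it, as in the reworked vhost_vq_meta_reset():

	/* Lock the vqs one by one rather than holding all of them
	 * across the whole IOTLB operation. */
	for (i = 0; i < d->nvqs; ++i) {
		mutex_lock(&d->vqs[i]->mutex);
		__vhost_vq_meta_reset(d->vqs[i]);
		mutex_unlock(&d->vqs[i]->mutex);
	}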
drivers/vhost/vhost.c +7 −17
@@ -294,8 +294,11 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
 {
 	int i;
 
-	for (i = 0; i < d->nvqs; ++i)
+	for (i = 0; i < d->nvqs; ++i) {
+		mutex_lock(&d->vqs[i]->mutex);
 		__vhost_vq_meta_reset(d->vqs[i]);
+		mutex_unlock(&d->vqs[i]->mutex);
+	}
 }
 
 static void vhost_vq_reset(struct vhost_dev *dev,
@@ -891,20 +894,6 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 #define vhost_get_used(vq, x, ptr) \
 	vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
 
-static void vhost_dev_lock_vqs(struct vhost_dev *d)
-{
-	int i = 0;
-	for (i = 0; i < d->nvqs; ++i)
-		mutex_lock_nested(&d->vqs[i]->mutex, i);
-}
-
-static void vhost_dev_unlock_vqs(struct vhost_dev *d)
-{
-	int i = 0;
-	for (i = 0; i < d->nvqs; ++i)
-		mutex_unlock(&d->vqs[i]->mutex);
-}
-
 static int vhost_new_umem_range(struct vhost_umem *umem,
 				u64 start, u64 size, u64 end,
 				u64 userspace_addr, int perm)
@@ -954,7 +943,10 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d,
 		if (msg->iova <= vq_msg->iova &&
 		    msg->iova + msg->size - 1 >= vq_msg->iova &&
 		    vq_msg->type == VHOST_IOTLB_MISS) {
+			mutex_lock(&node->vq->mutex);
 			vhost_poll_queue(&node->vq->poll);
+			mutex_unlock(&node->vq->mutex);
+
 			list_del(&node->node);
 			kfree(node);
 		}
@@ -986,7 +978,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
 	int ret = 0;
 
 	mutex_lock(&dev->mutex);
-	vhost_dev_lock_vqs(dev);
 	switch (msg->type) {
 	case VHOST_IOTLB_UPDATE:
 		if (!dev->iotlb) {
@@ -1020,7 +1011,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
 		break;
 	}
 
-	vhost_dev_unlock_vqs(dev);
 	mutex_unlock(&dev->mutex);
 
 	return ret;
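
All of the above is gated on vq->busyloop_timeout being nonzero, which userspace opts into per virtqueue through the existing VHOST_SET_VRING_BUSYLOOP_TIMEOUT ioctl (QEMU exposes this via the tap netdev's poll-us option). A minimal sketch, assuming an already-configured vhost-net fd; the 50 us value is illustrative, not a recommendation:

	#include <sys/ioctl.h>
	#include <linux/vhost.h>

	static int enable_busyloop(int vhost_fd)
	{
		struct vhost_vring_state state = {
			.index = 0,	/* virtqueue index */
			.num = 50,	/* busy-poll timeout, roughly in microseconds */
		};

		/* 0 (the default) disables busy polling entirely. */
		return ioctl(vhost_fd, VHOST_SET_VRING_BUSYLOOP_TIMEOUT, &state);
	}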