Commit 24df31f8 authored by David S. Miller

Merge branch 'vsock-add-multi-transports-support'

Stefano Garzarella says:

====================
vsock: add multi-transports support

Most of the patches have been reviewed by Dexuan, Stefan, and Jorgen.
The following patches still need review:
- [11/15] vsock: add multi-transports support
- [12/15] vsock/vmci: register vmci_transport only when VMCI guest/host
          are active
- [15/15] vhost/vsock: refuse CID assigned to the guest->host transport

RFC: https://patchwork.ozlabs.org/cover/1168442/
v1: https://patchwork.ozlabs.org/cover/1181986/

v1 -> v2:
- Patch 11:
    + vmci_transport: send a reset when vsock_assign_transport() fails
      [Jorgen]
    + fixed loopback in the guests, checking whether the remote_addr
      matches transport_g2h->get_local_cid()
    + virtio_transport_common: updated the space available while
      creating the new child socket during a connection request
- Patch 12:
    + removed 'features' variable in vmci_transport_init() [Stefan]
    + added a flag to register the host side only once [Jorgen]
- Added patch 15 to refuse a CID assigned to the guest->host transport
  in the vhost_transport

This series adds multi-transport support to vsock, following
this proposal: https://www.spinics.net/lists/netdev/msg575792.html

With multi-transport support, we can use VSOCK in nested VMs (even
with different hypervisors), loading both the guest->host and the
host->guest transports at the same time.
Before this series, vmci_transport supported this behavior, but only
when using the VMware hypervisor on L0, L1, etc.
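
For example, an application in an L2 guest reaches its L1 host through
the guest->host transport using the usual AF_VSOCK API, while L1 can
serve its own guests through the host->guest transport. A minimal,
illustrative userspace client (the port and the payload are arbitrary):

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

int main(void)
{
	/* Connect to the host of the VM we are running in. */
	struct sockaddr_vm addr = {
		.svm_family = AF_VSOCK,
		.svm_cid    = VMADDR_CID_HOST,
		.svm_port   = 1234,	/* arbitrary example port */
	};
	int fd = socket(AF_VSOCK, SOCK_STREAM, 0);

	if (fd < 0 ||
	    connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("vsock");
		return 1;
	}

	write(fd, "hello\n", 6);
	close(fd);
	return 0;
}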

The first 9 patches are cleanups and preparations; some of them could
go in regardless of this series.

Patch 10 changes hvs_remote_addr_init(), setting VMADDR_CID_HOST as
the remote CID instead of VMADDR_CID_ANY, so that the choice of the
transport to use works properly.
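
The change is essentially one line in hv_sock (shown here as a rough
before/after sketch, not the full hunk):

/* before: the peer CID was left unspecified */
vsock_addr_init(remote, VMADDR_CID_ANY, VMADDR_PORT_ANY);
/* after: on Hyper-V the peer is always the host */
vsock_addr_init(remote, VMADDR_CID_HOST, VMADDR_PORT_ANY);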

Patch 11 adds the multi-transport support itself.
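
The core of the new logic is picking a transport from the remote CID;
a simplified sketch of vsock_assign_transport() (locking, the DGRAM
case, and error handling omitted):

/* Guest->host if the peer is the host, or ourselves for loopback;
 * host->guest otherwise.
 */
if (remote_cid <= VMADDR_CID_HOST ||
    (transport_g2h &&
     remote_cid == transport_g2h->get_local_cid()))
	new_transport = transport_g2h;
else
	new_transport = transport_h2g;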

Patch 12 slightly changes the vmci_transport and the vmci driver to
register the vmci_transport only when a host or a guest is active.
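
On the transport side this might look roughly like the following
sketch (the exact feature handling is illustrative, not the verbatim
vmci_transport code; vmci_register_vsock_callback() and
vsock_core_register() are shown in the diffs below):

static void vmci_vsock_transport_cb(bool is_host)
{
	int features = is_host ? VSOCK_TRANSPORT_F_H2G
			       : VSOCK_TRANSPORT_F_G2H;

	/* return value checking omitted in this sketch */
	vsock_core_register(&vmci_transport, features);
}

/* at module init time */
vmci_register_vsock_callback(vmci_vsock_transport_cb);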

Patch 13 prevents the transport modules from being unloaded while
sockets are still assigned to them.
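
This builds on the new .module field of the transport (visible in the
vhost_transport diff below); the mechanism is the usual module
refcounting, roughly:

/* When a socket is assigned to a transport: pin the module. */
if (!try_module_get(new_transport->module))
	return -ENODEV;

/* When the socket releases the transport: drop the reference. */
module_put(transport->module);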

Patch 14 fixes an issue in the bind() logic that is only discoverable
with the new multi-transport support.
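
The idea, as a sketch of the bound-socket lookup loop body (not the
exact af_vsock.c hunk): the CID must be compared as well, with
VMADDR_CID_ANY acting as a wildcard, so the same port can be bound
with different CIDs:

/* Match on the full address, or on the port alone when either
 * side is bound to the wildcard CID.
 */
if (vsock_addr_equals_addr(addr, &vsk->local_addr))
	return sk_vsock(vsk);

if (addr->svm_port == vsk->local_addr.svm_port &&
    (vsk->local_addr.svm_cid == VMADDR_CID_ANY ||
     addr->svm_cid == VMADDR_CID_ANY))
	return sk_vsock(vsk);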

Patch 15 refuses a CID already assigned to the guest->host transport
in the vhost_transport.

I've tested this series with nested KVM (vsock-transport [L0,L1],
virtio-transport [L1,L2]) and with VMware (L0) + KVM (L1)
(vmci-transport [L0,L1], vhost-transport [L1], virtio-transport [L2]).

Dexuan successfully tested the RFC series on Hyper-V with a Linux guest.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 798a496b ed8640a9
+67 −0
@@ -28,6 +28,10 @@ MODULE_PARM_DESC(disable_guest,
static bool vmci_guest_personality_initialized;
static bool vmci_host_personality_initialized;

static DEFINE_MUTEX(vmci_vsock_mutex); /* protects vmci_vsock_transport_cb */
static vmci_vsock_cb vmci_vsock_transport_cb;
static bool vmci_vsock_cb_host_called;

/*
 * vmci_get_context_id() - Gets the current context ID.
 *
@@ -45,6 +49,69 @@ u32 vmci_get_context_id(void)
}
EXPORT_SYMBOL_GPL(vmci_get_context_id);

/*
 * vmci_register_vsock_callback() - Register the VSOCK vmci_transport callback.
 *
 * The callback will be called when the first host or guest becomes active,
 * or if they are already active when this function is called.
 * To unregister the callback, call this function with NULL parameter.
 *
 * Returns 0 on success. -EBUSY if a callback is already registered.
 */
int vmci_register_vsock_callback(vmci_vsock_cb callback)
{
	int err = 0;

	mutex_lock(&vmci_vsock_mutex);

	if (vmci_vsock_transport_cb && callback) {
		err = -EBUSY;
		goto out;
	}

	vmci_vsock_transport_cb = callback;

	if (!vmci_vsock_transport_cb) {
		vmci_vsock_cb_host_called = false;
		goto out;
	}

	if (vmci_guest_code_active())
		vmci_vsock_transport_cb(false);

	if (vmci_host_users() > 0) {
		vmci_vsock_cb_host_called = true;
		vmci_vsock_transport_cb(true);
	}

out:
	mutex_unlock(&vmci_vsock_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(vmci_register_vsock_callback);

void vmci_call_vsock_callback(bool is_host)
{
	mutex_lock(&vmci_vsock_mutex);

	if (!vmci_vsock_transport_cb)
		goto out;

	/* In the host, this function could be called multiple times,
	 * but we want to register it only once.
	 */
	if (is_host) {
		if (vmci_vsock_cb_host_called)
			goto out;

		vmci_vsock_cb_host_called = true;
	}

	vmci_vsock_transport_cb(is_host);
out:
	mutex_unlock(&vmci_vsock_mutex);
}

static int __init vmci_drv_init(void)
{
	int vmci_err;
+2 −0
@@ -36,10 +36,12 @@ extern struct pci_dev *vmci_pdev;

u32 vmci_get_context_id(void);
int vmci_send_datagram(struct vmci_datagram *dg);
void vmci_call_vsock_callback(bool is_host);

int vmci_host_init(void);
void vmci_host_exit(void);
bool vmci_host_code_active(void);
int vmci_host_users(void);

int vmci_guest_init(void);
void vmci_guest_exit(void);
+2 −0
@@ -637,6 +637,8 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
		  vmci_dev->iobase + VMCI_CONTROL_ADDR);

	pci_set_drvdata(pdev, vmci_dev);

	vmci_call_vsock_callback(false);
	return 0;

err_free_irq:
+7 −0
@@ -108,6 +108,11 @@ bool vmci_host_code_active(void)
	     atomic_read(&vmci_host_active_users) > 0);
}

int vmci_host_users(void)
{
	return atomic_read(&vmci_host_active_users);
}

/*
 * Called on open of /dev/vmci.
 */
@@ -338,6 +343,8 @@ static int vmci_host_do_init_context(struct vmci_host_dev *vmci_host_dev,
	vmci_host_dev->ct_type = VMCIOBJ_CONTEXT;
	atomic_inc(&vmci_host_active_users);

	vmci_call_vsock_callback(true);

	retval = 0;

out:
+53 −49
@@ -384,6 +384,49 @@ static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
	return val < vq->num;
}

static struct virtio_transport vhost_transport = {
	.transport = {
		.module                   = THIS_MODULE,

		.get_local_cid            = vhost_transport_get_local_cid,

		.init                     = virtio_transport_do_socket_init,
		.destruct                 = virtio_transport_destruct,
		.release                  = virtio_transport_release,
		.connect                  = virtio_transport_connect,
		.shutdown                 = virtio_transport_shutdown,
		.cancel_pkt               = vhost_transport_cancel_pkt,

		.dgram_enqueue            = virtio_transport_dgram_enqueue,
		.dgram_dequeue            = virtio_transport_dgram_dequeue,
		.dgram_bind               = virtio_transport_dgram_bind,
		.dgram_allow              = virtio_transport_dgram_allow,

		.stream_enqueue           = virtio_transport_stream_enqueue,
		.stream_dequeue           = virtio_transport_stream_dequeue,
		.stream_has_data          = virtio_transport_stream_has_data,
		.stream_has_space         = virtio_transport_stream_has_space,
		.stream_rcvhiwat          = virtio_transport_stream_rcvhiwat,
		.stream_is_active         = virtio_transport_stream_is_active,
		.stream_allow             = virtio_transport_stream_allow,

		.notify_poll_in           = virtio_transport_notify_poll_in,
		.notify_poll_out          = virtio_transport_notify_poll_out,
		.notify_recv_init         = virtio_transport_notify_recv_init,
		.notify_recv_pre_block    = virtio_transport_notify_recv_pre_block,
		.notify_recv_pre_dequeue  = virtio_transport_notify_recv_pre_dequeue,
		.notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
		.notify_send_init         = virtio_transport_notify_send_init,
		.notify_send_pre_block    = virtio_transport_notify_send_pre_block,
		.notify_send_pre_enqueue  = virtio_transport_notify_send_pre_enqueue,
		.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
		.notify_buffer_size       = virtio_transport_notify_buffer_size,

	},

	.send_pkt = vhost_transport_send_pkt,
};

static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
@@ -438,7 +481,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)

		/* Only accept correctly addressed packets */
		if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
-			virtio_transport_recv_pkt(pkt);
+			virtio_transport_recv_pkt(&vhost_transport, pkt);
		else
			virtio_transport_free_pkt(pkt);

@@ -675,6 +718,12 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
	if (guest_cid > U32_MAX)
		return -EINVAL;

	/* Refuse if CID is assigned to the guest->host transport (i.e. nested
	 * VM), to make the loopback work.
	 */
	if (vsock_find_cid(guest_cid))
		return -EADDRINUSE;

	/* Refuse if CID is already in use */
	mutex_lock(&vhost_vsock_mutex);
	other = vhost_vsock_get(guest_cid);
@@ -786,57 +835,12 @@ static struct miscdevice vhost_vsock_misc = {
	.fops = &vhost_vsock_fops,
};

static struct virtio_transport vhost_transport = {
	.transport = {
		.get_local_cid            = vhost_transport_get_local_cid,

		.init                     = virtio_transport_do_socket_init,
		.destruct                 = virtio_transport_destruct,
		.release                  = virtio_transport_release,
		.connect                  = virtio_transport_connect,
		.shutdown                 = virtio_transport_shutdown,
		.cancel_pkt               = vhost_transport_cancel_pkt,

		.dgram_enqueue            = virtio_transport_dgram_enqueue,
		.dgram_dequeue            = virtio_transport_dgram_dequeue,
		.dgram_bind               = virtio_transport_dgram_bind,
		.dgram_allow              = virtio_transport_dgram_allow,

		.stream_enqueue           = virtio_transport_stream_enqueue,
		.stream_dequeue           = virtio_transport_stream_dequeue,
		.stream_has_data          = virtio_transport_stream_has_data,
		.stream_has_space         = virtio_transport_stream_has_space,
		.stream_rcvhiwat          = virtio_transport_stream_rcvhiwat,
		.stream_is_active         = virtio_transport_stream_is_active,
		.stream_allow             = virtio_transport_stream_allow,

		.notify_poll_in           = virtio_transport_notify_poll_in,
		.notify_poll_out          = virtio_transport_notify_poll_out,
		.notify_recv_init         = virtio_transport_notify_recv_init,
		.notify_recv_pre_block    = virtio_transport_notify_recv_pre_block,
		.notify_recv_pre_dequeue  = virtio_transport_notify_recv_pre_dequeue,
		.notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
		.notify_send_init         = virtio_transport_notify_send_init,
		.notify_send_pre_block    = virtio_transport_notify_send_pre_block,
		.notify_send_pre_enqueue  = virtio_transport_notify_send_pre_enqueue,
		.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,

		.set_buffer_size          = virtio_transport_set_buffer_size,
		.set_min_buffer_size      = virtio_transport_set_min_buffer_size,
		.set_max_buffer_size      = virtio_transport_set_max_buffer_size,
		.get_buffer_size          = virtio_transport_get_buffer_size,
		.get_min_buffer_size      = virtio_transport_get_min_buffer_size,
		.get_max_buffer_size      = virtio_transport_get_max_buffer_size,
	},

	.send_pkt = vhost_transport_send_pkt,
};

static int __init vhost_vsock_init(void)
{
	int ret;

-	ret = vsock_core_init(&vhost_transport.transport);
+	ret = vsock_core_register(&vhost_transport.transport,
+				  VSOCK_TRANSPORT_F_H2G);
	if (ret < 0)
		return ret;
	return misc_register(&vhost_vsock_misc);
@@ -845,7 +849,7 @@ static int __init vhost_vsock_init(void)
static void __exit vhost_vsock_exit(void)
{
	misc_deregister(&vhost_vsock_misc);
-	vsock_core_exit();
+	vsock_core_unregister(&vhost_transport.transport);
};

module_init(vhost_vsock_init);