Commit 0927e157 authored by Vasily Gorbik
Browse files

Merge tag 'vfio-ccw-20200603-v2' of...

Merge tag 'vfio-ccw-20200603-v2' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/vfio-ccw into features

vfio-ccw updates:
- accept requests without the prefetch bit set
- enable path handling via two new regions

* tag 'vfio-ccw-20200603-v2' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/vfio-ccw:
  vfio-ccw: Add trace for CRW event
  vfio-ccw: Wire up the CRW irq and CRW region
  vfio-ccw: Introduce a new CRW region
  vfio-ccw: Refactor IRQ handlers
  vfio-ccw: Introduce a new schib region
  vfio-ccw: Refactor the unregister of the async regions
  vfio-ccw: Register a chp_event callback for vfio-ccw
  vfio-ccw: Introduce new helper functions to free/destroy regions
  vfio-ccw: document possible errors
  vfio-ccw: Enable transparent CCW IPL from DASD

Link: https://lkml.kernel.org/r/20200603112716.332801-1-cohuck@redhat.com


Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
parents e1750a3d b2dd9a44
Loading
Loading
Loading
Loading
+97 −3
Original line number Diff line number Diff line
@@ -204,15 +204,44 @@ definition of the region is::
	  __u32   ret_code;
  } __packed;

This region is always available.

While starting an I/O request, orb_area should be filled with the
guest ORB, and scsw_area should be filled with the SCSW of the Virtual
Subchannel.

irb_area stores the I/O result.

ret_code stores a return code for each access of the region.
ret_code stores a return code for each access of the region. The following
values may occur:

``0``
  The operation was successful.

``-EOPNOTSUPP``
  The orb specified transport mode or an unidentified IDAW format, or the
  scsw specified a function other than the start function.

``-EIO``
  A request was issued while the device was not in a state ready to accept
  requests, or an internal error occurred.

``-EBUSY``
  The subchannel was status pending or busy, or a request was already active.

``-EAGAIN``
  A request was being processed, and the caller should retry.

``-EACCES``
  The channel path(s) used for the I/O were found to be not operational.

``-ENODEV``
  The device was found to be not operational.

``-EINVAL``
  The orb specified a chain longer than 255 ccws, or an internal error
  occurred.

This region is always available.

vfio-ccw cmd region
-------------------
@@ -231,6 +260,64 @@ This region is exposed via region type VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD.

Currently, CLEAR SUBCHANNEL and HALT SUBCHANNEL use this region.

command specifies the command to be issued; ret_code stores a return code
for each access of the region. The following values may occur:

``0``
  The operation was successful.

``-ENODEV``
  The device was found to be not operational.

``-EINVAL``
  A command other than halt or clear was specified.

``-EIO``
  A request was issued while the device was not in a state ready to accept
  requests.

``-EAGAIN``
  A request was being processed, and the caller should retry.

``-EBUSY``
  The subchannel was status pending or busy while processing a halt request.

vfio-ccw schib region
---------------------

The vfio-ccw schib region is used to return Subchannel-Information
Block (SCHIB) data to userspace::

  struct ccw_schib_region {
  #define SCHIB_AREA_SIZE 52
         __u8 schib_area[SCHIB_AREA_SIZE];
  } __packed;

This region is exposed via region type VFIO_REGION_SUBTYPE_CCW_SCHIB.

Reading this region triggers a STORE SUBCHANNEL to be issued to the
associated hardware.

vfio-ccw crw region
-------------------

The vfio-ccw crw region is used to return Channel Report Word (CRW)
data to userspace::

  struct ccw_crw_region {
         __u32 crw;
         __u32 pad;
  } __packed;

This region is exposed via region type VFIO_REGION_SUBTYPE_CCW_CRW.

Reading this region returns a CRW if one that is relevant for this
subchannel (e.g. one reporting changes in channel path state) is
pending, or all zeroes if not. If multiple CRWs are pending (including
possibly chained CRWs), reading this region again will return the next
one, until no more CRWs are pending and zeroes are returned. This is
similar to how STORE CHANNEL REPORT WORD works.

vfio-ccw operation details
--------------------------

@@ -333,7 +420,14 @@ through DASD/ECKD device online in a guest now and use it as a block
device.

The current code allows the guest to start channel programs via
START SUBCHANNEL, and to issue HALT SUBCHANNEL and CLEAR SUBCHANNEL.
START SUBCHANNEL, and to issue HALT SUBCHANNEL, CLEAR SUBCHANNEL,
and STORE SUBCHANNEL.

Currently all channel programs are prefetched, regardless of the
p-bit setting in the ORB.  As a result, self-modifying channel
programs are not supported.  For this reason, IPL has to be handled as
a special case by a userspace/guest program; this has been implemented
in QEMU's s390-ccw bios as of QEMU 4.1.

vfio-ccw supports classic (command mode) channel I/O only. Transport
mode (HPF) is not supported.
+1 −1
Original line number Diff line number Diff line
@@ -21,5 +21,5 @@ qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_setup.o
obj-$(CONFIG_QDIO) += qdio.o

vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o \
	vfio_ccw_async.o vfio_ccw_trace.o
	vfio_ccw_async.o vfio_ccw_trace.o vfio_ccw_chp.o
obj-$(CONFIG_VFIO_CCW) += vfio_ccw.o
+148 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/*
 * Channel path related status regions for vfio_ccw
 *
 * Copyright IBM Corp. 2020
 *
 * Author(s): Farhan Ali <alifm@linux.ibm.com>
 *            Eric Farman <farman@linux.ibm.com>
 */

#include <linux/vfio.h>
#include "vfio_ccw_private.h"

/*
 * Read handler for the schib region.
 *
 * Refreshes the subchannel's schib through cio_update_schib(), copies it
 * into the region buffer and hands the requested window of that buffer to
 * userspace.  Everything after the bounds check runs under io_mutex.
 *
 * Returns the number of bytes copied on success, -EINVAL if the access
 * would extend past the end of the region, -ENODEV if cio_update_schib()
 * reports a failure, or -EFAULT if the copy to userspace fails.
 */
static ssize_t vfio_ccw_schib_region_read(struct vfio_ccw_private *private,
					  char __user *buf, size_t count,
					  loff_t *ppos)
{
	unsigned int idx = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
	loff_t offset = *ppos & VFIO_CCW_OFFSET_MASK;
	struct ccw_schib_region *schib_data;
	int rc;

	/* Refuse any window that does not fit inside the region. */
	if (offset + count > sizeof(*schib_data))
		return -EINVAL;

	mutex_lock(&private->io_mutex);
	schib_data = private->region[idx].data;

	if (cio_update_schib(private->sch)) {
		rc = -ENODEV;
	} else {
		/* Snapshot the freshly updated schib into the region buffer. */
		memcpy(schib_data, &private->sch->schib, sizeof(*schib_data));

		if (copy_to_user(buf, (void *)schib_data + offset, count))
			rc = -EFAULT;
		else
			rc = count;
	}

	mutex_unlock(&private->io_mutex);
	return rc;
}

/*
 * Write handler for the schib region.
 *
 * Every write attempt is rejected with -EINVAL; none of the arguments are
 * examined.  The region is registered with VFIO_REGION_INFO_FLAG_READ only
 * (see vfio_ccw_register_schib_dev_regions()).
 */
static ssize_t vfio_ccw_schib_region_write(struct vfio_ccw_private *private,
					   const char __user *buf, size_t count,
					   loff_t *ppos)
{
	return -EINVAL;
}


/*
 * Release handler for the schib region.
 *
 * Intentionally empty: no per-region cleanup is performed here.
 * NOTE(review): presumably the backing buffer (private->schib_region) is
 * released by the driver via vfio_ccw_free_regions() - confirm.
 */
static void vfio_ccw_schib_region_release(struct vfio_ccw_private *private,
					  struct vfio_ccw_region *region)
{

}

/*
 * Region operations for the schib region: reads return schib data, writes
 * are always rejected, and release does nothing.
 */
const struct vfio_ccw_regops vfio_ccw_schib_region_ops = {
	.read = vfio_ccw_schib_region_read,
	.write = vfio_ccw_schib_region_write,
	.release = vfio_ccw_schib_region_release,
};

/*
 * Register the schib region for @private.
 *
 * The region is registered under subtype VFIO_REGION_SUBTYPE_CCW_SCHIB,
 * sized as one struct ccw_schib_region, flagged readable only, and backed
 * by private->schib_region.
 *
 * Returns whatever vfio_ccw_register_dev_region() returns.
 */
int vfio_ccw_register_schib_dev_regions(struct vfio_ccw_private *private)
{
	return vfio_ccw_register_dev_region(private,
					    VFIO_REGION_SUBTYPE_CCW_SCHIB,
					    &vfio_ccw_schib_region_ops,
					    sizeof(struct ccw_schib_region),
					    VFIO_REGION_INFO_FLAG_READ,
					    private->schib_region);
}

/*
 * Read handler for the crw region.
 *
 * Takes the first entry (if any) off the private->crw list, places its CRW
 * in the region buffer and copies the requested window of that buffer to
 * userspace.  The buffer's crw field is cleared again before io_mutex is
 * dropped, and the dequeued entry is freed whether or not the copy to
 * userspace succeeded.  If further entries remain queued and a crw_trigger
 * eventfd is set, it is signalled so that they get picked up as well.
 *
 * Returns the number of bytes copied on success, -EINVAL if the access
 * would extend past the end of the region, or -EFAULT if the copy to
 * userspace fails.
 *
 * NOTE(review): the list is inspected and modified before io_mutex is
 * taken; confirm that callers and vfio_ccw_queue_crw() cannot race here.
 */
static ssize_t vfio_ccw_crw_region_read(struct vfio_ccw_private *private,
					char __user *buf, size_t count,
					loff_t *ppos)
{
	unsigned int idx = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
	loff_t offset = *ppos & VFIO_CCW_OFFSET_MASK;
	struct ccw_crw_region *crw_data;
	struct vfio_ccw_crw *pending;
	int rc;

	/* Refuse any window that does not fit inside the region. */
	if (offset + count > sizeof(*crw_data))
		return -EINVAL;

	/* Dequeue the oldest queued CRW, if there is one. */
	pending = list_first_entry_or_null(&private->crw,
					   struct vfio_ccw_crw, next);
	if (pending)
		list_del(&pending->next);

	mutex_lock(&private->io_mutex);
	crw_data = private->region[idx].data;

	if (pending)
		memcpy(&crw_data->crw, &pending->crw, sizeof(crw_data->crw));

	rc = count;
	if (copy_to_user(buf, (void *)crw_data + offset, count))
		rc = -EFAULT;

	/* Reset the buffer so the next read starts from an empty CRW. */
	crw_data->crw = 0;
	mutex_unlock(&private->io_mutex);

	kfree(pending);

	/* Notify the guest if more CRWs are on our queue */
	if (!list_empty(&private->crw) && private->crw_trigger)
		eventfd_signal(private->crw_trigger, 1);

	return rc;
}

/*
 * Write handler for the crw region.
 *
 * Every write attempt is rejected with -EINVAL; none of the arguments are
 * examined.  The region is registered with VFIO_REGION_INFO_FLAG_READ only
 * (see vfio_ccw_register_crw_dev_regions()).
 */
static ssize_t vfio_ccw_crw_region_write(struct vfio_ccw_private *private,
					 const char __user *buf, size_t count,
					 loff_t *ppos)
{
	return -EINVAL;
}

/*
 * Release handler for the crw region.
 *
 * Intentionally empty: no per-region cleanup is performed here.
 * NOTE(review): presumably the backing buffer (private->crw_region) is
 * released by the driver via vfio_ccw_free_regions() - confirm.
 */
static void vfio_ccw_crw_region_release(struct vfio_ccw_private *private,
					struct vfio_ccw_region *region)
{

}

/*
 * Region operations for the crw region: reads hand out queued CRWs, writes
 * are always rejected, and release does nothing.
 */
const struct vfio_ccw_regops vfio_ccw_crw_region_ops = {
	.read = vfio_ccw_crw_region_read,
	.write = vfio_ccw_crw_region_write,
	.release = vfio_ccw_crw_region_release,
};

/*
 * Register the crw region for @private.
 *
 * The region is registered under subtype VFIO_REGION_SUBTYPE_CCW_CRW,
 * sized as one struct ccw_crw_region, flagged readable only, and backed
 * by private->crw_region.
 *
 * Returns whatever vfio_ccw_register_dev_region() returns.
 */
int vfio_ccw_register_crw_dev_regions(struct vfio_ccw_private *private)
{
	return vfio_ccw_register_dev_region(private,
					    VFIO_REGION_SUBTYPE_CCW_CRW,
					    &vfio_ccw_crw_region_ops,
					    sizeof(struct ccw_crw_region),
					    VFIO_REGION_INFO_FLAG_READ,
					    private->crw_region);
}
+12 −7
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@
 *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
 */

#include <linux/ratelimit.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/iommu.h>
@@ -625,23 +626,27 @@ static int ccwchain_fetch_one(struct ccwchain *chain,
 * the target channel program from @orb->cmd.iova to the new ccwchain(s).
 *
 * Limitations:
 * 1. Supports only prefetch enabled mode.
 * 2. Supports idal(c64) ccw chaining.
 * 3. Supports 4k idaw.
 * 1. Supports idal(c64) ccw chaining.
 * 2. Supports 4k idaw.
 *
 * Returns:
 *   %0 on success and a negative error value on failure.
 */
int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
{
	/* custom ratelimit used to avoid flood during guest IPL */
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 1);
	int ret;

	/*
	 * XXX:
	 * Only support prefetch enable mode now.
	 * We only support prefetching the channel program. We assume all channel
	 * programs executed by supported guests likewise support prefetching.
	 * Executing a channel program that does not specify prefetching will
	 * typically not cause an error, but a warning is issued to help identify
	 * the problem if something does break.
	 */
	if (!orb->cmd.pfch)
		return -EOPNOTSUPP;
	if (!orb->cmd.pfch && __ratelimit(&ratelimit_state))
		dev_warn(mdev, "Prefetching channel program even though prefetch not specified in ORB");

	INIT_LIST_HEAD(&cp->ccwchain_list);
	memcpy(&cp->orb, orb, sizeof(*orb));
+155 −10
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@

#include <asm/isc.h>

#include "chp.h"
#include "ioasm.h"
#include "css.h"
#include "vfio_ccw_private.h"
@@ -26,6 +27,8 @@
struct workqueue_struct *vfio_ccw_work_q;
static struct kmem_cache *vfio_ccw_io_region;
static struct kmem_cache *vfio_ccw_cmd_region;
static struct kmem_cache *vfio_ccw_schib_region;
static struct kmem_cache *vfio_ccw_crw_region;

debug_info_t *vfio_ccw_debug_msg_id;
debug_info_t *vfio_ccw_debug_trace_id;
@@ -105,6 +108,16 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
		eventfd_signal(private->io_trigger, 1);
}

/*
 * Work handler for private->crw_work.
 *
 * Signals the crw_trigger eventfd when at least one CRW is queued on
 * private->crw and a trigger has been set; otherwise does nothing.
 */
static void vfio_ccw_crw_todo(struct work_struct *work)
{
	struct vfio_ccw_private *private =
		container_of(work, struct vfio_ccw_private, crw_work);

	/* Nothing queued, or nobody to notify. */
	if (list_empty(&private->crw) || !private->crw_trigger)
		return;

	eventfd_signal(private->crw_trigger, 1);
}

/*
 * Css driver callbacks
 */
@@ -116,6 +129,18 @@ static void vfio_ccw_sch_irq(struct subchannel *sch)
	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
}

/*
 * Return the per-device region buffers of @private to their kmem caches.
 *
 * Each pointer is checked before it is freed: the probe error path calls
 * this helper after an allocation failure, when only some of the regions
 * have been allocated.  The pointers themselves are left untouched.
 */
static void vfio_ccw_free_regions(struct vfio_ccw_private *private)
{
	if (private->crw_region)
		kmem_cache_free(vfio_ccw_crw_region, private->crw_region);
	if (private->schib_region)
		kmem_cache_free(vfio_ccw_schib_region, private->schib_region);
	if (private->cmd_region)
		kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
	if (private->io_region)
		kmem_cache_free(vfio_ccw_io_region, private->io_region);
}

static int vfio_ccw_sch_probe(struct subchannel *sch)
{
	struct pmcw *pmcw = &sch->schib.pmcw;
@@ -147,6 +172,18 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
	if (!private->cmd_region)
		goto out_free;

	private->schib_region = kmem_cache_zalloc(vfio_ccw_schib_region,
						  GFP_KERNEL | GFP_DMA);

	if (!private->schib_region)
		goto out_free;

	private->crw_region = kmem_cache_zalloc(vfio_ccw_crw_region,
						GFP_KERNEL | GFP_DMA);

	if (!private->crw_region)
		goto out_free;

	private->sch = sch;
	dev_set_drvdata(&sch->dev, private);
	mutex_init(&private->io_mutex);
@@ -159,7 +196,9 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
	if (ret)
		goto out_free;

	INIT_LIST_HEAD(&private->crw);
	INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
	INIT_WORK(&private->crw_work, vfio_ccw_crw_todo);
	atomic_set(&private->avail, 1);
	private->state = VFIO_CCW_STATE_STANDBY;

@@ -181,10 +220,7 @@ out_disable:
	cio_disable_subchannel(sch);
out_free:
	dev_set_drvdata(&sch->dev, NULL);
	if (private->cmd_region)
		kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
	if (private->io_region)
		kmem_cache_free(vfio_ccw_io_region, private->io_region);
	vfio_ccw_free_regions(private);
	kfree(private->cp.guest_cp);
	kfree(private);
	return ret;
@@ -193,15 +229,20 @@ out_free:
static int vfio_ccw_sch_remove(struct subchannel *sch)
{
	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
	struct vfio_ccw_crw *crw, *temp;

	vfio_ccw_sch_quiesce(sch);

	list_for_each_entry_safe(crw, temp, &private->crw, next) {
		list_del(&crw->next);
		kfree(crw);
	}

	vfio_ccw_mdev_unreg(sch);

	dev_set_drvdata(&sch->dev, NULL);

	kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
	kmem_cache_free(vfio_ccw_io_region, private->io_region);
	vfio_ccw_free_regions(private);
	kfree(private->cp.guest_cp);
	kfree(private);

@@ -258,6 +299,83 @@ out_unlock:
	return rc;
}

/*
 * Build a CRW from @rsc, @erc and @rsid, append it to the private->crw
 * list and schedule crw_work on vfio_ccw_work_q.
 *
 * The entry is allocated with GFP_ATOMIC.  Nothing is reported to the
 * caller if that allocation fails.
 */
static void vfio_ccw_queue_crw(struct vfio_ccw_private *private,
			       unsigned int rsc,
			       unsigned int erc,
			       unsigned int rsid)
{
	struct vfio_ccw_crw *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);

	/*
	 * Without memory for a CRW the event is simply dropped.  The
	 * guest will either see a later one or learn about the change
	 * when it issues its own store subchannel.
	 */
	if (!entry)
		return;

	/* Fill in the CRW from the values handed to us. */
	entry->crw.rsid = rsid;
	entry->crw.erc = erc;
	entry->crw.rsc = rsc;

	list_add_tail(&entry->next, &private->crw);
	queue_work(vfio_ccw_work_q, &private->crw_work);
}

/*
 * Channel path event callback (installed as .chp_event of
 * vfio_ccw_sch_driver).
 *
 * @sch:   subchannel the event is delivered for
 * @link:  channel path link the event refers to
 * @event: one of CHP_VARY_OFF, CHP_OFFLINE, CHP_VARY_ON, CHP_ONLINE
 *
 * Logs the event, refreshes the schib and then adjusts the subchannel's
 * path masks according to @event.  CHP_OFFLINE and CHP_ONLINE additionally
 * queue a channel-path CRW for the guest; the vary events do not.
 *
 * Returns 0 when there is nothing to do (no driver data attached or a zero
 * path mask) and after handling the event, or -ENODEV if
 * cio_update_schib() fails.
 */
static int vfio_ccw_chp_event(struct subchannel *sch,
			      struct chp_link *link, int event)
{
	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
	int mask = chp_ssd_get_mask(&sch->ssd_info, link);
	int retry = 255; /* handed to cio_cancel_halt_clear() by reference */

	/* No private data attached, or @link yields an empty mask. */
	if (!private || !mask)
		return 0;

	trace_vfio_ccw_chp_event(private->sch->schid, mask, event);
	VFIO_CCW_MSG_EVENT(2, "%pUl (%x.%x.%04x): mask=0x%x event=%d\n",
			   mdev_uuid(private->mdev), sch->schid.cssid,
			   sch->schid.ssid, sch->schid.sch_no,
			   mask, event);

	/* The lpum checks below look at the refreshed schib. */
	if (cio_update_schib(sch))
		return -ENODEV;

	/* Event values not listed here are ignored (there is no default). */
	switch (event) {
	case CHP_VARY_OFF:
		/* Path logically turned off */
		sch->opm &= ~mask;
		sch->lpm &= ~mask;
		/* lpum overlaps the affected path(s): cancel/halt/clear. */
		if (sch->schib.pmcw.lpum & mask)
			cio_cancel_halt_clear(sch, &retry);
		break;
	case CHP_OFFLINE:
		/* Path is gone */
		if (sch->schib.pmcw.lpum & mask)
			cio_cancel_halt_clear(sch, &retry);
		vfio_ccw_queue_crw(private, CRW_RSC_CPATH, CRW_ERC_PERRN,
				   link->chpid.id);
		break;
	case CHP_VARY_ON:
		/* Path logically turned on */
		sch->opm |= mask;
		sch->lpm |= mask;
		break;
	case CHP_ONLINE:
		/* Path became available */
		/* Only bits that are also set in opm are added to lpm. */
		sch->lpm |= mask & sch->opm;
		vfio_ccw_queue_crw(private, CRW_RSC_CPATH, CRW_ERC_INIT,
				   link->chpid.id);
		break;
	}

	return 0;
}

static struct css_device_id vfio_ccw_sch_ids[] = {
	{ .match_flags = 0x1, .type = SUBCHANNEL_TYPE_IO, },
	{ /* end of list */ },
@@ -275,6 +393,7 @@ static struct css_driver vfio_ccw_sch_driver = {
	.remove = vfio_ccw_sch_remove,
	.shutdown = vfio_ccw_sch_shutdown,
	.sch_event = vfio_ccw_sch_event,
	.chp_event = vfio_ccw_chp_event,
};

static int __init vfio_ccw_debug_init(void)
@@ -304,6 +423,14 @@ static void vfio_ccw_debug_exit(void)
	debug_unregister(vfio_ccw_debug_trace_id);
}

/*
 * Destroy the four region kmem caches (crw, schib, cmd, io).
 *
 * Used by both the module init error path and module exit.
 * NOTE(review): the init error path can get here with some caches never
 * created, so this relies on kmem_cache_destroy() accepting a NULL
 * cache - confirm.
 */
static void vfio_ccw_destroy_regions(void)
{
	kmem_cache_destroy(vfio_ccw_crw_region);
	kmem_cache_destroy(vfio_ccw_schib_region);
	kmem_cache_destroy(vfio_ccw_cmd_region);
	kmem_cache_destroy(vfio_ccw_io_region);
}

static int __init vfio_ccw_sch_init(void)
{
	int ret;
@@ -336,6 +463,26 @@ static int __init vfio_ccw_sch_init(void)
		goto out_err;
	}

	vfio_ccw_schib_region = kmem_cache_create_usercopy("vfio_ccw_schib_region",
					sizeof(struct ccw_schib_region), 0,
					SLAB_ACCOUNT, 0,
					sizeof(struct ccw_schib_region), NULL);

	if (!vfio_ccw_schib_region) {
		ret = -ENOMEM;
		goto out_err;
	}

	vfio_ccw_crw_region = kmem_cache_create_usercopy("vfio_ccw_crw_region",
					sizeof(struct ccw_crw_region), 0,
					SLAB_ACCOUNT, 0,
					sizeof(struct ccw_crw_region), NULL);

	if (!vfio_ccw_crw_region) {
		ret = -ENOMEM;
		goto out_err;
	}

	isc_register(VFIO_CCW_ISC);
	ret = css_driver_register(&vfio_ccw_sch_driver);
	if (ret) {
@@ -346,8 +493,7 @@ static int __init vfio_ccw_sch_init(void)
	return ret;

out_err:
	kmem_cache_destroy(vfio_ccw_cmd_region);
	kmem_cache_destroy(vfio_ccw_io_region);
	vfio_ccw_destroy_regions();
	destroy_workqueue(vfio_ccw_work_q);
	vfio_ccw_debug_exit();
	return ret;
@@ -357,8 +503,7 @@ static void __exit vfio_ccw_sch_exit(void)
{
	css_driver_unregister(&vfio_ccw_sch_driver);
	isc_unregister(VFIO_CCW_ISC);
	kmem_cache_destroy(vfio_ccw_io_region);
	kmem_cache_destroy(vfio_ccw_cmd_region);
	vfio_ccw_destroy_regions();
	destroy_workqueue(vfio_ccw_work_q);
	vfio_ccw_debug_exit();
}
Loading