Commit 42d279f9 authored by Dave Jiang, committed by Vinod Koul
Browse files

dmaengine: idxd: add char driver to expose submission portal to userland



Create a char device region that will allow acquisition of user portals in
order to allow applications to submit DMA operations. A char device will be
created per work queue that gets exposed. The workqueue type "user"
is used to mark a work queue for user char device. For example if the
workqueue 0 of DSA device 0 is marked for char device, then a device node
of /dev/dsa/wq0.0 will be created.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/157965026985.73301.976523230037106742.stgit@djiang5-desk3.ch.intel.com


Signed-off-by: Vinod Koul <vkoul@kernel.org>
parent 8f47d1a5
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
obj-$(CONFIG_INTEL_IDXD) += idxd.o
idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o
idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o
+302 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/device.h>
#include <linux/sched/task.h>
#include <linux/intel-svm.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/cdev.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <uapi/linux/idxd.h>
#include "registers.h"
#include "idxd.h"

/*
 * Char device bookkeeping kept once per accelerator type (one entry of
 * ictx[] each).
 */
struct idxd_cdev_context {
	/* name handed to alloc_chrdev_region() for this type */
	const char *name;
	/* first dev_t of the region; its major is reused for every wq cdev */
	dev_t devt;
	/* allocator for the minor numbers given to individual work queues */
	struct ida minor_ida;
};

/*
 * ictx is an array indexed by accelerator type (enum idxd_type). Only the
 * entries listed here get a name; any other slot up to IDXD_TYPE_MAX is
 * zero-initialized.
 */
static struct idxd_cdev_context ictx[IDXD_TYPE_MAX] = {
	{ .name = "dsa" },
};

/*
 * Per-open state: allocated in idxd_cdev_open(), stored in
 * file->private_data and freed in idxd_cdev_release().
 */
struct idxd_user_context {
	/* work queue this file descriptor is attached to */
	struct idxd_wq *wq;
	/* NOTE(review): never written in this file - presumably reserved */
	struct task_struct *task;
	/* NOTE(review): never written in this file - presumably reserved */
	unsigned int flags;
};

/* Tells idxd_wq_cdev_cleanup() how far idxd_wq_add_cdev() got. */
enum idxd_cdev_cleanup {
	/* cdev_add() succeeded: cdev_del() must be called */
	CDEV_NORMAL = 0,
	/* cdev_add() failed: only the struct device and minor are undone */
	CDEV_FAILED,
};

/*
 * device_type release callback for the per-wq char device: frees the
 * struct device that idxd_wq_cdev_dev_setup() allocated with kzalloc().
 */
static void idxd_cdev_dev_release(struct device *dev)
{
	/* log first; dev must not be touched once it has been freed */
	dev_dbg(dev, "releasing cdev device\n");
	kfree(dev);
}

/*
 * Device type assigned to every per-wq char device; supplies the release
 * callback that frees the dynamically allocated struct device.
 */
static struct device_type idxd_cdev_device_type = {
	.name = "idxd_cdev",
	.release = idxd_cdev_dev_release,
};

/* Map an inode to the idxd_cdev that embeds the inode's struct cdev. */
static inline struct idxd_cdev *inode_idxd_cdev(struct inode *inode)
{
	return container_of(inode->i_cdev, struct idxd_cdev, cdev);
}

/* Map an idxd_cdev back to the work queue structure that embeds it. */
static inline struct idxd_wq *idxd_cdev_wq(struct idxd_cdev *idxd_cdev)
{
	return container_of(idxd_cdev, struct idxd_wq, idxd_cdev);
}

/* Resolve the work queue that owns the char device behind @inode. */
static inline struct idxd_wq *inode_wq(struct inode *inode)
{
	struct idxd_cdev *wq_cdev = inode_idxd_cdev(inode);

	return idxd_cdev_wq(wq_cdev);
}

/*
 * open() handler for a work queue char device.
 *
 * Allocates the per-open idxd_user_context, stores it in
 * filp->private_data and takes a reference on the work queue.
 *
 * Returns 0 on success, -EBUSY when a dedicated wq is refused because of
 * its current reference count, or -ENOMEM when the context cannot be
 * allocated.
 */
static int idxd_cdev_open(struct inode *inode, struct file *filp)
{
	struct idxd_user_context *ctx;
	struct idxd_device *idxd;
	struct idxd_wq *wq;
	struct device *dev;

	wq = inode_wq(inode);
	idxd = wq->idxd;
	dev = &idxd->pdev->dev;

	dev_dbg(dev, "%s called\n", __func__);

	/*
	 * NOTE(review): the threshold of 1 only gives a dedicated wq a single
	 * user if the count already includes a non-user reference - confirm
	 * against idxd_wq_refcount(). The check and the later idxd_wq_get()
	 * are also not done under a lock here - confirm callers serialize.
	 */
	if (idxd_wq_refcount(wq) > 1 && wq_dedicated(wq))
		return -EBUSY;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	ctx->wq = wq;
	filp->private_data = ctx;
	idxd_wq_get(wq);
	return 0;
}

/*
 * release() handler: undoes idxd_cdev_open() by freeing the per-open
 * context and dropping the reference held on the work queue.
 * Always returns 0.
 */
static int idxd_cdev_release(struct inode *node, struct file *filep)
{
	struct idxd_user_context *uctx = filep->private_data;
	struct idxd_wq *wq = uctx->wq;

	dev_dbg(&wq->idxd->pdev->dev, "%s called\n", __func__);

	filep->private_data = NULL;
	kfree(uctx);

	/* wq was read out of the context before the context was freed */
	idxd_wq_put(wq);

	return 0;
}

/*
 * Validate a user mapping request against the portal size.
 *
 * @wq:   work queue being mapped (used only to find the device for logging)
 * @vma:  the requested mapping
 * @func: caller name, included in the log message
 *
 * Returns 0 if the mapping is at most one page, -EINVAL otherwise.
 */
static int check_vma(struct idxd_wq *wq, struct vm_area_struct *vma,
		     const char *func)
{
	struct device *dev = &wq->idxd->pdev->dev;
	unsigned long map_len = vma->vm_end - vma->vm_start;

	if (map_len <= PAGE_SIZE)
		return 0;

	dev_info_ratelimited(dev, "%s: %s: mapping too large: %lu\n",
			     current->comm, func, map_len);
	return -EINVAL;
}

/*
 * mmap() handler: maps the limited submission portal of the work queue
 * into the caller's address space as a single uncached page.
 *
 * Returns 0 on success, -EINVAL if the requested mapping is larger than
 * one page, or the error from io_remap_pfn_range().
 */
static int idxd_cdev_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct idxd_user_context *ctx = filp->private_data;
	struct idxd_wq *wq = ctx->wq;
	struct idxd_device *idxd = wq->idxd;
	struct pci_dev *pdev = idxd->pdev;
	phys_addr_t base = pci_resource_start(pdev, IDXD_WQ_BAR);
	unsigned long pfn;
	int rc;

	dev_dbg(&pdev->dev, "%s called\n", __func__);

	/* reject oversized requests before touching the vma */
	rc = check_vma(wq, vma, __func__);
	if (rc < 0)
		return rc;

	/* the portal mapping must not be inherited across fork() */
	vma->vm_flags |= VM_DONTCOPY;
	pfn = (base + idxd_get_wq_portal_full_offset(wq->id,
				IDXD_PORTAL_LIMITED)) >> PAGE_SHIFT;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	vma->vm_private_data = ctx;

	return io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE,
			vma->vm_page_prot);
}

/*
 * poll() handler: registers the caller on the wq's error wait queue and
 * reports the file as readable while the device has a valid software
 * error record.
 */
static __poll_t idxd_cdev_poll(struct file *filp,
			       struct poll_table_struct *wait)
{
	struct idxd_user_context *uctx = filp->private_data;
	struct idxd_wq *wq = uctx->wq;
	struct idxd_device *idxd = wq->idxd;
	unsigned long irq_flags;
	__poll_t events;

	poll_wait(filp, &wq->idxd_cdev.err_queue, wait);

	/* sw_err is sampled under the device lock */
	spin_lock_irqsave(&idxd->dev_lock, irq_flags);
	events = idxd->sw_err.valid ? (EPOLLIN | EPOLLRDNORM) : 0;
	spin_unlock_irqrestore(&idxd->dev_lock, irq_flags);

	return events;
}

/*
 * File operations for the per-wq char device. Only open/release, mmap of
 * the submission portal and poll for device errors are provided; there are
 * no read/write/ioctl handlers.
 */
static const struct file_operations idxd_cdev_fops = {
	.owner = THIS_MODULE,
	.open = idxd_cdev_open,
	.release = idxd_cdev_release,
	.mmap = idxd_cdev_mmap,
	.poll = idxd_cdev_poll,
};

int idxd_cdev_get_major(struct idxd_device *idxd)
{
	return MAJOR(ictx[idxd->type].devt);
}

static int idxd_wq_cdev_dev_setup(struct idxd_wq *wq)
{
	struct idxd_device *idxd = wq->idxd;
	struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
	struct idxd_cdev_context *cdev_ctx;
	struct device *dev;
	int minor, rc;

	idxd_cdev->dev = kzalloc(sizeof(*idxd_cdev->dev), GFP_KERNEL);
	if (!idxd_cdev->dev)
		return -ENOMEM;

	dev = idxd_cdev->dev;
	dev->parent = &idxd->pdev->dev;
	dev_set_name(dev, "%s/wq%u.%u", idxd_get_dev_name(idxd),
		     idxd->id, wq->id);
	dev->bus = idxd_get_bus_type(idxd);

	cdev_ctx = &ictx[wq->idxd->type];
	minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL);
	if (minor < 0) {
		rc = minor;
		goto ida_err;
	}

	dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor);
	dev->type = &idxd_cdev_device_type;
	rc = device_register(dev);
	if (rc < 0) {
		dev_err(&idxd->pdev->dev, "device register failed\n");
		put_device(dev);
		goto dev_reg_err;
	}
	idxd_cdev->minor = minor;

	return 0;

 dev_reg_err:
	ida_simple_remove(&cdev_ctx->minor_ida, MINOR(dev->devt));
 ida_err:
	kfree(dev);
	idxd_cdev->dev = NULL;
	return rc;
}

/*
 * Tear down the char device of @wq.
 *
 * @cdev_state says whether cdev_add() had succeeded (CDEV_NORMAL), in
 * which case the cdev is deleted as well; with CDEV_FAILED only the
 * struct device and the minor number are released.
 */
static void idxd_wq_cdev_cleanup(struct idxd_wq *wq,
				 enum idxd_cdev_cleanup cdev_state)
{
	struct idxd_cdev_context *type_ctx = &ictx[wq->idxd->type];
	struct idxd_cdev *wq_cdev = &wq->idxd_cdev;

	if (cdev_state == CDEV_NORMAL)
		cdev_del(&wq_cdev->cdev);

	/*
	 * Unregistering ends with device_type->release() freeing the
	 * allocated struct device, so the pointer is simply dropped below.
	 */
	device_unregister(wq_cdev->dev);
	wq_cdev->dev = NULL;

	ida_simple_remove(&type_ctx->minor_ida, wq_cdev->minor);
	wq_cdev->minor = -1;
}

/*
 * Create the char device for a work queue: register its struct device,
 * then add the cdev so user space can open it.
 *
 * Returns 0 on success or a negative errno; on failure everything set up
 * here has been torn down again.
 */
int idxd_wq_add_cdev(struct idxd_wq *wq)
{
	struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
	struct cdev *cdev = &idxd_cdev->cdev;
	struct device *dev;
	int rc;

	rc = idxd_wq_cdev_dev_setup(wq);
	if (rc < 0)
		return rc;

	/*
	 * The wait queue is used by the poll handler, and the device can be
	 * opened as soon as cdev_add() returns, so it has to be ready first.
	 */
	init_waitqueue_head(&idxd_cdev->err_queue);

	dev = idxd_cdev->dev;
	cdev_init(cdev, &idxd_cdev_fops);
	cdev_set_parent(cdev, &dev->kobj);
	rc = cdev_add(cdev, dev->devt, 1);
	if (rc) {
		dev_dbg(&wq->idxd->pdev->dev, "cdev_add failed: %d\n", rc);
		idxd_wq_cdev_cleanup(wq, CDEV_FAILED);
		return rc;
	}

	return 0;
}

/*
 * Remove the char device of a work queue that was successfully added with
 * idxd_wq_add_cdev(): deletes the cdev, unregisters the device and
 * releases the minor number.
 */
void idxd_wq_del_cdev(struct idxd_wq *wq)
{
	idxd_wq_cdev_cleanup(wq, CDEV_NORMAL);
}

/*
 * Reserve a char device region (and initialise the minor allocator) for
 * every accelerator type.
 *
 * Returns 0 on success. On failure the error from alloc_chrdev_region() is
 * returned and every region reserved so far has been released again, so
 * the caller must not call idxd_cdev_remove().
 */
int idxd_cdev_register(void)
{
	int rc, i;

	for (i = 0; i < IDXD_TYPE_MAX; i++) {
		ida_init(&ictx[i].minor_ida);
		rc = alloc_chrdev_region(&ictx[i].devt, 0, MINORMASK,
					 ictx[i].name);
		if (rc)
			goto err_unwind;
	}

	return 0;

 err_unwind:
	/* entry i got no region; undo the ones before it */
	while (--i >= 0) {
		unregister_chrdev_region(ictx[i].devt, MINORMASK);
		ida_destroy(&ictx[i].minor_ida);
	}
	return rc;
}

void idxd_cdev_remove(void)
{
	int i;

	for (i = 0; i < IDXD_TYPE_MAX; i++) {
		unregister_chrdev_region(ictx[i].devt, MINORMASK);
		ida_destroy(&ictx[i].minor_ida);
	}
}
+1 −1
Original line number Diff line number Diff line
@@ -539,7 +539,7 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
	wq->wqcfg.wq_thresh = wq->threshold;

	/* byte 8-11 */
	wq->wqcfg.priv = 1; /* kernel, therefore priv */
	wq->wqcfg.priv = !!(wq->type == IDXD_WQT_KERNEL);
	wq->wqcfg.mode = 1;

	wq->wqcfg.priority = wq->priority;
+37 −0
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@
#include <linux/dmaengine.h>
#include <linux/percpu-rwsem.h>
#include <linux/wait.h>
#include <linux/cdev.h>
#include "registers.h"

#define IDXD_DRIVER_VERSION	"1.00"
@@ -63,6 +64,14 @@ enum idxd_wq_flag {
/* How a work queue is consumed. */
enum idxd_wq_type {
	IDXD_WQT_NONE = 0,
	/* used from the kernel; the wq is configured as privileged */
	IDXD_WQT_KERNEL,
	/* exposed to user space through a char device */
	IDXD_WQT_USER,
};

/* Char device state embedded in every work queue. */
struct idxd_cdev {
	/* the character device itself; inode->i_cdev points here */
	struct cdev cdev;
	/* dynamically allocated device, freed by its release callback */
	struct device *dev;
	/* minor number of the char device, -1 while none is assigned */
	int minor;
	/* waited on by poll(); NOTE(review): waker is not in view - confirm */
	struct wait_queue_head err_queue;
};

#define IDXD_ALLOCATED_BATCH_SIZE	128U
@@ -82,6 +91,7 @@ enum idxd_complete_type {
struct idxd_wq {
	void __iomem *dportal;
	struct device conf_dev;
	struct idxd_cdev idxd_cdev;
	struct idxd_device *idxd;
	int id;
	enum idxd_wq_type type;
@@ -145,6 +155,7 @@ struct idxd_device {
	enum idxd_device_state state;
	unsigned long flags;
	int id;
	int major;

	struct pci_dev *pdev;
	void __iomem *reg_base;
@@ -196,11 +207,29 @@ struct idxd_desc {
#define confdev_to_idxd(dev) container_of(dev, struct idxd_device, conf_dev)
#define confdev_to_wq(dev) container_of(dev, struct idxd_wq, conf_dev)

extern struct bus_type dsa_bus_type;

/* True when the WQ_FLAG_DEDICATED bit is set in the work queue's flags. */
static inline bool wq_dedicated(struct idxd_wq *wq)
{
	bool dedicated = test_bit(WQ_FLAG_DEDICATED, &wq->flags);

	return dedicated;
}

/* Portal flavours of a work queue; the value selects the portal slot. */
enum idxd_portal_prot {
	IDXD_PORTAL_UNLIMITED = 0,
	IDXD_PORTAL_LIMITED,
};

/*
 * Byte offset of the @prot portal from the start of a work queue's portal
 * area: slots are 0x1000 bytes apart.
 */
static inline int idxd_get_wq_portal_offset(enum idxd_portal_prot prot)
{
	const int portal_stride = 0x1000;

	return prot * portal_stride;
}

/*
 * Byte offset of the @prot portal of work queue @wq_id from the start of
 * the portal BAR: every work queue owns four pages, and the portal
 * flavour selects the slot within them.
 */
static inline int idxd_get_wq_portal_full_offset(int wq_id,
						 enum idxd_portal_prot prot)
{
	int wq_base = (wq_id * 4) << PAGE_SHIFT;

	return wq_base + idxd_get_wq_portal_offset(prot);
}

static inline void idxd_set_type(struct idxd_device *idxd)
{
	struct pci_dev *pdev = idxd->pdev;
@@ -233,6 +262,7 @@ int idxd_setup_sysfs(struct idxd_device *idxd);
void idxd_cleanup_sysfs(struct idxd_device *idxd);
int idxd_register_driver(void);
void idxd_unregister_driver(void);
struct bus_type *idxd_get_bus_type(struct idxd_device *idxd);

/* device interrupt control */
irqreturn_t idxd_irq_handler(int vec, void *data);
@@ -276,4 +306,11 @@ void idxd_dma_complete_txd(struct idxd_desc *desc,
			   enum idxd_complete_type comp_type);
dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx);

/* cdev */
int idxd_cdev_register(void);
void idxd_cdev_remove(void);
int idxd_cdev_get_major(struct idxd_device *idxd);
int idxd_wq_add_cdev(struct idxd_wq *wq);
void idxd_wq_del_cdev(struct idxd_wq *wq);

#endif
+10 −0
Original line number Diff line number Diff line
@@ -188,6 +188,7 @@ static int idxd_setup_internals(struct idxd_device *idxd)
		mutex_init(&wq->wq_lock);
		atomic_set(&wq->dq_count, 0);
		init_waitqueue_head(&wq->submit_waitq);
		wq->idxd_cdev.minor = -1;
		rc = percpu_init_rwsem(&wq->submit_lock);
		if (rc < 0) {
			idxd_wqs_free_lock(idxd);
@@ -321,6 +322,8 @@ static int idxd_probe(struct idxd_device *idxd)
		goto err_idr_fail;
	}

	idxd->major = idxd_cdev_get_major(idxd);

	dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id);
	return 0;

@@ -501,6 +504,10 @@ static int __init idxd_init_module(void)
	if (err < 0)
		goto err_idxd_driver_register;

	err = idxd_cdev_register();
	if (err)
		goto err_cdev_register;

	err = pci_register_driver(&idxd_pci_driver);
	if (err)
		goto err_pci_register;
@@ -508,6 +515,8 @@ static int __init idxd_init_module(void)
	return 0;

err_pci_register:
	idxd_cdev_remove();
err_cdev_register:
	idxd_unregister_driver();
err_idxd_driver_register:
	idxd_unregister_bus_type();
@@ -518,6 +527,7 @@ module_init(idxd_init_module);
/*
 * Module exit: undo idxd_init_module() in reverse order of registration,
 * mirroring that function's error unwinding (PCI driver, char device
 * regions, idxd driver, bus type).
 */
static void __exit idxd_exit_module(void)
{
	pci_unregister_driver(&idxd_pci_driver);
	idxd_cdev_remove();
	/* registered at init; without this it would stay registered on unload */
	idxd_unregister_driver();
	idxd_unregister_bus_type();
}
module_exit(idxd_exit_module);
Loading